├── .gitignore
├── AudioCodec
    ├── MimiCodec
    │   ├── config
    │   │   └── mimi24k.yaml
    │   ├── dataloaders
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   └── base_dataloader.cpython-312.pyc
    │   │   └── base_dataloader.py
    │   ├── get_scp.py
    │   ├── inference.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   ├── __init__.cpython-39.pyc
    │   │   │   ├── basic_loss.cpython-312.pyc
    │   │   │   ├── basic_loss.cpython-37.pyc
    │   │   │   ├── basic_loss.cpython-38.pyc
    │   │   │   ├── basic_loss.cpython-39.pyc
    │   │   │   ├── discriminator_loss.cpython-312.pyc
    │   │   │   ├── discriminator_loss.cpython-37.pyc
    │   │   │   ├── discriminator_loss.cpython-38.pyc
    │   │   │   ├── discriminator_loss.cpython-39.pyc
    │   │   │   ├── enh_loss.cpython-312.pyc
    │   │   │   ├── enh_loss.cpython-38.pyc
    │   │   │   ├── enh_loss.cpython-39.pyc
    │   │   │   ├── generator_loss.cpython-310.pyc
    │   │   │   ├── generator_loss.cpython-312.pyc
    │   │   │   ├── generator_loss.cpython-37.pyc
    │   │   │   ├── generator_loss.cpython-38.pyc
    │   │   │   └── generator_loss.cpython-39.pyc
    │   │   ├── basic_loss.py
    │   │   ├── discriminator_loss.py
    │   │   ├── enh_loss.py
    │   │   └── generator_loss.py
    │   ├── models
    │   │   ├── MimiCodec.py
    │   │   ├── __init__.py
    │   │   └── __pycache__
    │   │   │   ├── MimiCodec.cpython-312.pyc
    │   │   │   ├── MimiCodec.cpython-38.pyc
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   └── __init__.cpython-38.pyc
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   ├── __init__.cpython-39.pyc
    │   │   │   ├── conv.cpython-310.pyc
    │   │   │   ├── conv.cpython-312.pyc
    │   │   │   ├── conv.cpython-37.pyc
    │   │   │   ├── conv.cpython-38.pyc
    │   │   │   ├── conv.cpython-39.pyc
    │   │   │   ├── gating.cpython-312.pyc
    │   │   │   ├── lstm.cpython-310.pyc
    │   │   │   ├── lstm.cpython-37.pyc
    │   │   │   ├── lstm.cpython-38.pyc
    │   │   │   ├── lstm.cpython-39.pyc
    │   │   │   ├── norm.cpython-310.pyc
    │   │   │   ├── norm.cpython-37.pyc
    │   │   │   ├── norm.cpython-38.pyc
    │   │   │   ├── norm.cpython-39.pyc
    │   │   │   ├── resample.cpython-312.pyc
    │   │   │   ├── rope.cpython-312.pyc
    │   │   │   ├── seanet.cpython-310.pyc
    │   │   │   ├── seanet.cpython-312.pyc
    │   │   │   ├── seanet.cpython-37.pyc
    │   │   │   ├── seanet.cpython-38.pyc
    │   │   │   ├── seanet.cpython-39.pyc
    │   │   │   ├── streaming.cpython-312.pyc
    │   │   │   ├── streaming.cpython-38.pyc
    │   │   │   ├── transformer.cpython-310.pyc
    │   │   │   ├── transformer.cpython-312.pyc
    │   │   │   ├── transformer.cpython-37.pyc
    │   │   │   ├── transformer.cpython-38.pyc
    │   │   │   └── transformer.cpython-39.pyc
    │   │   ├── commons
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   ├── __init__.cpython-39.pyc
    │   │   │   │   ├── base_layers.cpython-310.pyc
    │   │   │   │   ├── base_layers.cpython-312.pyc
    │   │   │   │   ├── base_layers.cpython-37.pyc
    │   │   │   │   ├── base_layers.cpython-38.pyc
    │   │   │   │   ├── base_layers.cpython-39.pyc
    │   │   │   │   ├── ops.cpython-310.pyc
    │   │   │   │   ├── ops.cpython-312.pyc
    │   │   │   │   ├── ops.cpython-37.pyc
    │   │   │   │   ├── ops.cpython-38.pyc
    │   │   │   │   ├── ops.cpython-39.pyc
    │   │   │   │   ├── pqmf.cpython-310.pyc
    │   │   │   │   ├── pqmf.cpython-312.pyc
    │   │   │   │   ├── pqmf.cpython-37.pyc
    │   │   │   │   ├── pqmf.cpython-38.pyc
    │   │   │   │   ├── pqmf.cpython-39.pyc
    │   │   │   │   ├── torch_stft.cpython-310.pyc
    │   │   │   │   ├── torch_stft.cpython-312.pyc
    │   │   │   │   ├── torch_stft.cpython-37.pyc
    │   │   │   │   ├── torch_stft.cpython-38.pyc
    │   │   │   │   └── torch_stft.cpython-39.pyc
    │   │   │   ├── base_layers.py
    │   │   │   ├── ops.py
    │   │   │   ├── position_encoding.py
    │   │   │   ├── pqmf.py
    │   │   │   └── torch_stft.py
    │   │   ├── conv.py
    │   │   ├── discriminators
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   │   ├── frequency_discriminator.cpython-312.pyc
    │   │   │   │   ├── period_discriminator.cpython-312.pyc
    │   │   │   │   └── scale_discriminator.cpython-312.pyc
    │   │   │   ├── combd_sbd.py
    │   │   │   ├── frequency_discriminator.py
    │   │   │   ├── frequency_discriminator_bak.py
    │   │   │   ├── mrd.py
    │   │   │   ├── period_discriminator.py
    │   │   │   └── scale_discriminator.py
    │   │   ├── gating.py
    │   │   ├── loss.py
    │   │   ├── resample.py
    │   │   ├── rope.py
    │   │   ├── seanet.py
    │   │   ├── streaming.py
    │   │   └── transformer.py
    │   ├── path.sh
    │   ├── quantization
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── base.cpython-310.pyc
    │   │   │   ├── base.cpython-312.pyc
    │   │   │   ├── core_vq.cpython-310.pyc
    │   │   │   ├── core_vq.cpython-312.pyc
    │   │   │   ├── vq.cpython-310.pyc
    │   │   │   ├── vq.cpython-312.pyc
    │   │   │   ├── vq_dc.cpython-310.pyc
    │   │   │   └── vq_dc.cpython-312.pyc
    │   │   ├── base.py
    │   │   ├── core_vq.py
    │   │   ├── vq.py
    │   │   └── vq_dc.py
    │   ├── run.sh
    │   ├── semantic_features
    │   │   ├── WavLM.py
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── WavLM.cpython-312.pyc
    │   │   │   ├── WavLM.cpython-38.pyc
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── modules.cpython-312.pyc
    │   │   │   ├── modules.cpython-38.pyc
    │   │   │   └── wavlm_feature.cpython-312.pyc
    │   │   ├── hubert_feature.py
    │   │   ├── modules.py
    │   │   ├── w2vec2bert_feature.py
    │   │   ├── wavlm_feature.py
    │   │   └── whisper_feature.py
    │   ├── train.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │       ├── __init__.cpython-310.pyc
    │   │       ├── __init__.cpython-312.pyc
    │   │       ├── __init__.cpython-37.pyc
    │   │       ├── __init__.cpython-38.pyc
    │   │       ├── __init__.cpython-39.pyc
    │   │       ├── compile.cpython-312.pyc
    │   │       ├── ddp_utils.cpython-38.pyc
    │   │       ├── ddp_utils.cpython-39.pyc
    │   │       ├── hifigan_mel.cpython-310.pyc
    │   │       ├── hifigan_mel.cpython-312.pyc
    │   │       ├── hifigan_mel.cpython-37.pyc
    │   │       ├── hifigan_mel.cpython-38.pyc
    │   │       ├── hifigan_mel.cpython-39.pyc
    │   │       ├── utils.cpython-310.pyc
    │   │       ├── utils.cpython-312.pyc
    │   │       ├── utils.cpython-37.pyc
    │   │       ├── utils.cpython-38.pyc
    │   │       └── utils.cpython-39.pyc
    │   │   ├── autocast.py
    │   │   ├── compile.py
    │   │   ├── ddp_utils.py
    │   │   ├── hifigan_mel.py
    │   │   ├── sampling.py
    │   │   └── utils.py
    └── readme.md
├── DataPipeline
    └── readme.md
├── Evaluation
    ├── codec
    │   ├── compute_dnsmos.sh
    │   ├── compute_mcd.py
    │   ├── compute_metrics.sh
    │   ├── compute_ms_stft_loss.py
    │   ├── compute_pesq.py
    │   ├── compute_sisnr.py
    │   ├── compute_ssim.py
    │   ├── compute_stoi.py
    │   └── compute_visqol.py
    └── readme.md
├── MLLM
    ├── egs
    │   └── moshi_ft
    │   │   ├── data_scripts
    │   │       ├── create_data_json.py
    │   │       └── offline_tokenization.py
    │   │   ├── local
    │   │       ├── asr_whisperx.py
    │   │       └── vad_segment.py
    │   │   ├── readme.md
    │   │   ├── run.sh
    │   │   └── utils
    │   │       ├── run.pl
    │   │       └── split_scp.pl
    ├── models
    │   ├── __pycache__
    │   │   └── model.cpython-312.pyc
    │   ├── model.py
    │   └── model_lora.py
    ├── modules
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-312.pyc
    │   │   ├── conv.cpython-312.pyc
    │   │   ├── gating.cpython-312.pyc
    │   │   ├── resample.cpython-312.pyc
    │   │   ├── rope.cpython-312.pyc
    │   │   ├── seanet.cpython-312.pyc
    │   │   ├── streaming.cpython-312.pyc
    │   │   ├── transformer.cpython-312.pyc
    │   │   └── transformer_lora.cpython-312.pyc
    │   ├── conv.py
    │   ├── gating.py
    │   ├── resample.py
    │   ├── rope.py
    │   ├── seanet.py
    │   ├── streaming.py
    │   ├── transformer.py
    │   └── transformer_lora.py
    ├── readme.md
    ├── tools
    │   ├── data_scripts
    │   │   ├── create_data_json.py
    │   │   ├── filter_scp.py
    │   │   ├── find_peer_utts.py
    │   │   ├── merge_then_split.py
    │   │   ├── offline_tokenization.py
    │   │   └── select_spk2utt.py
    │   ├── kaldi
    │   │   └── utils
    │   │   │   ├── add_disambig.pl
    │   │   │   ├── add_lex_disambig.pl
    │   │   │   ├── analyze_segments.pl
    │   │   │   ├── apply_map.pl
    │   │   │   ├── best_wer.sh
    │   │   │   ├── build_const_arpa_lm.sh
    │   │   │   ├── build_kenlm_model_from_arpa.sh
    │   │   │   ├── combine_data.sh
    │   │   │   ├── convert_ctm.pl
    │   │   │   ├── convert_slf.pl
    │   │   │   ├── convert_slf_parallel.sh
    │   │   │   ├── copy_data_dir.sh
    │   │   │   ├── create_data_link.pl
    │   │   │   ├── create_split_dir.pl
    │   │   │   ├── ctm
    │   │   │       ├── convert_ctm.pl
    │   │   │       ├── fix_ctm.sh
    │   │   │       └── resolve_ctm_overlaps.py
    │   │   │   ├── data
    │   │   │       ├── combine_data.sh
    │   │   │       ├── combine_short_segments.sh
    │   │   │       ├── convert_data_dir_to_whole.sh
    │   │   │       ├── copy_data_dir.sh
    │   │   │       ├── extend_segment_times.py
    │   │   │       ├── extract_wav_segments_data_dir.sh
    │   │   │       ├── fix_data_dir.sh
    │   │   │       ├── fix_subsegment_feats.pl
    │   │   │       ├── get_allowed_durations.py
    │   │   │       ├── get_frame_shift.sh
    │   │   │       ├── get_num_frames.sh
    │   │   │       ├── get_reco2dur.sh
    │   │   │       ├── get_reco2utt_for_data.sh
    │   │   │       ├── get_segments_for_data.sh
    │   │   │       ├── get_uniform_subsegments.py
    │   │   │       ├── get_utt2dur.sh
    │   │   │       ├── get_utt2num_frames.sh
    │   │   │       ├── internal
    │   │   │       │   ├── choose_utts_to_combine.py
    │   │   │       │   ├── combine_segments_to_recording.py
    │   │   │       │   ├── modify_speaker_info.py
    │   │   │       │   └── perturb_volume.py
    │   │   │       ├── limit_feature_dim.sh
    │   │   │       ├── modify_speaker_info.sh
    │   │   │       ├── modify_speaker_info_to_recording.sh
    │   │   │       ├── normalize_data_range.pl
    │   │   │       ├── perturb_data_dir_speed.sh
    │   │   │       ├── perturb_data_dir_speed_3way.sh
    │   │   │       ├── perturb_data_dir_volume.sh
    │   │   │       ├── perturb_speed_to_allowed_lengths.py
    │   │   │       ├── remove_dup_utts.sh
    │   │   │       ├── resample_data_dir.sh
    │   │   │       ├── shift_and_combine_feats.sh
    │   │   │       ├── shift_feats.sh
    │   │   │       ├── split_data.sh
    │   │   │       ├── subsegment_data_dir.sh
    │   │   │       ├── subset_data_dir.sh
    │   │   │       └── validate_data_dir.sh
    │   │   │   ├── dict_dir_add_pronprobs.sh
    │   │   │   ├── eps2disambig.pl
    │   │   │   ├── filt.py
    │   │   │   ├── filter_scp.pl
    │   │   │   ├── filter_scps.pl
    │   │   │   ├── find_arpa_oovs.pl
    │   │   │   ├── fix_ctm.sh
    │   │   │   ├── fix_data_dir.sh
    │   │   │   ├── format_lm.sh
    │   │   │   ├── format_lm_sri.sh
    │   │   │   ├── gen_topo.pl
    │   │   │   ├── int2sym.pl
    │   │   │   ├── kwslist_post_process.pl
    │   │   │   ├── lang
    │   │   │       ├── add_lex_disambig.pl
    │   │   │       ├── add_unigrams_arpa.pl
    │   │   │       ├── adjust_unk_arpa.pl
    │   │   │       ├── adjust_unk_graph.sh
    │   │   │       ├── bpe
    │   │   │       │   ├── add_final_optional_silence.sh
    │   │   │       │   ├── apply_bpe.py
    │   │   │       │   ├── bidi.py
    │   │   │       │   ├── learn_bpe.py
    │   │   │       │   ├── prepend_words.py
    │   │   │       │   └── reverse.py
    │   │   │       ├── check_g_properties.pl
    │   │   │       ├── check_phones_compatible.sh
    │   │   │       ├── compute_sentence_probs_arpa.py
    │   │   │       ├── extend_lang.sh
    │   │   │       ├── get_word_position_phone_map.pl
    │   │   │       ├── grammar
    │   │   │       │   ├── augment_phones_txt.py
    │   │   │       │   └── augment_words_txt.py
    │   │   │       ├── internal
    │   │   │       │   ├── apply_unk_lm.sh
    │   │   │       │   ├── arpa2fst_constrained.py
    │   │   │       │   └── modify_unk_pron.py
    │   │   │       ├── limit_arpa_unk_history.py
    │   │   │       ├── make_kn_lm.py
    │   │   │       ├── make_lexicon_fst.py
    │   │   │       ├── make_lexicon_fst_silprob.py
    │   │   │       ├── make_phone_bigram_lang.sh
    │   │   │       ├── make_phone_lm.py
    │   │   │       ├── make_position_dependent_subword_lexicon.py
    │   │   │       ├── make_subword_lexicon_fst.py
    │   │   │       ├── make_unk_lm.sh
    │   │   │       ├── ngram_entropy_pruning.py
    │   │   │       ├── prepare_lang.sh
    │   │   │       ├── validate_disambig_sym_file.pl
    │   │   │       └── validate_lang.pl
    │   │   │   ├── ln.pl
    │   │   │   ├── make_absolute.sh
    │   │   │   ├── make_lexicon_fst.pl
    │   │   │   ├── make_lexicon_fst_silprob.pl
    │   │   │   ├── make_unigram_grammar.pl
    │   │   │   ├── map_arpa_lm.pl
    │   │   │   ├── mkgraph.sh
    │   │   │   ├── mkgraph_lookahead.sh
    │   │   │   ├── nnet-cpu
    │   │   │       ├── make_nnet_config.pl
    │   │   │       ├── make_nnet_config_block.pl
    │   │   │       ├── make_nnet_config_preconditioned.pl
    │   │   │       └── update_learning_rates.pl
    │   │   │   ├── nnet
    │   │   │       ├── gen_dct_mat.py
    │   │   │       ├── gen_hamm_mat.py
    │   │   │       ├── gen_splice.py
    │   │   │       ├── make_blstm_proto.py
    │   │   │       ├── make_cnn_proto.py
    │   │   │       ├── make_lstm_proto.py
    │   │   │       ├── make_nnet_proto.py
    │   │   │       └── subset_data_tr_cv.sh
    │   │   │   ├── nnet3
    │   │   │       └── convert_config_tdnn_to_affine.py
    │   │   │   ├── parallel
    │   │   │       ├── limit_num_gpus.sh
    │   │   │       ├── pbs.pl
    │   │   │       ├── queue.pl
    │   │   │       ├── retry.pl
    │   │   │       ├── run.pl
    │   │   │       └── slurm.pl
    │   │   │   ├── parse_options.sh
    │   │   │   ├── pbs.pl
    │   │   │   ├── perturb_data_dir_speed.sh
    │   │   │   ├── pinyin_map.pl
    │   │   │   ├── prepare_extended_lang.sh
    │   │   │   ├── prepare_lang.sh
    │   │   │   ├── prepare_online_nnet_dist_build.sh
    │   │   │   ├── queue.pl
    │   │   │   ├── remove_data_links.sh
    │   │   │   ├── remove_oovs.pl
    │   │   │   ├── require_argument.sh
    │   │   │   ├── require_argument_all.sh
    │   │   │   ├── retry.pl
    │   │   │   ├── reverse_arpa.py
    │   │   │   ├── rnnlm_compute_scores.sh
    │   │   │   ├── run.pl
    │   │   │   ├── s2eps.pl
    │   │   │   ├── scoring
    │   │   │       ├── wer_ops_details.pl
    │   │   │       ├── wer_per_spk_details.pl
    │   │   │       ├── wer_per_utt_details.pl
    │   │   │       └── wer_report.pl
    │   │   │   ├── segmentation.pl
    │   │   │   ├── show_lattice.sh
    │   │   │   ├── shuffle_list.pl
    │   │   │   ├── slurm.pl
    │   │   │   ├── spk2utt_to_utt2spk.pl
    │   │   │   ├── split_data.sh
    │   │   │   ├── split_scp.pl
    │   │   │   ├── ssh.pl
    │   │   │   ├── subset_data_dir.sh
    │   │   │   ├── subset_data_dir_tr_cv.sh
    │   │   │   ├── subset_scp.pl
    │   │   │   ├── subword
    │   │   │       ├── prepare_lang_subword.sh
    │   │   │       └── prepare_subword_text.sh
    │   │   │   ├── summarize_logs.pl
    │   │   │   ├── summarize_warnings.pl
    │   │   │   ├── sym2int.pl
    │   │   │   ├── train_arpa_with_kenlm.sh
    │   │   │   ├── utt2spk_to_spk2utt.pl
    │   │   │   ├── validate_data_dir.sh
    │   │   │   ├── validate_dict_dir.pl
    │   │   │   ├── validate_lang.pl
    │   │   │   ├── validate_text.pl
    │   │   │   └── write_kwslist.pl
    │   └── tokenizer
    │   │   ├── MimiCodec
    │   │       ├── __pycache__
    │   │       │   ├── mimi_tokenizer.cpython-310.pyc
    │   │       │   └── mimi_tokenizer.cpython-312.pyc
    │   │       ├── mimi_config.yaml
    │   │       ├── mimi_tokenizer.py
    │   │       └── model
    │   │       │   ├── models
    │   │       │       ├── MimiCodec.py
    │   │       │       ├── __init__.py
    │   │       │       └── __pycache__
    │   │       │       │   ├── MimiCodec.cpython-312.pyc
    │   │       │       │   └── __init__.cpython-312.pyc
    │   │       │   ├── modules
    │   │       │       ├── __init__.py
    │   │       │       ├── __pycache__
    │   │       │       │   ├── __init__.cpython-312.pyc
    │   │       │       │   ├── conv.cpython-312.pyc
    │   │       │       │   ├── gating.cpython-312.pyc
    │   │       │       │   ├── resample.cpython-312.pyc
    │   │       │       │   ├── rope.cpython-312.pyc
    │   │       │       │   ├── seanet.cpython-312.pyc
    │   │       │       │   ├── streaming.cpython-312.pyc
    │   │       │       │   └── transformer.cpython-312.pyc
    │   │       │       ├── conv.py
    │   │       │       ├── gating.py
    │   │       │       ├── resample.py
    │   │       │       ├── rope.py
    │   │       │       ├── seanet.py
    │   │       │       ├── streaming.py
    │   │       │       └── transformer.py
    │   │       │   ├── quantization
    │   │       │       ├── __init__.py
    │   │       │       ├── __pycache__
    │   │       │       │   ├── __init__.cpython-312.pyc
    │   │       │       │   ├── base.cpython-312.pyc
    │   │       │       │   ├── core_vq.cpython-312.pyc
    │   │       │       │   └── vq.cpython-312.pyc
    │   │       │       ├── base.py
    │   │       │       ├── core_vq.py
    │   │       │       └── vq.py
    │   │       │   └── utils
    │   │       │       ├── __init__.py
    │   │       │       ├── __pycache__
    │   │       │           ├── __init__.cpython-312.pyc
    │   │       │           └── compile.cpython-312.pyc
    │   │       │       └── compile.py
    │   │   ├── Text2ID
    │   │       ├── __pycache__
    │   │       │   ├── moshi_text_tokenizer.cpython-312.pyc
    │   │       │   └── text_tokenizer.cpython-38.pyc
    │   │       └── moshi_text_tokenizer.py
    │   │   ├── __pycache__
    │   │       ├── abs_tokenizer.cpython-310.pyc
    │   │       ├── abs_tokenizer.cpython-312.pyc
    │   │       ├── abs_tokenizer.cpython-38.pyc
    │   │       └── common.cpython-38.pyc
    │   │   ├── abs_tokenizer.py
    │   │   └── common.py
    ├── trainer
    │   ├── finetuning_full_ds.py
    │   ├── finetuning_full_fsdp.py
    │   └── finetuning_lora.py
    └── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── __init__.cpython-310.pyc
    │       ├── __init__.cpython-312.pyc
    │       ├── abs_scheduler.cpython-312.pyc
    │       ├── arguments.cpython-312.pyc
    │       ├── compile.cpython-312.pyc
    │       ├── dataloader.cpython-310.pyc
    │       ├── dataloader.cpython-312.pyc
    │       ├── reporter.cpython-312.pyc
    │       ├── sampling.cpython-312.pyc
    │       ├── task_definition.cpython-312.pyc
    │       └── train_utils.cpython-312.pyc
    │   ├── abs_scheduler.py
    │   ├── arguments.py
    │   ├── autocast.py
    │   ├── compile.py
    │   ├── dataloader.py
    │   ├── reporter.py
    │   ├── sampling.py
    │   ├── task_definition.py
    │   └── train_utils.py
├── MLLM_v2
    ├── configs
    │   └── llama3.yaml
    ├── egs
    │   ├── extract_tokens
    │   │   ├── data_scripts
    │   │   │   ├── create_data_json.py
    │   │   │   └── offline_tokenization.py
    │   │   ├── get_wav.py
    │   │   ├── local
    │   │   │   ├── asr_whisperx.py
    │   │   │   ├── asr_whisperx_tar.py
    │   │   │   └── vad_segment.py
    │   │   ├── run.sh
    │   │   └── utils
    │   │   │   ├── run.pl
    │   │   │   └── split_scp.pl
    │   ├── moshi_ft
    │   │   ├── data_scripts
    │   │   │   ├── create_data_json.py
    │   │   │   └── offline_tokenization.py
    │   │   ├── local
    │   │   │   ├── asr_whisperx.py
    │   │   │   └── vad_segment.py
    │   │   ├── readme.md
    │   │   ├── run.sh
    │   │   └── utils
    │   │   │   ├── run.pl
    │   │   │   └── split_scp.pl
    │   └── pretraining
    │   │   ├── data_scripts
    │   │       ├── create_data_json.py
    │   │       ├── emilia
    │   │       │   ├── config.json
    │   │       │   ├── env.sh
    │   │       │   ├── main.py
    │   │       │   ├── models
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── dnsmos.py
    │   │       │   │   ├── separate_fast.py
    │   │       │   │   ├── silero_vad.py
    │   │       │   │   └── whisper_asr.py
    │   │       │   ├── requirements.txt
    │   │       │   └── utils
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── logger.py
    │   │       │   │   └── tool.py
    │   │       ├── filter_scp.py
    │   │       ├── offline_tokenization_tar.py
    │   │       ├── text_tokenization.py
    │   │       ├── text_tokenization_scp.py
    │   │       └── text_tokenization_utt2json.py
    │   │   ├── extract_token.sh
    │   │   ├── infer.sh
    │   │   ├── local
    │   │       ├── asr_whisperx.py
    │   │       ├── asr_whisperx_tar.py
    │   │       ├── offline_codec_tokenization.py
    │   │       └── vad_segment.py
    │   │   ├── path.sh
    │   │   ├── prepare_broadcast_data.sh
    │   │   ├── readme.md
    │   │   ├── run.sh
    │   │   └── utils
    │   │       ├── run.pl
    │   │       └── split_scp.pl
    ├── infer_no_streaming.py
    ├── models
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── __init__.cpython-312.pyc
    │   │   ├── __init__.cpython-39.pyc
    │   │   ├── config.cpython-310.pyc
    │   │   ├── config.cpython-312.pyc
    │   │   ├── config.cpython-39.pyc
    │   │   ├── lit_model.cpython-310.pyc
    │   │   ├── lit_model.cpython-312.pyc
    │   │   ├── lit_model.cpython-39.pyc
    │   │   ├── llama_streaming.cpython-310.pyc
    │   │   ├── llama_streaming.cpython-312.pyc
    │   │   ├── llama_streaming.cpython-39.pyc
    │   │   ├── llama_streaming_lora.cpython-310.pyc
    │   │   ├── mlp.cpython-310.pyc
    │   │   ├── mlp.cpython-312.pyc
    │   │   ├── mlp.cpython-39.pyc
    │   │   ├── model.cpython-310.pyc
    │   │   └── model.cpython-312.pyc
    │   ├── config.py
    │   ├── lit_model.py
    │   ├── llama_streaming.py
    │   ├── mlp.py
    │   ├── model.py
    │   ├── model_llama.py
    │   └── model_lora.py
    ├── modules
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-312.pyc
    │   │   ├── __init__.cpython-38.pyc
    │   │   ├── conv.cpython-312.pyc
    │   │   ├── conv.cpython-38.pyc
    │   │   ├── gating.cpython-312.pyc
    │   │   ├── resample.cpython-312.pyc
    │   │   ├── rope.cpython-312.pyc
    │   │   ├── seanet.cpython-312.pyc
    │   │   ├── streaming.cpython-312.pyc
    │   │   ├── streaming.cpython-38.pyc
    │   │   ├── transformer.cpython-312.pyc
    │   │   └── transformer_lora.cpython-312.pyc
    │   ├── conv.py
    │   ├── gating.py
    │   ├── resample.py
    │   ├── rope.py
    │   ├── seanet.py
    │   ├── streaming.py
    │   ├── transformer.py
    │   └── transformer_lora.py
    ├── moshi
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   └── __init__.cpython-312.pyc
    │   ├── client.py
    │   ├── client_utils.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── compression.cpython-312.pyc
    │   │   │   ├── lm.cpython-312.pyc
    │   │   │   └── loaders.cpython-312.pyc
    │   │   ├── compression.py
    │   │   ├── lm.py
    │   │   └── loaders.py
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── conv.cpython-312.pyc
    │   │   │   ├── gating.cpython-312.pyc
    │   │   │   ├── resample.cpython-312.pyc
    │   │   │   ├── rope.cpython-312.pyc
    │   │   │   ├── seanet.cpython-312.pyc
    │   │   │   ├── streaming.cpython-312.pyc
    │   │   │   └── transformer.cpython-312.pyc
    │   │   ├── conv.py
    │   │   ├── conv_test.py
    │   │   ├── gating.py
    │   │   ├── resample.py
    │   │   ├── rope.py
    │   │   ├── seanet.py
    │   │   ├── seanet_test.py
    │   │   ├── streaming.py
    │   │   └── transformer.py
    │   ├── quantization
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-312.pyc
    │   │   │   ├── base.cpython-312.pyc
    │   │   │   ├── core_vq.cpython-312.pyc
    │   │   │   └── vq.cpython-312.pyc
    │   │   ├── base.py
    │   │   ├── core_vq.py
    │   │   └── vq.py
    │   ├── server.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │       ├── __init__.cpython-312.pyc
    │   │       ├── compile.cpython-312.pyc
    │   │       └── sampling.cpython-312.pyc
    │   │   ├── autocast.py
    │   │   ├── compile.py
    │   │   └── sampling.py
    ├── readme.md
    ├── tools
    │   ├── data_scripts
    │   │   ├── create_data_json.py
    │   │   ├── filter_scp.py
    │   │   ├── find_peer_utts.py
    │   │   ├── merge_then_split.py
    │   │   ├── offline_tokenization.py
    │   │   └── select_spk2utt.py
    │   ├── kaldi
    │   │   └── utils
    │   │   │   ├── add_disambig.pl
    │   │   │   ├── add_lex_disambig.pl
    │   │   │   ├── analyze_segments.pl
    │   │   │   ├── apply_map.pl
    │   │   │   ├── best_wer.sh
    │   │   │   ├── build_const_arpa_lm.sh
    │   │   │   ├── build_kenlm_model_from_arpa.sh
    │   │   │   ├── combine_data.sh
    │   │   │   ├── convert_ctm.pl
    │   │   │   ├── convert_slf.pl
    │   │   │   ├── convert_slf_parallel.sh
    │   │   │   ├── copy_data_dir.sh
    │   │   │   ├── create_data_link.pl
    │   │   │   ├── create_split_dir.pl
    │   │   │   ├── ctm
    │   │   │       ├── convert_ctm.pl
    │   │   │       ├── fix_ctm.sh
    │   │   │       └── resolve_ctm_overlaps.py
    │   │   │   ├── data
    │   │   │       ├── combine_data.sh
    │   │   │       ├── combine_short_segments.sh
    │   │   │       ├── convert_data_dir_to_whole.sh
    │   │   │       ├── copy_data_dir.sh
    │   │   │       ├── extend_segment_times.py
    │   │   │       ├── extract_wav_segments_data_dir.sh
    │   │   │       ├── fix_data_dir.sh
    │   │   │       ├── fix_subsegment_feats.pl
    │   │   │       ├── get_allowed_durations.py
    │   │   │       ├── get_frame_shift.sh
    │   │   │       ├── get_num_frames.sh
    │   │   │       ├── get_reco2dur.sh
    │   │   │       ├── get_reco2utt_for_data.sh
    │   │   │       ├── get_segments_for_data.sh
    │   │   │       ├── get_uniform_subsegments.py
    │   │   │       ├── get_utt2dur.sh
    │   │   │       ├── get_utt2num_frames.sh
    │   │   │       ├── internal
    │   │   │       │   ├── choose_utts_to_combine.py
    │   │   │       │   ├── combine_segments_to_recording.py
    │   │   │       │   ├── modify_speaker_info.py
    │   │   │       │   └── perturb_volume.py
    │   │   │       ├── limit_feature_dim.sh
    │   │   │       ├── modify_speaker_info.sh
    │   │   │       ├── modify_speaker_info_to_recording.sh
    │   │   │       ├── normalize_data_range.pl
    │   │   │       ├── perturb_data_dir_speed.sh
    │   │   │       ├── perturb_data_dir_speed_3way.sh
    │   │   │       ├── perturb_data_dir_volume.sh
    │   │   │       ├── perturb_speed_to_allowed_lengths.py
    │   │   │       ├── remove_dup_utts.sh
    │   │   │       ├── resample_data_dir.sh
    │   │   │       ├── shift_and_combine_feats.sh
    │   │   │       ├── shift_feats.sh
    │   │   │       ├── split_data.sh
    │   │   │       ├── subsegment_data_dir.sh
    │   │   │       ├── subset_data_dir.sh
    │   │   │       └── validate_data_dir.sh
    │   │   │   ├── dict_dir_add_pronprobs.sh
    │   │   │   ├── eps2disambig.pl
    │   │   │   ├── filt.py
    │   │   │   ├── filter_scp.pl
    │   │   │   ├── filter_scps.pl
    │   │   │   ├── find_arpa_oovs.pl
    │   │   │   ├── fix_ctm.sh
    │   │   │   ├── fix_data_dir.sh
    │   │   │   ├── format_lm.sh
    │   │   │   ├── format_lm_sri.sh
    │   │   │   ├── gen_topo.pl
    │   │   │   ├── int2sym.pl
    │   │   │   ├── kwslist_post_process.pl
    │   │   │   ├── lang
    │   │   │       ├── add_lex_disambig.pl
    │   │   │       ├── add_unigrams_arpa.pl
    │   │   │       ├── adjust_unk_arpa.pl
    │   │   │       ├── adjust_unk_graph.sh
    │   │   │       ├── bpe
    │   │   │       │   ├── add_final_optional_silence.sh
    │   │   │       │   ├── apply_bpe.py
    │   │   │       │   ├── bidi.py
    │   │   │       │   ├── learn_bpe.py
    │   │   │       │   ├── prepend_words.py
    │   │   │       │   └── reverse.py
    │   │   │       ├── check_g_properties.pl
    │   │   │       ├── check_phones_compatible.sh
    │   │   │       ├── compute_sentence_probs_arpa.py
    │   │   │       ├── extend_lang.sh
    │   │   │       ├── get_word_position_phone_map.pl
    │   │   │       ├── grammar
    │   │   │       │   ├── augment_phones_txt.py
    │   │   │       │   └── augment_words_txt.py
    │   │   │       ├── internal
    │   │   │       │   ├── apply_unk_lm.sh
    │   │   │       │   ├── arpa2fst_constrained.py
    │   │   │       │   └── modify_unk_pron.py
    │   │   │       ├── limit_arpa_unk_history.py
    │   │   │       ├── make_kn_lm.py
    │   │   │       ├── make_lexicon_fst.py
    │   │   │       ├── make_lexicon_fst_silprob.py
    │   │   │       ├── make_phone_bigram_lang.sh
    │   │   │       ├── make_phone_lm.py
    │   │   │       ├── make_position_dependent_subword_lexicon.py
    │   │   │       ├── make_subword_lexicon_fst.py
    │   │   │       ├── make_unk_lm.sh
    │   │   │       ├── ngram_entropy_pruning.py
    │   │   │       ├── prepare_lang.sh
    │   │   │       ├── validate_disambig_sym_file.pl
    │   │   │       └── validate_lang.pl
    │   │   │   ├── ln.pl
    │   │   │   ├── make_absolute.sh
    │   │   │   ├── make_lexicon_fst.pl
    │   │   │   ├── make_lexicon_fst_silprob.pl
    │   │   │   ├── make_unigram_grammar.pl
    │   │   │   ├── map_arpa_lm.pl
    │   │   │   ├── mkgraph.sh
    │   │   │   ├── mkgraph_lookahead.sh
    │   │   │   ├── nnet-cpu
    │   │   │       ├── make_nnet_config.pl
    │   │   │       ├── make_nnet_config_block.pl
    │   │   │       ├── make_nnet_config_preconditioned.pl
    │   │   │       └── update_learning_rates.pl
    │   │   │   ├── nnet
    │   │   │       ├── gen_dct_mat.py
    │   │   │       ├── gen_hamm_mat.py
    │   │   │       ├── gen_splice.py
    │   │   │       ├── make_blstm_proto.py
    │   │   │       ├── make_cnn_proto.py
    │   │   │       ├── make_lstm_proto.py
    │   │   │       ├── make_nnet_proto.py
    │   │   │       └── subset_data_tr_cv.sh
    │   │   │   ├── nnet3
    │   │   │       └── convert_config_tdnn_to_affine.py
    │   │   │   ├── parallel
    │   │   │       ├── limit_num_gpus.sh
    │   │   │       ├── pbs.pl
    │   │   │       ├── queue.pl
    │   │   │       ├── retry.pl
    │   │   │       ├── run.pl
    │   │   │       └── slurm.pl
    │   │   │   ├── parse_options.sh
    │   │   │   ├── pbs.pl
    │   │   │   ├── perturb_data_dir_speed.sh
    │   │   │   ├── pinyin_map.pl
    │   │   │   ├── prepare_extended_lang.sh
    │   │   │   ├── prepare_lang.sh
    │   │   │   ├── prepare_online_nnet_dist_build.sh
    │   │   │   ├── queue.pl
    │   │   │   ├── remove_data_links.sh
    │   │   │   ├── remove_oovs.pl
    │   │   │   ├── require_argument.sh
    │   │   │   ├── require_argument_all.sh
    │   │   │   ├── retry.pl
    │   │   │   ├── reverse_arpa.py
    │   │   │   ├── rnnlm_compute_scores.sh
    │   │   │   ├── run.pl
    │   │   │   ├── s2eps.pl
    │   │   │   ├── scoring
    │   │   │       ├── wer_ops_details.pl
    │   │   │       ├── wer_per_spk_details.pl
    │   │   │       ├── wer_per_utt_details.pl
    │   │   │       └── wer_report.pl
    │   │   │   ├── segmentation.pl
    │   │   │   ├── show_lattice.sh
    │   │   │   ├── shuffle_list.pl
    │   │   │   ├── slurm.pl
    │   │   │   ├── spk2utt_to_utt2spk.pl
    │   │   │   ├── split_data.sh
    │   │   │   ├── split_scp.pl
    │   │   │   ├── ssh.pl
    │   │   │   ├── subset_data_dir.sh
    │   │   │   ├── subset_data_dir_tr_cv.sh
    │   │   │   ├── subset_scp.pl
    │   │   │   ├── subword
    │   │   │       ├── prepare_lang_subword.sh
    │   │   │       └── prepare_subword_text.sh
    │   │   │   ├── summarize_logs.pl
    │   │   │   ├── summarize_warnings.pl
    │   │   │   ├── sym2int.pl
    │   │   │   ├── train_arpa_with_kenlm.sh
    │   │   │   ├── utt2spk_to_spk2utt.pl
    │   │   │   ├── validate_data_dir.sh
    │   │   │   ├── validate_dict_dir.pl
    │   │   │   ├── validate_lang.pl
    │   │   │   ├── validate_text.pl
    │   │   │   └── write_kwslist.pl
    │   └── tokenizer
    │   │   ├── GLM4V
    │   │       ├── __init__.py
    │   │       ├── configuration_whisper.py
    │   │       ├── cosyvoice
    │   │       │   ├── __init__.py
    │   │       │   ├── __pycache__
    │   │       │   │   ├── __init__.cpython-310.pyc
    │   │       │   │   └── __init__.cpython-312.pyc
    │   │       │   ├── bin
    │   │       │   │   ├── inference.py
    │   │       │   │   └── train.py
    │   │       │   ├── cli
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── cosyvoice.py
    │   │       │   │   ├── frontend.py
    │   │       │   │   └── model.py
    │   │       │   ├── dataset
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── dataset.py
    │   │       │   │   └── processor.py
    │   │       │   ├── flow
    │   │       │   │   ├── __pycache__
    │   │       │   │   │   ├── decoder.cpython-310.pyc
    │   │       │   │   │   ├── decoder.cpython-312.pyc
    │   │       │   │   │   ├── flow.cpython-310.pyc
    │   │       │   │   │   ├── flow.cpython-312.pyc
    │   │       │   │   │   ├── flow_matching.cpython-310.pyc
    │   │       │   │   │   ├── flow_matching.cpython-312.pyc
    │   │       │   │   │   ├── length_regulator.cpython-310.pyc
    │   │       │   │   │   └── length_regulator.cpython-312.pyc
    │   │       │   │   ├── decoder.py
    │   │       │   │   ├── flow.py
    │   │       │   │   ├── flow_gradtts.py
    │   │       │   │   ├── flow_matching.py
    │   │       │   │   ├── flow_matching_dit.py
    │   │       │   │   ├── length_regulator.py
    │   │       │   │   └── stable
    │   │       │   │   │   ├── adp.py
    │   │       │   │   │   ├── blocks.py
    │   │       │   │   │   ├── dit.py
    │   │       │   │   │   ├── dit_v2.py
    │   │       │   │   │   ├── sampling.py
    │   │       │   │   │   ├── stable_diffusion.py
    │   │       │   │   │   ├── stable_diffusion_test.py
    │   │       │   │   │   ├── transformer.py
    │   │       │   │   │   └── transformer_use_mask.py
    │   │       │   ├── hifigan
    │   │       │   │   ├── __pycache__
    │   │       │   │   │   ├── f0_predictor.cpython-310.pyc
    │   │       │   │   │   ├── f0_predictor.cpython-312.pyc
    │   │       │   │   │   ├── generator.cpython-310.pyc
    │   │       │   │   │   └── generator.cpython-312.pyc
    │   │       │   │   ├── f0_predictor.py
    │   │       │   │   └── generator.py
    │   │       │   ├── llm
    │   │       │   │   ├── __pycache__
    │   │       │   │   │   ├── llm.cpython-310.pyc
    │   │       │   │   │   └── llm.cpython-312.pyc
    │   │       │   │   └── llm.py
    │   │       │   ├── transformer
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── __pycache__
    │   │       │   │   │   ├── __init__.cpython-310.pyc
    │   │       │   │   │   ├── __init__.cpython-312.pyc
    │   │       │   │   │   ├── activation.cpython-310.pyc
    │   │       │   │   │   ├── activation.cpython-312.pyc
    │   │       │   │   │   ├── attention.cpython-310.pyc
    │   │       │   │   │   ├── attention.cpython-312.pyc
    │   │       │   │   │   ├── convolution.cpython-310.pyc
    │   │       │   │   │   ├── convolution.cpython-312.pyc
    │   │       │   │   │   ├── embedding.cpython-310.pyc
    │   │       │   │   │   ├── embedding.cpython-312.pyc
    │   │       │   │   │   ├── encoder.cpython-310.pyc
    │   │       │   │   │   ├── encoder.cpython-312.pyc
    │   │       │   │   │   ├── encoder_layer.cpython-310.pyc
    │   │       │   │   │   ├── encoder_layer.cpython-312.pyc
    │   │       │   │   │   ├── label_smoothing_loss.cpython-310.pyc
    │   │       │   │   │   ├── label_smoothing_loss.cpython-312.pyc
    │   │       │   │   │   ├── positionwise_feed_forward.cpython-310.pyc
    │   │       │   │   │   ├── positionwise_feed_forward.cpython-312.pyc
    │   │       │   │   │   ├── subsampling.cpython-310.pyc
    │   │       │   │   │   └── subsampling.cpython-312.pyc
    │   │       │   │   ├── activation.py
    │   │       │   │   ├── attention.py
    │   │       │   │   ├── convolution.py
    │   │       │   │   ├── decoder.py
    │   │       │   │   ├── decoder_layer.py
    │   │       │   │   ├── embedding.py
    │   │       │   │   ├── encoder.py
    │   │       │   │   ├── encoder_layer.py
    │   │       │   │   ├── label_smoothing_loss.py
    │   │       │   │   ├── positionwise_feed_forward.py
    │   │       │   │   └── subsampling.py
    │   │       │   └── utils
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── __pycache__
    │   │       │   │       ├── __init__.cpython-310.pyc
    │   │       │   │       ├── __init__.cpython-312.pyc
    │   │       │   │       ├── block_mask_util.cpython-310.pyc
    │   │       │   │       ├── block_mask_util.cpython-312.pyc
    │   │       │   │       ├── class_utils.cpython-310.pyc
    │   │       │   │       ├── class_utils.cpython-312.pyc
    │   │       │   │       ├── common.cpython-310.pyc
    │   │       │   │       ├── common.cpython-312.pyc
    │   │       │   │       ├── mask.cpython-310.pyc
    │   │       │   │       └── mask.cpython-312.pyc
    │   │       │   │   ├── block_mask_util.py
    │   │       │   │   ├── class_utils.py
    │   │       │   │   ├── common.py
    │   │       │   │   ├── executor.py
    │   │       │   │   ├── file_utils.py
    │   │       │   │   ├── frontend_utils.py
    │   │       │   │   ├── mask.py
    │   │       │   │   ├── scheduler.py
    │   │       │   │   └── train_utils.py
    │   │       ├── flow_inference.py
    │   │       ├── generation_whisper.py
    │   │       ├── modeling_whisper.py
    │   │       ├── semantic.py
    │   │       ├── third_party
    │   │       │   └── Matcha-TTS
    │   │       │   │   ├── .env.example
    │   │       │   │   ├── .github
    │   │       │   │       ├── PULL_REQUEST_TEMPLATE.md
    │   │       │   │       ├── codecov.yml
    │   │       │   │       ├── dependabot.yml
    │   │       │   │       └── release-drafter.yml
    │   │       │   │   ├── .gitignore
    │   │       │   │   ├── .pre-commit-config.yaml
    │   │       │   │   ├── .project-root
    │   │       │   │   ├── .pylintrc
    │   │       │   │   ├── LICENSE
    │   │       │   │   ├── MANIFEST.in
    │   │       │   │   ├── Makefile
    │   │       │   │   ├── README.md
    │   │       │   │   ├── configs
    │   │       │   │       ├── __init__.py
    │   │       │   │       ├── callbacks
    │   │       │   │       │   ├── default.yaml
    │   │       │   │       │   ├── model_checkpoint.yaml
    │   │       │   │       │   ├── model_summary.yaml
    │   │       │   │       │   ├── none.yaml
    │   │       │   │       │   └── rich_progress_bar.yaml
    │   │       │   │       ├── data
    │   │       │   │       │   ├── hi-fi_en-US_female.yaml
    │   │       │   │       │   ├── ljspeech.yaml
    │   │       │   │       │   └── vctk.yaml
    │   │       │   │       ├── debug
    │   │       │   │       │   ├── default.yaml
    │   │       │   │       │   ├── fdr.yaml
    │   │       │   │       │   ├── limit.yaml
    │   │       │   │       │   ├── overfit.yaml
    │   │       │   │       │   └── profiler.yaml
    │   │       │   │       ├── eval.yaml
    │   │       │   │       ├── experiment
    │   │       │   │       │   ├── hifi_dataset_piper_phonemizer.yaml
    │   │       │   │       │   ├── ljspeech.yaml
    │   │       │   │       │   ├── ljspeech_min_memory.yaml
    │   │       │   │       │   └── multispeaker.yaml
    │   │       │   │       ├── extras
    │   │       │   │       │   └── default.yaml
    │   │       │   │       ├── hparams_search
    │   │       │   │       │   └── mnist_optuna.yaml
    │   │       │   │       ├── hydra
    │   │       │   │       │   └── default.yaml
    │   │       │   │       ├── local
    │   │       │   │       │   └── .gitkeep
    │   │       │   │       ├── logger
    │   │       │   │       │   ├── aim.yaml
    │   │       │   │       │   ├── comet.yaml
    │   │       │   │       │   ├── csv.yaml
    │   │       │   │       │   ├── many_loggers.yaml
    │   │       │   │       │   ├── mlflow.yaml
    │   │       │   │       │   ├── neptune.yaml
    │   │       │   │       │   ├── tensorboard.yaml
    │   │       │   │       │   └── wandb.yaml
    │   │       │   │       ├── model
    │   │       │   │       │   ├── cfm
    │   │       │   │       │   │   └── default.yaml
    │   │       │   │       │   ├── decoder
    │   │       │   │       │   │   └── default.yaml
    │   │       │   │       │   ├── encoder
    │   │       │   │       │   │   └── default.yaml
    │   │       │   │       │   ├── matcha.yaml
    │   │       │   │       │   └── optimizer
    │   │       │   │       │   │   └── adam.yaml
    │   │       │   │       ├── paths
    │   │       │   │       │   └── default.yaml
    │   │       │   │       ├── train.yaml
    │   │       │   │       └── trainer
    │   │       │   │       │   ├── cpu.yaml
    │   │       │   │       │   ├── ddp.yaml
    │   │       │   │       │   ├── ddp_sim.yaml
    │   │       │   │       │   ├── default.yaml
    │   │       │   │       │   ├── gpu.yaml
    │   │       │   │       │   └── mps.yaml
    │   │       │   │   ├── matcha
    │   │       │   │       ├── VERSION
    │   │       │   │       ├── __init__.py
    │   │       │   │       ├── app.py
    │   │       │   │       ├── cli.py
    │   │       │   │       ├── data
    │   │       │   │       │   ├── __init__.py
    │   │       │   │       │   ├── components
    │   │       │   │       │   │   └── __init__.py
    │   │       │   │       │   └── text_mel_datamodule.py
    │   │       │   │       ├── hifigan
    │   │       │   │       │   ├── LICENSE
    │   │       │   │       │   ├── README.md
    │   │       │   │       │   ├── __init__.py
    │   │       │   │       │   ├── config.py
    │   │       │   │       │   ├── denoiser.py
    │   │       │   │       │   ├── env.py
    │   │       │   │       │   ├── meldataset.py
    │   │       │   │       │   ├── models.py
    │   │       │   │       │   └── xutils.py
    │   │       │   │       ├── models
    │   │       │   │       │   ├── __init__.py
    │   │       │   │       │   ├── baselightningmodule.py
    │   │       │   │       │   ├── components
    │   │       │   │       │   │   ├── __init__.py
    │   │       │   │       │   │   ├── decoder.py
    │   │       │   │       │   │   ├── flow_matching.py
    │   │       │   │       │   │   ├── text_encoder.py
    │   │       │   │       │   │   └── transformer.py
    │   │       │   │       │   └── matcha_tts.py
    │   │       │   │       ├── onnx
    │   │       │   │       │   ├── __init__.py
    │   │       │   │       │   ├── export.py
    │   │       │   │       │   └── infer.py
    │   │       │   │       ├── text
    │   │       │   │       │   ├── __init__.py
    │   │       │   │       │   ├── cleaners.py
    │   │       │   │       │   ├── numbers.py
    │   │       │   │       │   └── symbols.py
    │   │       │   │       ├── train.py
    │   │       │   │       └── utils
    │   │       │   │       │   ├── __init__.py
    │   │       │   │       │   ├── audio.py
    │   │       │   │       │   ├── generate_data_statistics.py
    │   │       │   │       │   ├── instantiators.py
    │   │       │   │       │   ├── logging_utils.py
    │   │       │   │       │   ├── model.py
    │   │       │   │       │   ├── monotonic_align
    │   │       │   │       │       ├── __init__.py
    │   │       │   │       │       ├── core.pyx
    │   │       │   │       │       └── setup.py
    │   │       │   │       │   ├── pylogger.py
    │   │       │   │       │   ├── rich_utils.py
    │   │       │   │       │   └── utils.py
    │   │       │   │   ├── notebooks
    │   │       │   │       └── .gitkeep
    │   │       │   │   ├── pyproject.toml
    │   │       │   │   ├── requirements.txt
    │   │       │   │   ├── scripts
    │   │       │   │       └── schedule.sh
    │   │       │   │   ├── setup.py
    │   │       │   │   └── synthesis.ipynb
    │   │       └── utils.py
    │   │   ├── MimiCodec
    │   │       ├── mimi_config.yaml
    │   │       ├── mimi_tokenizer.py
    │   │       └── model
    │   │       │   ├── models
    │   │       │       ├── MimiCodec.py
    │   │       │       └── __init__.py
    │   │       │   ├── modules
    │   │       │       ├── __init__.py
    │   │       │       ├── conv.py
    │   │       │       ├── gating.py
    │   │       │       ├── resample.py
    │   │       │       ├── rope.py
    │   │       │       ├── seanet.py
    │   │       │       ├── streaming.py
    │   │       │       └── transformer.py
    │   │       │   ├── quantization
    │   │       │       ├── __init__.py
    │   │       │       ├── base.py
    │   │       │       ├── core_vq.py
    │   │       │       └── vq.py
    │   │       │   └── utils
    │   │       │       ├── __init__.py
    │   │       │       └── compile.py
    │   │   ├── Text2ID
    │   │       ├── moshi_text_tokenizer.py
    │   │       └── text_tokenizer.py
    │   │   ├── abs_tokenizer.py
    │   │   └── common.py
    ├── trainer
    │   ├── pre_training_full.py
    │   ├── pre_training_lora.py
    │   └── pre_training_lora_ds.py
    └── utils
    │   ├── __init__.py
    │   ├── abs_scheduler.py
    │   ├── arguments.py
    │   ├── autocast.py
    │   ├── compile.py
    │   ├── dataloader.py
    │   ├── reporter.py
    │   ├── sampling.py
    │   ├── task_definition.py
    │   └── train_utils.py
├── RSTnet.pdf
├── RSTnet.png
├── demos
    ├── .DS_Store
    └── tts
    │   ├── setence_level_text_audio_interleaved_1272-128104-0006_sample.wav
    │   ├── setence_level_text_audio_interleaved_1272-141231-0011_sample.wav
    │   ├── setence_level_text_audio_interleaved_174-168635-0014_sample.wav
    │   ├── setence_level_text_audio_interleaved_251-137823-0008_sample.wav
    │   ├── setence_level_text_audio_interleaved_652-129742-0018_sample.wav
    │   └── setence_level_text_audio_interleaved_777-126732-0080_sample.wav
└── readme.md


/.gitignore:
--------------------------------------------------------------------------------
 1 | **/__pycache__/
 2 | __pycache__/
 3 | *.pyc
 4 | processed/
 5 | speech_data/
 6 | *.pt
 7 | data/
 8 | ckpts/
 9 | debug_data/
10 | debug_data_processed/


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/dataloaders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/dataloaders/__init__.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/dataloaders/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/dataloaders/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/dataloaders/__pycache__/base_dataloader.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/dataloaders/__pycache__/base_dataloader.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__init__.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/basic_loss.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/discriminator_loss.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/enh_loss.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/enh_loss.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/enh_loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/enh_loss.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/enh_loss.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/enh_loss.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/losses/__pycache__/generator_loss.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/models/__init__.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/models/__pycache__/MimiCodec.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/models/__pycache__/MimiCodec.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/models/__pycache__/MimiCodec.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/models/__pycache__/MimiCodec.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/models/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/models/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/models/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Modules used for building the models."""
11 | 
12 | # flake8: noqa
13 | from .conv import (
14 |     NormConv1d,
15 |     NormConvTranspose1d,
16 |     StreamingConv1d,
17 |     StreamingConvTranspose1d,
18 |     pad_for_conv1d,
19 |     pad1d,
20 |     unpad1d,
21 | )
22 | from .seanet import SEANetEncoder, SEANetDecoder
23 | from .transformer import StreamingTransformer
24 | 


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/conv.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/gating.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/gating.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/lstm.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/norm.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/resample.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/resample.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/rope.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/rope.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/seanet.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/streaming.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/streaming.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/streaming.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/streaming.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/__pycache__/transformer.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__init__.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/base_layers.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/ops.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/pqmf.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/commons/__pycache__/torch_stft.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/discriminators/__init__.py:
--------------------------------------------------------------------------------
1 | from modules.discriminators.frequency_discriminator import MultiFrequencyDiscriminator
2 | from modules.discriminators.period_discriminator import MultiPeriodDiscriminator
3 | from modules.discriminators.scale_discriminator import MultiScaleDiscriminator
4 | 


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/discriminators/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/discriminators/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/discriminators/__pycache__/frequency_discriminator.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/discriminators/__pycache__/frequency_discriminator.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/discriminators/__pycache__/period_discriminator.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/discriminators/__pycache__/period_discriminator.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/modules/discriminators/__pycache__/scale_discriminator.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/modules/discriminators/__pycache__/scale_discriminator.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/path.sh:
--------------------------------------------------------------------------------
# Environment setup for the MimiCodec scripts.
# NOTE(review): presumably meant to be sourced (". ./path.sh") from the
# MimiCodec directory, since it only exports variables and uses a relative
# import root -- confirm against the calling scripts.

# Use the plain C locale for every locale category.
export LC_ALL=C
# Keep Python's standard streams in UTF-8 even though the locale above is C.
export PYTHONIOENCODING=UTF-8
# Restrict OpenMP to a single thread per process.
export OMP_NUM_THREADS=1

# python import root
# Append the current directory to PYTHONPATH. The ${VAR:+...} form only emits
# the "<old value>:" prefix when PYTHONPATH is already non-empty, so an unset
# variable yields "./" instead of ":./" (no stray empty leading entry), and the
# quotes keep an existing value containing spaces intact.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}./"
7 | 


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """RVQ."""
11 | # flake8: noqa
12 | from .vq import ResidualVectorQuantizer, SplitResidualVectorQuantizer
13 | from .base import BaseQuantizer, DummyQuantizer, QuantizedResult
14 | 


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/base.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/base.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/core_vq.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/core_vq.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/core_vq.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/core_vq.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/vq.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/vq.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/vq.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/vq.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/vq_dc.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/vq_dc.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/quantization/__pycache__/vq_dc.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/quantization/__pycache__/vq_dc.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__init__.py:
--------------------------------------------------------------------------------
1 | from semantic_features.wavlm_feature import WavLMFeature
2 | from semantic_features.WavLM import WavLM, WavLMConfig
3 | from semantic_features.hubert_feature import HuBertFeature


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__pycache__/WavLM.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/__pycache__/WavLM.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__pycache__/WavLM.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/__pycache__/WavLM.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__pycache__/modules.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/__pycache__/modules.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__pycache__/modules.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/__pycache__/modules.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/__pycache__/wavlm_feature.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/__pycache__/wavlm_feature.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/hubert_feature.py:
--------------------------------------------------------------------------------
 1 | from transformers import HubertModel, Wav2Vec2Processor, Wav2Vec2FeatureExtractor
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | class HuBertFeature(nn.Module):
 6 |     def __init__(self, ckpt_path, device='cpu'):
 7 |         super(HuBertFeature, self).__init__()
 8 |         self.processor = Wav2Vec2FeatureExtractor.from_pretrained(ckpt_path)
 9 |         self.model = HubertModel.from_pretrained(ckpt_path)
10 |         self.model.eval()
11 |         self.model = self.model.to(device)
12 |         self.device = device
13 |         self.freeze()
14 | 
15 |     def freeze(self):
16 |         for param in self.model.parameters():
17 |             param.requires_grad = False
18 | 
19 |     def extract(self, x):
20 |         """
21 |         Extract features from HuBert model
22 |         Input: <B, T>
23 |         Output: <B, T/320, D>
24 |         """
25 |         if len(x.size()) == 3:
26 |             x = x.squeeze(1) # from (B,1,T) ---> (B, T)
27 |         assert len(x.size()) == 2
28 | 
29 |         with torch.no_grad():
30 |             outputs = self.model(x)
31 |             last_hidden_state = outputs['last_hidden_state'].to(torch.float32)  # (B, ssl_dim, T)
32 |         return last_hidden_state
33 | 


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/w2vec2bert_feature.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/w2vec2bert_feature.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/wavlm_feature.py:
--------------------------------------------------------------------------------
 1 | """
 2 | this code aims to extract semantic features from pre-trained WavLM model
 3 | """
 4 | import torch
 5 | from semantic_features.WavLM import WavLM, WavLMConfig
 6 | import torchaudio
 7 | import torch.nn as nn
 8 | 
 9 | class WavLMFeature(nn.Module):
10 |     def __init__(self, ckpt_path, device='cpu'):
11 |         super().__init__()
12 |         checkpoint = torch.load(ckpt_path)
13 |         self.cfg = WavLMConfig(checkpoint['cfg'])
14 |         self.model = WavLM(self.cfg)
15 |         self.model.load_state_dict(checkpoint['model'])
16 |         self.model.eval()
17 |         self.model = self.model.to(device)
18 |         self.device = device
19 |         self.freeze()
20 | 
21 |     def freeze(self):
22 |         for param in self.model.parameters():
23 |             param.requires_grad = False
24 | 
25 |     def extract(self, x):
26 |         """
27 |         extract the feature from last layer of wavlm
28 |         input: <B, T>
29 |         output: <B, T/320, D>
30 |         """
31 |         if len(x.size()) == 3:
32 |             x = x.squeeze(1) # from (B,1,T) ---> (B, T)
33 |         assert len(x.size()) == 2
34 |         #x = torch.cat([x, torch.zeros(x.shape[0], 320).to(x.device)], dim=1)
35 |         if self.cfg.normalize:
36 |             wav_input_16khz = torch.nn.functional.layer_norm(x.to(self.device) , x.shape)
37 |         rep = self.model.extract_features(wav_input_16khz)[0]
38 |         return rep
39 | 
40 | 
41 |     


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/semantic_features/whisper_feature.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/semantic_features/whisper_feature.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__init__.py


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/compile.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/compile.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/ddp_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/ddp_utils.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/ddp_utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/ddp_utils.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/hifigan_mel.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-312.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-38.pyc


--------------------------------------------------------------------------------
/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/AudioCodec/MimiCodec/utils/__pycache__/utils.cpython-39.pyc


--------------------------------------------------------------------------------
/AudioCodec/readme.md:
--------------------------------------------------------------------------------
1 | ## Streaming Audio Codec Model
2 | In this part, we introduce the training code for streaming audio codecs. We currently support training the state-of-the-art codec, MimiCodec. <br> 
3 | We plan to add more advanced streaming codec models in the future.
4 | 
5 | - [x] The training and inference code for MimiCodec.
6 | - [ ] Support other advanced streaming codec.
7 | 


--------------------------------------------------------------------------------
/DataPipeline/readme.md:
--------------------------------------------------------------------------------
1 | ## Large-scale data processing pipeline
2 | In this part, we introduce how to collect large-scale datasets for MLLM training. We mainly care about two types of audio data:
3 | 
4 | - [ ] Single-streaming speech data (TTS level speech data) <br>
5 | - [ ] Two-streaming or Multi-streaming speech data (conversation speech data)
6 | 
7 | In this version, we only provide a data preprocessing pipeline for pre-collected multi-stream speech data (e.g., Fisher); it is temporarily integrated in [MLLM/egs/moshi_ft/readme.md](../MLLM/egs/moshi_ft/readme.md).


--------------------------------------------------------------------------------
/Evaluation/codec/compute_dnsmos.sh:
--------------------------------------------------------------------------------
# DNSMOS is a reference-free evaluation metric.
# Set audio_path below, or pass it as the first argument to this script.
# NOTE(review): the script changes directory before running, so a relative
# audio_path is resolved from DNS-Challenge/DNSMOS — prefer an absolute path.
audio_path=''
audio_path="${1:-$audio_path}"

if [ -z "$audio_path" ]; then
  echo "$0: audio_path is empty; edit the script or pass it as the first argument" 1>&2
  exit 1
fi

# Stop if the DNS-Challenge checkout is missing, rather than running python
# from the wrong directory.
cd DNS-Challenge/DNSMOS || { echo "$0: cannot cd to DNS-Challenge/DNSMOS" 1>&2; exit 1; }
python dnsmos_local.py -t "$audio_path" -o output.csv -p
5 | 
6 | 


--------------------------------------------------------------------------------
/Evaluation/readme.md:
--------------------------------------------------------------------------------
1 | ## Evaluation and Benchmark dataset
2 | We provide the evaluation metrics and evaluation dataset for audio codec and speech-text models.
3 | 
4 | - [x] Audio Codec Evaluation Metrics
5 | - [ ] Audio Codec evaluation benchmark dataset
6 | - [ ] Speech-text foundation model evaluation metrics
7 | - [ ] Speech-text foundation model benchmark dataset
8 | 
9 | 


--------------------------------------------------------------------------------
/MLLM/models/__pycache__/model.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/models/__pycache__/model.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Modules used for building the models."""
11 | 
12 | # flake8: noqa
13 | from modules.conv import (
14 |     NormConv1d,
15 |     NormConvTranspose1d,
16 |     StreamingConv1d,
17 |     StreamingConvTranspose1d,
18 |     pad_for_conv1d,
19 |     pad1d,
20 |     unpad1d,
21 | )
22 | from modules.seanet import SEANetEncoder, SEANetDecoder
23 | from modules.transformer import StreamingTransformer
24 | 


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/conv.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/conv.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/gating.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/gating.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/resample.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/resample.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/rope.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/rope.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/seanet.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/seanet.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/streaming.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/streaming.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/transformer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/transformer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/modules/__pycache__/transformer_lora.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/modules/__pycache__/transformer_lora.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/readme.md:
--------------------------------------------------------------------------------
1 | ## Multi-modal LLM (speech-text foundation models)
2 | In this part, we provide the training details of speech-text foundation models. This will include:
3 | 
4 | - [x] Moshi finetuning code, including full-parameter finetuning and LORA finetuning
5 | - [ ] Moshi pre-training and post-training code
6 | - [ ] More advanced speech-text foundation model (by ourselves)
7 | 
8 | 


--------------------------------------------------------------------------------
/MLLM/tools/data_scripts/filter_scp.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | ref_f = sys.argv[1]
 4 | in_f = sys.argv[2]
 5 | try:
 6 |     writer = open(sys.argv[3], 'w', encoding='utf-8')
 7 |     stream_out = False
 8 | except:
 9 |     stream_out = True 
10 | 
11 | # output is in the order of ref_f
12 | ref = []
13 | for line in open(ref_f, encoding='utf-8'):
14 |     uttid = line.strip().split()[0]
15 |     ref.append(uttid)
16 | 
17 | in_dic = {}
18 | for line in open(in_f, encoding='utf-8'):
19 |     elems = line.strip().split()
20 |     uttid = elems[0]
21 |     ctx = " ".join(elems[1:])
22 |     in_dic[uttid] = ctx
23 | 
24 | for e in ref:
25 |     if e in in_dic:
26 |         if stream_out:
27 |             print(f"{e} {in_dic[e]}")
28 |         else:
29 |             writer.write(f"{e} {in_dic[e]}\n")
30 | 


--------------------------------------------------------------------------------
/MLLM/tools/data_scripts/select_spk2utt.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import argparse
 3 | 
 4 | def get_parser():
 5 |     parser = argparse.ArgumentParser(
 6 |         description="Revise the spk2utt file: it only contans a subset of the utts",
 7 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
 8 |     )
 9 |     parser.add_argument("--in-spk2utt", type=str, help="original spk2utt file")
10 |     parser.add_argument("--out-spk2utt", type=str, help="revised spk2utt file")
11 |     parser.add_argument("--subset-list", type=str, help="list of utt subset")
12 |     return parser
13 | 
def main(args):
    """Write a copy of a spk2utt file restricted to a subset of utterances.

    Args:
        args: Command-line argument list (without the program name), parsed
            with ``get_parser()``.

    Each line of the input spk2utt is ``<spk-id> <utt> <utt> ...``. Only
    utterances whose id is the first field of some line in the subset list are
    kept. A speaker with no remaining utterances is still written, as a line
    holding just the speaker id.
    """
    args = get_parser().parse_args(args)

    # A set gives the same O(1) membership test the original dict provided.
    with open(args.subset_list) as subset_file:
        utts = {fields[0] for fields in map(str.split, subset_file) if fields}

    # Context managers guarantee both files are closed (and output flushed).
    with open(args.in_spk2utt) as reader, open(args.out_spk2utt, 'w') as writer:
        for line in reader:
            fields = line.split()
            if not fields:  # skip blank lines instead of raising IndexError
                continue
            spk_id, spk_utts = fields[0], fields[1:]
            kept = [utt for utt in spk_utts if utt in utts]
            writer.write(" ".join([spk_id] + kept) + "\n")
29 | 
30 | if __name__ == "__main__":
31 |     main(sys.argv[1:])
32 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/build_const_arpa_lm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2014  Guoguo Chen
 4 | # Apache 2.0
 5 | 
 6 | # This script reads in an Arpa format language model, and converts it into the
 7 | # ConstArpaLm format language model.
 8 | 
 9 | # begin configuration section
10 | # end configuration section
11 | 
[ -f path.sh ] && . ./path.sh;

. utils/parse_options.sh

if [ $# != 3 ]; then
  echo "Usage: "
  echo "  $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>"
  echo "e.g.:"
  echo "  $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed"
  echo "Options"
  exit 1;
fi

export LC_ALL=C

arpa_lm=$1
old_lang=$2
new_lang=$3

# Start the new lang dir as a copy of the old one; stop if either step fails
# so we never build G.carpa against a missing or partial words.txt.
mkdir -p "$new_lang" || exit 1
cp -r "$old_lang"/* "$new_lang" || exit 1

# Integer ids of the OOV, <s> and </s> symbols, read from the old lang dir.
unk=`cat "$old_lang/oov.int"`
bos=`grep "^<s>\s" "$old_lang/words.txt" | awk '{print $2}'`
eos=`grep "^</s>\s" "$old_lang/words.txt" | awk '{print $2}'`
if [[ -z $bos || -z $eos ]]; then
  echo "$0: <s> and </s> symbols are not in $old_lang/words.txt"
  exit 1
fi
if [[ -z $unk ]]; then
  echo "$0: can't find oov symbol id in $old_lang/oov.int"
  exit 1
fi


# The quoted argument ending in '|' is passed to arpa-to-const-arpa as a
# single string describing a command pipeline, so the paths inside it are
# intentionally left as in the original.
arpa-to-const-arpa --bos-symbol=$bos \
  --eos-symbol=$eos --unk-symbol=$unk \
  "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|"  "$new_lang/G.carpa"  || exit 1;

exit 0;
54 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/build_kenlm_model_from_arpa.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # 2020 author Jiayu DU
 3 | # Apache 2.0
 4 | 
 5 | # This script reads in an Arpa format language model, and converts it into the
 6 | # KenLM format language model.
 7 | 
 8 | [ -f path.sh ] && . ./path.sh;
 9 | 
10 | # begin configuration section
11 | kenlm_opts="" # e.g. "-q 8 -b 8" for 8bits quantization
12 | model_type="trie" # "trie" or "probing". trie is smaller, probing is faster.
13 | # end configuration section
14 | 
15 | . utils/parse_options.sh
16 | 
17 | if [ $# != 2 ]; then
18 |   echo "Usage: "
19 |   echo "  $0 [options] <arpa-lm-path> <kenlm-path>"
20 |   echo "e.g.:"
21 |   echo "  $0 data/local/lm/4gram.arpa data/lang_test/G.trie"
22 |   echo "Options:"
23 |   echo "  --model-type can be either \"trie\" or \"probing\""
24 |   echo "  --kenlm-opts directly pass through to kenlm"
25 |   echo "    e.g. for 8bits quantization, feed \"-q 8 -b 8\""
26 |   exit 1;
27 | fi
28 | 
29 | export LC_ALL=C
30 | 
31 | arpa_lm=$1
32 | kenlm=$2
33 | 
34 | if ! which build_binary >& /dev/null ; then
35 |   echo "$0: cannot find KenLM's build_binary tool,"
36 |   echo "check kenlm installation (tools/extras/install_kenlm_query_only.sh)."
37 |   exit 1
38 | fi
39 | 
40 | mkdir -p $(dirname $kenlm)
41 | build_binary  $kenlm_opts  $model_type  $arpa_lm  $kenlm
42 | 
43 | echo "$0: Successfully built arpa into kenlm format: $kenlm"
44 | exit 0
45 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/ctm/fix_ctm.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | stmfile=$1
 4 | ctmfile=$2
 5 | 
 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u`
 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u`
 8 | 
 9 | segments_stm_count=`echo "$segments_stm" | wc -l `
10 | segments_ctm_count=`echo "$segments_ctm" | wc -l `
11 | 
12 | #echo $segments_stm_count
13 | #echo $segments_ctm_count
14 | 
15 | if [ "$segments_stm_count" -gt "$segments_ctm_count"  ] ; then
16 |   pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g")
17 |   (
18 |     for elem in $pp ; do
19 |       echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE"
20 |     done
21 |   ) >> $ctmfile
22 |   echo "FIXED CTM FILE"
23 |   exit 0
24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count"  ] ; then
25 |   echo "Segment STM count: $segments_stm_count"
26 |   echo "Segment CTM count: $segments_ctm_count"
27 |   echo "FAILURE FIXING CTM FILE"
28 |   exit 1
29 | else
30 |   exit 0
31 | fi
32 | 
33 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/data/get_num_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script works out the approximate number of frames in a training directory.
 4 | # This is sometimes needed by higher-level scripts
 5 | 
 6 | 
 7 | if [ -f path.sh ]; then . ./path.sh; fi
 8 | . parse_options.sh || exit 1;
 9 | 
10 | if [ $# -ne 1 ]; then
11 |   (
12 |     echo "Usage: $0 <data-dir>"
13 |     echo "Prints the number of frames of data in the data-dir"
14 |   ) 1>&2
15 | fi
16 | 
17 | data=$1
18 | 
19 | if [ ! -f $data/utt2dur ]; then
20 |   utils/data/get_utt2dur.sh $data 1>&2 || exit 1
21 | fi
22 | 
23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1
24 | 
25 | awk -v s=$frame_shift '{n += $2} END{printf("%.0f\n", (n / s))}' <$data/utt2dur
26 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/data/get_reco2utt_for_data.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Copyright 2016  Vimal Manohar
 4 | # Apache 2.0
 5 | 
 6 | if [ $# -ne 1 ]; then
 7 |   echo "This script outputs a mapping from recording to a list of utterances "
 8 |   echo "corresponding to the recording. It is analogous to the content of "
 9 |   echo "a spk2utt file, but is indexed by recording instead of speaker."
10 |   echo "Usage: get_reco2utt.sh <data>"
11 |   echo " e.g.: get_reco2utt.sh data/train"
12 |   exit 1
13 | fi
14 | 
15 | data=$1
16 | 
17 | if [ ! -s $data/segments ]; then
18 |   utils/data/get_segments_for_data.sh $data > $data/segments
19 | fi
20 | 
21 | cut -d ' ' -f 1,2 $data/segments | utils/utt2spk_to_spk2utt.pl
22 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/data/get_segments_for_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script operates on a data directory, such as in data/train/,
 4 | # and writes new segments to stdout. The file 'segments' maps from
 5 | # utterance to time offsets into a recording, with the format:
 6 | #   <utterance-id> <recording-id> <segment-begin> <segment-end>
 7 | # This script assumes utterance and recording ids are the same (i.e., that
 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 
 9 | # created if necessary by get_utt2dur.sh.
10 | 
. ./path.sh

# Exactly one argument (the data directory) is required.
if [ $# -ne 1 ]; then
  cat <<EOF
Usage: $0 [options] <datadir>
e.g.:
 $0 data/train > data/train/segments
EOF
  exit 1
fi

data=$1

# Build utt2dur on demand; its output is sent to stderr so that stdout holds
# only the segments lines.
[ -s $data/utt2dur ] || utils/data/get_utt2dur.sh $data 1>&2 || exit 1;

# One segment per utterance: <utt-id> <utt-id> 0 <utt-dur>
awk '{ print $1, $1, 0, $2 }' $data/utt2dur

exit 0
30 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/data/get_utt2num_frames.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Copyright 2016  Vimal Manohar
 4 | # Apache 2.0.
 5 | 
 6 | cmd=run.pl
 7 | nj=4
 8 | 
 9 | frame_shift=0.01
10 | frame_overlap=0.015
11 | 
12 | . utils/parse_options.sh
13 | . ./path.sh
14 | 
15 | if [ $# -ne 1 ]; then
16 |   echo "This script writes a file utt2num_frames with the "
17 |   echo "number of frames in each utterance as measured based on the "
18 |   echo "duration of the utterances (in utt2dur) and the specified "
19 |   echo "frame_shift and frame_overlap."
20 |   echo "Usage: $0 <data>"
21 |   exit 1
22 | fi
23 | 
24 | data=$1
25 | 
26 | if [ -s $data/utt2num_frames ]; then
27 |   echo "$0: $data/utt2num_frames already present!"
28 |   exit 0;
29 | fi
30 | 
31 | if [ ! -f $data/feats.scp ]; then
32 |   utils/data/get_utt2dur.sh --nj ${nj} --cmd "$cmd" $data
33 |   awk -v fs=$frame_shift -v fovlp=$frame_overlap \
34 |     '{print $1" "int( ($2 - fovlp) / fs)}' $data/utt2dur > $data/utt2num_frames
35 |   exit 0
36 | fi
37 | 
38 | utils/split_data.sh --per-utt $data $nj || exit 1
39 | $cmd JOB=1:$nj $data/log/get_utt2num_frames.JOB.log \
40 |   feat-to-len scp:$data/split${nj}utt/JOB/feats.scp ark,t:$data/split${nj}utt/JOB/utt2num_frames || exit 1
41 | 
42 | for n in `seq $nj`; do
43 |   cat $data/split${nj}utt/$n/utt2num_frames
44 | done > $data/utt2num_frames
45 | 
46 | echo "$0: Computed and wrote $data/utt2num_frames"
47 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/data/resample_data_dir.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Copyright 2016  Vimal Manohar
 4 | #           2018  Xiaohui Zhang
 5 | # Apache 2.0.
 6 | 
 7 | if [ $# -ne 2 ]; then
 8 |   echo "This script adds a sox line in wav.scp to resample the audio at a "
 9 |   echo "different sampling-rate"
10 |   echo "Usage: $0 <frequency> <data-dir>"
11 |   echo " e.g.: $0 8000 data/dev"
12 |   exit 1
13 | fi
14 | 
15 | freq=$1
16 | dir=$2
17 | 
18 | sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; }
19 | 
20 | if [ -f $dir/feats.scp ]; then
21 |   mkdir -p $dir/.backup
22 |   mv $dir/feats.scp $dir/.backup/
23 |   if [ -f $dir/cmvn.scp ]; then
24 |     mv $dir/cmvn.scp $dir/.backup/
25 |   fi
26 |   echo "$0: feats.scp already exists. Moving it to $dir/.backup"
27 | fi
28 | 
29 | # After resampling we cannot compute utt2dur from wav.scp any more,
30 | # so we create utt2dur now, in case it's needed later
31 | if [ ! -s $dir/utt2dur ]; then
32 |   utils/data/get_utt2dur.sh $dir 1>&2 || exit 1;
33 | fi
34 | 
35 | mv $dir/wav.scp $dir/wav.scp.tmp
36 | cat $dir/wav.scp.tmp | python -c "import sys
37 | for line in sys.stdin.readlines():
38 |   splits = line.strip().split()
39 |   if splits[-1] == '|':
40 |     out_line = line.strip() + ' $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'
41 |   else:
42 |     out_line = '{0} cat {1} | $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'.format(splits[0], ' '.join(splits[1:]))
43 |   print (out_line)" > ${dir}/wav.scp
44 | rm $dir/wav.scp.tmp
45 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/eps2disambig.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | #                2015 Guoguo Chen
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # This script replaces epsilon with #0 on the input side only, of the G.fst
19 | # acceptor.  
20 | 
# Read the FST text line by line (from the files named on the command line,
# or stdin), refuse input that already contains the word #0, and rewrite an
# <eps> in the third field of each line to #0.
while (my $line = <>) {
  if ($line =~ /\s+#0\s+/) {
    print STDERR "$0: ERROR: LM has word #0, " .
                 "which is reserved as disambiguation symbol\n";
    exit 1;
  }
  # Only the <eps> that follows two leading integer fields is replaced.
  $line =~ s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
  print $line;
}
30 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/filt.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Apache 2.0
 4 | 
 5 | from __future__ import print_function
 6 | import sys
 7 | 
 8 | vocab=set()
 9 | with open(sys.argv[1]) as vocabfile:
10 |     for line in vocabfile:
11 |         vocab.add(line.strip())
12 | 
13 | with open(sys.argv[2]) as textfile:
14 |     for line in textfile:
15 |         print(" ".join([word if word in vocab else '<UNK>' for word in line.strip().split()]))
16 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/fix_ctm.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Usage: fix_ctm.sh <stm-file> <ctm-file>
 4 | #
 5 | # Compares the distinct first fields of the two files.  If the STM file has
 6 | # more of them than the CTM file, one placeholder line is appended to the CTM
 7 | # file for every entry that diff reports only on the STM side.
 8 | 
 9 | stmfile=$1
10 | ctmfile=$2
11 | 
12 | # File names are quoted so that paths containing whitespace work.
13 | segments_stm=$(cut -f 1 -d ' ' "$stmfile" | sort -u)
14 | segments_ctm=$(cut -f 1 -d ' ' "$ctmfile" | sort -u)
15 | 
16 | # Empty lines are dropped before counting: "echo" of an empty list still
17 | # prints one newline, which would otherwise be counted as one segment.
18 | segments_stm_count=$(echo "$segments_stm" | sed '/^$/d' | wc -l)
19 | segments_ctm_count=$(echo "$segments_ctm" | sed '/^$/d' | wc -l)
20 | 
21 | #echo $segments_stm_count
22 | #echo $segments_ctm_count
23 | 
24 | if [ "$segments_stm_count" -gt "$segments_ctm_count"  ] ; then
25 |   # Lines that diff marks with "<" exist only in the STM list.
26 |   pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g")
27 |   (
28 |     for elem in $pp ; do
29 |       echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE"
30 |     done
31 |   ) >> "$ctmfile"
32 |   echo "FIXED CTM FILE"
33 |   exit 0
34 | elif [ "$segments_stm_count" -lt "$segments_ctm_count"  ] ; then
35 |   echo "Segment STM count: $segments_stm_count"
36 |   echo "Segment CTM count: $segments_ctm_count"
37 |   echo "FAILURE FIXING CTM FILE"
38 |   exit 1
39 | else
40 |   exit 0
41 | fi
42 | 
43 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/lang/bpe/prepend_words.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # This script prepends '|' to every word in the transcript to mark
 4 | # the beginning of each word, so that the initial space of every word
 5 | # can be found after decoding.
 6 | 
 7 | import sys
 8 | import io
 9 | import re
10 | 
11 | whitespace = re.compile("[ \t]+")
12 | infile = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1')
13 | output = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1')
14 | for line in infile:
15 |     words = whitespace.split(line.strip(" \t\r\n"))
16 |     output.write(' '.join([ "|"+word for word in words]) + '\n')
17 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/lang/bpe/reverse.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # This script reverses all Latin and digit sequences
 5 | # (including words like MP3) to put them in the right order in the images.
 6 | 
 7 | import re, os, sys, io
 8 | 
 9 | in_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
10 | out_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
11 | for line in in_stream:
12 |     out_stream.write(re.sub(r'[a-zA-Z0-9][a-zA-Z0-9\s\.\:]*[a-zA-Z0-9]',
13 |                             lambda m:m.group(0)[::-1], line))
14 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/make_absolute.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script replaces the command readlink -f (which is not portable).
 4 | # It turns a pathname into an absolute pathname, including following soft links.
 5 | # Every expansion below is quoted so that paths containing whitespace work,
 6 | # and a failed 'cd' aborts instead of printing a path from the wrong directory.
 7 | target_file=$1
 8 | 
 9 | cd "$(dirname "$target_file")" || exit 1
10 | target_file=$(basename "$target_file")
11 | 
12 | # Iterate down a (possible) chain of symlinks
13 | while [ -L "$target_file" ]; do
14 |     target_file=$(readlink "$target_file")
15 |     cd "$(dirname "$target_file")" || exit 1
16 |     target_file=$(basename "$target_file")
17 | done
18 | 
19 | # Compute the canonicalized name by finding the physical path
20 | # for the directory we're in and appending the target file.
21 | phys_dir=$(pwd -P)
22 | result=$phys_dir/$target_file
23 | echo "$result"
24 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/require_argument.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # To be sourced by another script
 4 | 
 5 | set -euo pipefail
 6 | 
 7 | if [ $# -ne 1 ]; then
 8 |     echo "Usage: $0 <argument>" >&2
 9 |     echo " e.g.: $0 --data-dir" >&2
10 |     # Stop here instead of falling through to the "$1" read below.
11 |     exit 1
12 | fi
13 | 
14 | # The option as written on the command line, e.g. --data-dir.
15 | key=$1
16 | 
17 | # Variable name for the option: leading "--" removed, every "-" turned into "_".
18 | name=$(sed -e s/^--// -e s/-/_/g <<< "$key")
19 | 
20 | # Test the variable called $name indirectly.  The ":-" default keeps 'set -u'
21 | # from aborting on an unset variable, so the message below is printed instead.
22 | if eval '[ -z "${'$name':-}" ]'; then
23 |     echo "$0: option $key is required" >&2
24 |     echo >&2
25 |     echo "${help_message:-}" >&2
26 |     exit 1
27 | fi
28 | 
29 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/require_argument_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # To be sourced by another script.
4 | # Checks each option name given as an argument with utils/require_argument.sh.
5 | for i in "$@"; do
6 |     . utils/require_argument.sh "$i"
7 | done
8 | 
9 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/s2eps.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script replaces <s> and </s> with <eps> (on both input and output sides),
18 | # for the G.fst acceptor.
19 | 
20 | while (my $line = <>) {
21 |     my @fields = split(" ", $line);
22 |     if (scalar(@fields) >= 4) {
23 |         # Fields 2 and 3 are the ones checked for the sentence markers.
24 |         foreach my $col (2, 3) {
25 |             if ($fields[$col] eq "<s>" || $fields[$col] eq "</s>") {
26 |                 $fields[$col] = "<eps>";
27 |             }
28 |         }
29 |     }
30 |     print join("\t", @fields) . "\n";
31 | }
32 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/shuffle_list.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
19 | # Seed the random number generator: from "--srand N" when given, otherwise
20 | # with the fixed seed 0.
21 | if (@ARGV > 0 && $ARGV[0] eq "--srand") {
22 |   $n = $ARGV[1];
23 |   # The seed has to consist of digits only.
24 |   (defined($n) && $n =~ m/^\d+$/) || die "Bad argument to --srand option: \"$n\"";
25 |   srand($n);
26 |   shift;
27 |   shift;
28 | } else {
29 |   srand(0); # Gives inconsistent behavior if we don't seed.
30 | }
31 | 
32 | if (@ARGV > 1 || (@ARGV == 1 && $ARGV[0] =~ m/^-.+/)) { # >1 args, or an option we
33 |   # don't understand.
34 |   # The usage text goes to stderr so it cannot end up in the redirected output.
35 |   print STDERR "Usage: shuffle_list.pl [--srand N] [input file]  > output\n";
36 |   print STDERR "randomizes the order of lines of input.\n";
37 |   exit(1);
38 | }
39 | 
40 | # Pair every input line with a random key, then emit the lines ordered by key.
41 | @lines = ();
42 | while (<>) {
43 |   push @lines, [ (rand(), $_)] ;
44 | }
45 | 
46 | # The keys are compared as strings ('cmp'); this is kept as-is because a
47 | # different comparison could change the order produced for a given seed.
48 | @lines = sort { $a->[0] cmp $b->[0] } @lines;
49 | foreach $l (@lines) {
50 |     print $l->[1];
51 | }
52 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | while (my $line = <>) {
19 |     my ($spk, @utts) = split(" ", $line);
20 |     @utts > 0 || die "Invalid line in spk2utt file: $line";
21 |     # One "utterance speaker" pair per utterance, in input order.
22 |     foreach my $utt (@utts) {
23 |         print "$utt $spk\n";
24 |     }
25 | }
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/summarize_warnings.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 4 | 
 5 | # Print the usage text and stop unless exactly one argument was given.
 6 | # (Written as a block because "print LIST && exit" would evaluate the exit
 7 | # as part of print's argument list, i.e. before anything is printed.)
 8 | if (@ARGV != 1) {
 9 |   print STDERR "Usage: summarize_warnings.pl <log-dir>\n";
10 |   exit 1;
11 | }
12 | 
13 | $dir = $ARGV[0];
14 | 
15 | if (! -d $dir) {
16 |   print STDERR "summarize_warnings.pl: no such directory $dir\n";
17 |   exit 1;
18 | }
19 | 
20 | $dir =~ s:/$::; # Remove trailing slash.
21 | 
22 | 
23 | # Group the files into categories where all have the same base-name:
24 | # up to two ".<digits>." parts of a name are each replaced by ".*.".
25 | foreach $f (glob ("$dir/*.log")) {
26 |   $f_category = $f;
27 |   # do next expression twice; s///g doesn't work as they overlap.
28 |   $f_category =~ s:\.\d+\.:.*.:;
29 |   $f_category =~ s:\.\d+\.:.*.:;
30 |   $fmap{$f_category} .= " $f";
31 | }
32 | 
33 | sub split_hundreds { # split list of filenames into groups of 100.
34 |   my $names = shift @_;
35 |   my @A = split(" ", $names);
36 |   my @ans = ();
37 |   while (@A > 0) {
38 |     my $group = "";
39 |     for (my $x = 0; $x < 100 && @A>0; $x++) {
40 |       my $fname = pop @A;
41 |       $group .= "$fname ";
42 |     }
43 |     push @ans, $group;
44 |   }
45 |   return @ans;
46 | }
47 | 
48 | # Count the lines containing the word WARNING in each category.  The files are
49 | # handed to grep in groups (presumably to keep each command line short).
50 | # Categories are visited in sorted order so the report is reproducible.
51 | foreach $c (sort keys %fmap) {
52 |   $n = 0;
53 |   foreach $fgroup (split_hundreds($fmap{$c})) {
54 |     $n += `grep -w WARNING $fgroup | wc -l`;
55 |   }
56 |   if ($n != 0) {
57 |     print "$n warnings in $c\n"
58 |   }
59 | }
60 | 


--------------------------------------------------------------------------------
/MLLM/tools/kaldi/utils/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 | 
21 | if (scalar(@ARGV) > 1) {
22 |     die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
23 | }
24 | my (%utts_of, @spk_order);
25 | while (my $line = <>) {
26 |     my @cols = split(" ", $line);
27 |     @cols == 2 || die "Invalid line in utt2spk file: $line";
28 |     my ($utt, $spk) = @cols;
29 |     # The first sighting of a speaker fixes its position in the output.
30 |     push @spk_order, $spk unless exists $utts_of{$spk};
31 |     push @{$utts_of{$spk}}, $utt;
32 | }
33 | foreach my $spk (@spk_order) {
34 |     print $spk . " " . join(' ', @{$utts_of{$spk}}) . "\n";
35 | }
36 | 


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/__pycache__/mimi_tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/__pycache__/mimi_tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/__pycache__/mimi_tokenizer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/__pycache__/mimi_tokenizer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/mimi_config.yaml:
--------------------------------------------------------------------------------
1 | generator:
2 |   name: MimiCodec
3 |   config:
4 |     encoder_rates: [8, 6, 5, 4]
5 |     codebook_size: 2048
6 |     codebook_dim: 256
7 |     rvq_layers: 8


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/models/__init__.py


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/models/__pycache__/MimiCodec.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/models/__pycache__/MimiCodec.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/models/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/models/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__init__.py


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/conv.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/conv.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/gating.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/gating.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/resample.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/resample.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/rope.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/rope.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/seanet.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/seanet.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/streaming.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/streaming.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/transformer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/modules/__pycache__/transformer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/quantization/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """RVQ."""
11 | # flake8: noqa
12 | from .vq import ResidualVectorQuantizer, SplitResidualVectorQuantizer
13 | from .base import BaseQuantizer, DummyQuantizer, QuantizedResult
14 | 


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/base.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/base.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/core_vq.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/core_vq.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/vq.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/quantization/__pycache__/vq.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/utils/__init__.py


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/utils/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/utils/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/MimiCodec/model/utils/__pycache__/compile.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/MimiCodec/model/utils/__pycache__/compile.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/Text2ID/__pycache__/moshi_text_tokenizer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/Text2ID/__pycache__/moshi_text_tokenizer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/Text2ID/__pycache__/text_tokenizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/Text2ID/__pycache__/text_tokenizer.cpython-38.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/__pycache__/abs_tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/__pycache__/abs_tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/__pycache__/abs_tokenizer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/__pycache__/abs_tokenizer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/__pycache__/abs_tokenizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/__pycache__/abs_tokenizer.cpython-38.pyc


--------------------------------------------------------------------------------
/MLLM/tools/tokenizer/__pycache__/common.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/tools/tokenizer/__pycache__/common.cpython-38.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Utilities."""
11 | 


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/abs_scheduler.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/abs_scheduler.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/arguments.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/arguments.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/compile.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/compile.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/dataloader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/dataloader.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/dataloader.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/dataloader.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/reporter.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/reporter.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/sampling.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/sampling.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/task_definition.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/task_definition.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM/utils/__pycache__/train_utils.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM/utils/__pycache__/train_utils.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/egs/extract_tokens/get_wav.py:
--------------------------------------------------------------------------------
 1 | import os 
 2 | import glob
 3 | 
 4 | names = glob.glob("/home-dongchao/data/source/*.wav")
 5 | f = open('/home-dongchao/code3/RSTnet_private/MLLM/egs/extract_tokens/wav.scp', 'w')
 6 | for name in names:
 7 |     bs_name = os.path.basename(name)
 8 |     f.write(bs_name+' '+name+'\n')
 9 | 
10 | 
11 |     


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/data_scripts/emilia/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "language": {
 3 |         "multilingual": true,
 4 |         "supported": [
 5 |             "zh",
 6 |             "en",
 7 |             "fr",
 8 |             "ja",
 9 |             "ko",
10 |             "de"
11 |         ]
12 |     },
13 |     "entrypoint": {
14 |         "input_folder_path": "/mnt/users/hccl.local/jkzhao/projects/RSTnet/MLLM_v2/egs/pretraining/data_scripts/emilia/testbench",
15 |         "SAMPLE_RATE": 24000
16 |     },
17 |     "separate": {
18 |         "step1": {
19 |             "model_path": "/mnt/users/hccl.local/jkzhao/projects/RSTnet/MLLM_v2/egs/pretraining/data_scripts/emilia/ckpts/UVR-MDX-NET-Inst_HQ_3.onnx",
20 |             "denoise": true,
21 |             "margin": 44100,
22 |             "chunks": 15,
23 |             "n_fft": 6144,
24 |             "dim_t": 8,
25 |             "dim_f": 3072
26 |         }
27 |     },
28 |     "mos_model": {
29 |         "primary_model_path": "/mnt/users/hccl.local/jkzhao/projects/RSTnet/MLLM_v2/egs/pretraining/data_scripts/emilia/ckpts/sig_bak_ovr.onnx"
30 |     },
31 |     "huggingface_token": "<HUGGINGFACE_ACCESS_TOKEN>"
32 | }


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/data_scripts/emilia/env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) 2024 Amphion.
 3 | #
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | # Install the conda-provided dependencies. Every call passes -y so the
 8 | # script can run unattended without stopping at a confirmation prompt.
 9 | conda install ffmpeg -y
10 | conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia -y
11 | conda install cudnn=9 -y
12 | 
13 | # Install the Python packages from requirements.txt in the current directory,
14 | # then onnxruntime-gpu from the extra package index given below.
15 | pip install -r requirements.txt
16 | pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
17 | 


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/data_scripts/emilia/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/egs/pretraining/data_scripts/emilia/models/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/data_scripts/emilia/requirements.txt:
--------------------------------------------------------------------------------
1 | librosa
2 | numpy
3 | tqdm
4 | pydub
5 | pyannote.audio
6 | pandas
7 | git+https://github.com/m-bain/whisperx.git # needs torch >= 2.0.0
8 | 


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/data_scripts/emilia/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/egs/pretraining/data_scripts/emilia/utils/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/data_scripts/filter_scp.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | ref_f = sys.argv[1]
 4 | in_f = sys.argv[2]
 5 | try:
 6 |     writer = open(sys.argv[3], 'w', encoding='utf-8')
 7 |     stream_out = False
 8 | except:
 9 |     stream_out = True 
10 | 
11 | # output is in the order of ref_f
12 | ref = []
13 | for line in open(ref_f, encoding='utf-8'):
14 |     uttid = line.strip().split()[0]
15 |     ref.append(uttid)
16 | 
17 | in_dic = {}
18 | for line in open(in_f, encoding='utf-8'):
19 |     elems = line.strip().split()
20 |     uttid = elems[0]
21 |     ctx = " ".join(elems[1:])
22 |     in_dic[uttid] = ctx
23 | 
24 | for e in ref:
25 |     if e in in_dic:
26 |         if stream_out:
27 |             print(f"{e} {in_dic[e]}")
28 |         else:
29 |             writer.write(f"{e} {in_dic[e]}\n")
30 | 


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/infer.sh:
--------------------------------------------------------------------------------
 1 | # Inference: run infer_no_streaming.py for the TTS task with a trained checkpoint.
 2 | . ./path.sh
 3 | 
 4 | # Run settings (defined here but not referenced by the command below).
 5 | ngpu=1
 6 | inference_dir="/home-dongchao/exp/MLLM/exp/infer"
 7 | part="simple_infer"
 8 | 
 9 | # The checkpoint to resume from lives inside the experiment directory.
10 | exp_dir="/home-dongchao/exp/MLLM/exp/exp/audiollm_v2_llama3B_11_25_tts"
11 | 
12 | python3 ../../infer_no_streaming.py \
13 |     --exp_dir "${exp_dir}" \
14 |     --resume "${exp_dir}/ep1-iter125000.checkpoint" \
15 |     --inference_mode "sampling" \
16 |     --rank 0 \
17 |     --output_dir "/home-dongchao/code3/RSTnet_private/MLLM2_11_24/egs/pretraining/tts_only_11_25" \
18 |     --data_json "/home-dongchao/exp/MLLM/tasks/audio/libritts/test/8splits/data_tts.0.json" \
19 |     --generate_target "audio" \
20 |     --task_name "TTS"
21 | 


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/path.sh:
--------------------------------------------------------------------------------
 1 | # Locale, Python I/O encoding and OpenMP thread count for scripts that source this file.
 2 | export LC_ALL=C PYTHONIOENCODING=UTF-8 OMP_NUM_THREADS=1
 3 | 
 4 | # Executable helpers: appended to PATH as paths relative to the working directory.
 5 | export PATH="${PATH}:utils:../../tools/data_scripts/"
 6 | 
 7 | # Python import root: two directories above the working directory.
 8 | export PYTHONPATH="${PYTHONPATH}:../../"
 9 | 


--------------------------------------------------------------------------------
/MLLM_v2/egs/pretraining/readme.md:
--------------------------------------------------------------------------------
 1 | ## Main idea
 2 | 
 3 | 1. preprocess the dataset
 4 | 
 5 |     First, prepare the environment following [Emilia](https://github.com/open-mmlab/Amphion/tree/main/preprocessors/Emilia). The Emilia code is located at `data_scripts/emilia`.
 6 | 
 7 |     Then, modify the paths in `prepare_broadcast_data.sh` and run it. The dataloader will yield a token sequence of shape [B, 9, t_text+t_audio+2], laid out as follows:
 8 | ```
 9 |     <|begin_of_text|>[<spk_id1>] <text> ··· [<spk_id1>] <text><|text_emply_token|>···
10 |                                                               <semantic_tokens>···<|semantic_emply_token|>
11 |                                                               <|semantic_emply_token|><acoustic_tokens>···
12 | ```
13 | 
14 | 2. Pre-training
15 | 
16 | 3. Post-training
17 | 
18 | 4. Inference
19 | 
20 | 


--------------------------------------------------------------------------------
/MLLM_v2/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/config.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/config.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/config.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/config.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/config.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/config.cpython-39.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/lit_model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/lit_model.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/lit_model.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/lit_model.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/lit_model.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/lit_model.cpython-39.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/llama_streaming.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/llama_streaming.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/llama_streaming.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/llama_streaming.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/llama_streaming.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/llama_streaming.cpython-39.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/llama_streaming_lora.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/llama_streaming_lora.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/mlp.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/mlp.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/mlp.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/mlp.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/mlp.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/mlp.cpython-39.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/__pycache__/model.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/models/__pycache__/model.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/models/mlp.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | from dataclasses import dataclass
 3 | from typing import Any, Dict, List, Optional, Tuple, Type, Union
 4 | 
 5 | import torch
 6 | import torch.nn as nn
 7 | from torch.nn import functional as F
 8 | from typing_extensions import Self
 9 | from models import lit_model
10 | from models.config import Config as BaseConfig
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/MLLM_v2/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Modules used for building the models."""
11 | 
12 | # flake8: noqa
13 | from modules.conv import (
14 |     NormConv1d,
15 |     NormConvTranspose1d,
16 |     StreamingConv1d,
17 |     StreamingConvTranspose1d,
18 |     pad_for_conv1d,
19 |     pad1d,
20 |     unpad1d,
21 | )
22 | from modules.seanet import SEANetEncoder, SEANetDecoder
23 | from modules.transformer import StreamingTransformer
24 | 


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/conv.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/conv.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/conv.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/conv.cpython-38.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/gating.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/gating.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/resample.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/resample.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/rope.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/rope.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/seanet.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/seanet.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/streaming.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/streaming.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/streaming.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/streaming.cpython-38.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/transformer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/transformer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/modules/__pycache__/transformer_lora.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/modules/__pycache__/transformer_lora.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | """
 6 | moshi is the inference codebase for Kyutai audio generation models.
 7 | 
 8 | The code has been adapted from Audiocraft, see LICENSE.audiocraft
 9 |   Copyright (c) Meta Platforms, Inc. and affiliates.
10 | """
11 | 
12 | # flake8: noqa
13 | from . import utils
14 | from . import modules
15 | from . import models
16 | from . import quantization
17 | 
18 | __version__ = "0.1.0"
19 | 


--------------------------------------------------------------------------------
/MLLM_v2/moshi/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | """
 5 | Models for the compression model Moshi,
 6 | """
 7 | 
 8 | # flake8: noqa
 9 | from moshi.models.compression import (
10 |     CompressionModel,
11 |     MimiModel,
12 | )
13 | from moshi.models.lm import LMModel, LMGen
14 | from moshi.models.loaders import get_mimi, get_moshi_lm
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/moshi/models/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/models/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/models/__pycache__/compression.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/models/__pycache__/compression.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/models/__pycache__/lm.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/models/__pycache__/lm.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/models/__pycache__/loaders.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/models/__pycache__/loaders.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Modules used for building the models."""
11 | 
12 | # flake8: noqa
13 | from moshi.modules.conv import (
14 |     NormConv1d,
15 |     NormConvTranspose1d,
16 |     StreamingConv1d,
17 |     StreamingConvTranspose1d,
18 |     pad_for_conv1d,
19 |     pad1d,
20 |     unpad1d,
21 | )
22 | from moshi.modules.seanet import SEANetEncoder, SEANetDecoder
23 | from moshi.modules.transformer import StreamingTransformer
24 | 


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/conv.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/conv.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/gating.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/gating.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/resample.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/resample.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/rope.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/rope.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/seanet.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/seanet.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/streaming.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/streaming.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/modules/__pycache__/transformer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/modules/__pycache__/transformer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/quantization/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """RVQ."""
11 | # flake8: noqa
12 | from .vq import ResidualVectorQuantizer, SplitResidualVectorQuantizer
13 | from .base import BaseQuantizer, DummyQuantizer, QuantizedResult
14 | 


--------------------------------------------------------------------------------
/MLLM_v2/moshi/quantization/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/quantization/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/quantization/__pycache__/base.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/quantization/__pycache__/base.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/quantization/__pycache__/core_vq.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/quantization/__pycache__/core_vq.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/quantization/__pycache__/vq.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/quantization/__pycache__/vq.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Utilities."""
11 | 


--------------------------------------------------------------------------------
/MLLM_v2/moshi/utils/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/utils/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/utils/__pycache__/compile.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/utils/__pycache__/compile.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/moshi/utils/__pycache__/sampling.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/moshi/utils/__pycache__/sampling.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/readme.md:
--------------------------------------------------------------------------------
 1 | ## Multi-modal LLM (speech-text foundation models)
 2 | This part provides the training details for speech-text foundation models,
 3 | including Moshi-style pre-training code.
 4 | 
 5 | ## How to start it?
 6 | 
 7 | ### Step 0: refer to litgpt https://github.com/Lightning-AI/litgpt/ to download the desired LLM checkpoints
 8 | 
 9 | ### Step 1: go to `egs/pretraining` and see `extract_token.sh` for data preprocessing
10 | 
11 | ### Step 2: go to `egs/pretraining` and see `run.sh` for model pre-training
12 | 
13 | ### Step 3: go to `egs/pretraining` and see `infer.sh` for inference
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/data_scripts/filter_scp.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | ref_f = sys.argv[1]
 4 | in_f = sys.argv[2]
 5 | try:
 6 |     writer = open(sys.argv[3], 'w', encoding='utf-8')
 7 |     stream_out = False
 8 | except:
 9 |     stream_out = True 
10 | 
11 | # output is in the order of ref_f
12 | ref = []
13 | for line in open(ref_f, encoding='utf-8'):
14 |     uttid = line.strip().split()[0]
15 |     ref.append(uttid)
16 | 
17 | in_dic = {}
18 | for line in open(in_f, encoding='utf-8'):
19 |     elems = line.strip().split()
20 |     uttid = elems[0]
21 |     ctx = " ".join(elems[1:])
22 |     in_dic[uttid] = ctx
23 | 
24 | for e in ref:
25 |     if e in in_dic:
26 |         if stream_out:
27 |             print(f"{e} {in_dic[e]}")
28 |         else:
29 |             writer.write(f"{e} {in_dic[e]}\n")
30 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/data_scripts/select_spk2utt.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import argparse
 3 | 
 4 | def get_parser():
 5 |     parser = argparse.ArgumentParser(
 6 |         description="Revise the spk2utt file: it only contans a subset of the utts",
 7 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
 8 |     )
 9 |     parser.add_argument("--in-spk2utt", type=str, help="original spk2utt file")
10 |     parser.add_argument("--out-spk2utt", type=str, help="revised spk2utt file")
11 |     parser.add_argument("--subset-list", type=str, help="list of utt subset")
12 |     return parser
13 | 
def main(args):
    """Rewrite a spk2utt file so it only lists utterances from a subset.

    Args:
        args: list of command-line arguments (without the program name),
            parsed with ``get_parser()``.

    Each input line has the form "<spk_id> <utt1> <utt2> ...".  Utterances
    whose id is not the first field of some line in the subset list are
    dropped.  A speaker line is still written when none of its utterances
    survive (it then contains only the speaker id), as in the original script.
    Blank lines in either input file are skipped.
    """
    args = get_parser().parse_args(args)

    # Only membership is needed, so keep the wanted utterance ids in a set.
    with open(args.subset_list) as subset_file:
        wanted = {
            fields[0]
            for fields in (line.split() for line in subset_file)
            if fields
        }

    # Context managers guarantee the output is flushed and closed.
    with open(args.in_spk2utt) as reader, open(args.out_spk2utt, 'w') as writer:
        for line in reader:
            fields = line.split()
            if not fields:
                continue
            spk_id, spk_utts = fields[0], fields[1:]
            kept = [utt for utt in spk_utts if utt in wanted]
            writer.write(" ".join([spk_id] + kept) + "\n")


if __name__ == "__main__":
    main(sys.argv[1:])
32 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/build_const_arpa_lm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2014  Guoguo Chen
 4 | # Apache 2.0
 5 | 
 6 | # This script reads in an Arpa format language model, and converts it into the
 7 | # ConstArpaLm format language model.
 8 | 
 9 | # begin configuration section
10 | # end configuration section
11 | 
[ -f path.sh ] && . ./path.sh;

. utils/parse_options.sh

if [ $# != 3 ]; then
  echo "Usage: "
  echo "  $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>"
  echo "e.g.:"
  echo "  $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed"
  echo "Options"
  exit 1;
fi

export LC_ALL=C

arpa_lm=$1
old_lang=$2
new_lang=$3

# The new lang directory starts out as a copy of the old one; G.carpa is
# written into it at the end of the script.
mkdir -p "$new_lang" || exit 1
cp -r "$old_lang"/* "$new_lang" || exit 1

# Integer ids of the OOV word and of the sentence-boundary symbols.
unk=$(cat "$old_lang/oov.int")
bos=$(grep "^<s>\s" "$old_lang/words.txt" | awk '{print $2}')
eos=$(grep "^</s>\s" "$old_lang/words.txt" | awk '{print $2}')
if [[ -z $bos || -z $eos ]]; then
  echo "$0: <s> and </s> symbols are not in $old_lang/words.txt"
  exit 1
fi
if [[ -z $unk ]]; then
  echo "$0: can't find oov symbol id in $old_lang/oov.int"
  exit 1
fi


# The first positional argument is a command string ending in '|'
# (presumably interpreted by arpa-to-const-arpa as piped input -- confirm
# against the Kaldi I/O conventions); it gunzips the ARPA file and maps its
# words through words.txt before conversion.
arpa-to-const-arpa --bos-symbol=$bos \
  --eos-symbol=$eos --unk-symbol=$unk \
  "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|"  "$new_lang/G.carpa"  || exit 1;

exit 0;
54 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/build_kenlm_model_from_arpa.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # 2020 author Jiayu DU
 3 | # Apache 2.0
 4 | 
 5 | # This script reads in an Arpa format language model, and converts it into the
 6 | # KenLM format language model.
 7 | 
 8 | [ -f path.sh ] && . ./path.sh;
 9 | 
10 | # begin configuration section
11 | kenlm_opts="" # e.g. "-q 8 -b 8" for 8bits quantization
12 | model_type="trie" # "trie" or "probing". trie is smaller, probing is faster.
13 | # end configuration section
14 | 
15 | . utils/parse_options.sh
16 | 
17 | if [ $# != 2 ]; then
18 |   echo "Usage: "
19 |   echo "  $0 [options] <arpa-lm-path> <kenlm-path>"
20 |   echo "e.g.:"
21 |   echo "  $0 data/local/lm/4gram.arpa data/lang_test/G.trie"
22 |   echo "Options:"
23 |   echo "  --model-type can be either \"trie\" or \"probing\""
24 |   echo "  --kenlm-opts directly pass through to kenlm"
25 |   echo "    e.g. for 8bits quantization, feed \"-q 8 -b 8\""
26 |   exit 1;
27 | fi
28 | 
29 | export LC_ALL=C
30 | 
31 | arpa_lm=$1
32 | kenlm=$2
33 | 
34 | if ! which build_binary >& /dev/null ; then
35 |   echo "$0: cannot find KenLM's build_binary tool,"
36 |   echo "check kenlm installation (tools/extras/install_kenlm_query_only.sh)."
37 |   exit 1
38 | fi
39 | 
40 | mkdir -p $(dirname $kenlm)
41 | build_binary  $kenlm_opts  $model_type  $arpa_lm  $kenlm
42 | 
43 | echo "$0: Successfully built arpa into kenlm format: $kenlm"
44 | exit 0
45 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/ctm/fix_ctm.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | stmfile=$1
 4 | ctmfile=$2
 5 | 
 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u`
 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u`
 8 | 
 9 | segments_stm_count=`echo "$segments_stm" | wc -l `
10 | segments_ctm_count=`echo "$segments_ctm" | wc -l `
11 | 
12 | #echo $segments_stm_count
13 | #echo $segments_ctm_count
14 | 
15 | if [ "$segments_stm_count" -gt "$segments_ctm_count"  ] ; then
16 |   pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g")
17 |   (
18 |     for elem in $pp ; do
19 |       echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE"
20 |     done
21 |   ) >> $ctmfile
22 |   echo "FIXED CTM FILE"
23 |   exit 0
24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count"  ] ; then
25 |   echo "Segment STM count: $segments_stm_count"
26 |   echo "Segment CTM count: $segments_ctm_count"
27 |   echo "FAILURE FIXING CTM FILE"
28 |   exit 1
29 | else
30 |   exit 0
31 | fi
32 | 
33 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/data/get_num_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script works out the approximate number of frames in a training directory.
 4 | # This is sometimes needed by higher-level scripts
 5 | 
 6 | 
 7 | if [ -f path.sh ]; then . ./path.sh; fi
 8 | . parse_options.sh || exit 1;
 9 | 
10 | if [ $# -ne 1 ]; then
11 |   (
12 |     echo "Usage: $0 <data-dir>"
13 |     echo "Prints the number of frames of data in the data-dir"
14 |   ) 1>&2
15 | fi
16 | 
17 | data=$1
18 | 
19 | if [ ! -f $data/utt2dur ]; then
20 |   utils/data/get_utt2dur.sh $data 1>&2 || exit 1
21 | fi
22 | 
23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1
24 | 
25 | awk -v s=$frame_shift '{n += $2} END{printf("%.0f\n", (n / s))}' <$data/utt2dur
26 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/data/get_reco2utt_for_data.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Copyright 2016  Vimal Manohar
 4 | # Apache 2.0
 5 | 
 6 | if [ $# -ne 1 ]; then
 7 |   echo "This script outputs a mapping from recording to a list of utterances "
 8 |   echo "corresponding to the recording. It is analogous to the content of "
 9 |   echo "a spk2utt file, but is indexed by recording instead of speaker."
10 |   echo "Usage: get_reco2utt.sh <data>"
11 |   echo " e.g.: get_reco2utt.sh data/train"
12 |   exit 1
13 | fi
14 | 
15 | data=$1
16 | 
17 | if [ ! -s $data/segments ]; then
18 |   utils/data/get_segments_for_data.sh $data > $data/segments
19 | fi
20 | 
21 | cut -d ' ' -f 1,2 $data/segments | utils/utt2spk_to_spk2utt.pl
22 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/data/get_segments_for_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script operates on a data directory, such as in data/train/,
 4 | # and writes new segments to stdout. The file 'segments' maps from
 5 | # utterance to time offsets into a recording, with the format:
 6 | #   <utterance-id> <recording-id> <segment-begin> <segment-end>
 7 | # This script assumes utterance and recording ids are the same (i.e., that
 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 
 9 | # created if necessary by get_utt2dur.sh.
10 | 
. ./path.sh

# Exactly one argument (the data directory) is expected.
if [ $# != 1 ]; then
  echo "Usage: $0 [options] <datadir>"
  echo "e.g.:"
  echo " $0 data/train > data/train/segments"
  exit 1
fi

datadir=$1
durations=$datadir/utt2dur

# Make sure a non-empty utt2dur exists; its generator reports on stderr so
# that stdout stays reserved for the segments lines.
if ! [ -s $durations ]; then
  utils/data/get_utt2dur.sh $datadir 1>&2 || exit 1;
fi

# One line per utterance: <utt-id> <utt-id> 0 <utt-dur>
awk '{ printf("%s %s 0 %s\n", $1, $1, $2) }' $durations

exit 0
30 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/data/get_utt2num_frames.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Copyright 2016  Vimal Manohar
 4 | # Apache 2.0.
 5 | 
 6 | cmd=run.pl
 7 | nj=4
 8 | 
 9 | frame_shift=0.01
10 | frame_overlap=0.015
11 | 
12 | . utils/parse_options.sh
13 | . ./path.sh
14 | 
15 | if [ $# -ne 1 ]; then
16 |   echo "This script writes a file utt2num_frames with the "
17 |   echo "number of frames in each utterance as measured based on the "
18 |   echo "duration of the utterances (in utt2dur) and the specified "
19 |   echo "frame_shift and frame_overlap."
20 |   echo "Usage: $0 <data>"
21 |   exit 1
22 | fi
23 | 
24 | data=$1
25 | 
26 | if [ -s $data/utt2num_frames ]; then
27 |   echo "$0: $data/utt2num_frames already present!"
28 |   exit 0;
29 | fi
30 | 
31 | if [ ! -f $data/feats.scp ]; then
32 |   utils/data/get_utt2dur.sh --nj ${nj} --cmd "$cmd" $data
33 |   awk -v fs=$frame_shift -v fovlp=$frame_overlap \
34 |     '{print $1" "int( ($2 - fovlp) / fs)}' $data/utt2dur > $data/utt2num_frames
35 |   exit 0
36 | fi
37 | 
38 | utils/split_data.sh --per-utt $data $nj || exit 1
39 | $cmd JOB=1:$nj $data/log/get_utt2num_frames.JOB.log \
40 |   feat-to-len scp:$data/split${nj}utt/JOB/feats.scp ark,t:$data/split${nj}utt/JOB/utt2num_frames || exit 1
41 | 
42 | for n in `seq $nj`; do
43 |   cat $data/split${nj}utt/$n/utt2num_frames
44 | done > $data/utt2num_frames
45 | 
46 | echo "$0: Computed and wrote $data/utt2num_frames"
47 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/data/resample_data_dir.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Copyright 2016  Vimal Manohar
 4 | #           2018  Xiaohui Zhang
 5 | # Apache 2.0.
 6 | 
 7 | if [ $# -ne 2 ]; then
 8 |   echo "This script adds a sox line in wav.scp to resample the audio at a "
 9 |   echo "different sampling-rate"
10 |   echo "Usage: $0 <frequency> <data-dir>"
11 |   echo " e.g.: $0 8000 data/dev"
12 |   exit 1
13 | fi
14 | 
15 | freq=$1
16 | dir=$2
17 | 
18 | sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; }
19 | 
20 | if [ -f $dir/feats.scp ]; then
21 |   mkdir -p $dir/.backup
22 |   mv $dir/feats.scp $dir/.backup/
23 |   if [ -f $dir/cmvn.scp ]; then
24 |     mv $dir/cmvn.scp $dir/.backup/
25 |   fi
26 |   echo "$0: feats.scp already exists. Moving it to $dir/.backup"
27 | fi
28 | 
29 | # After resampling we cannot compute utt2dur from wav.scp any more,
30 | # so we create utt2dur now, in case it's needed later
31 | if [ ! -s $dir/utt2dur ]; then
32 |   utils/data/get_utt2dur.sh $dir 1>&2 || exit 1;
33 | fi
34 | 
35 | mv $dir/wav.scp $dir/wav.scp.tmp
36 | cat $dir/wav.scp.tmp | python -c "import sys
37 | for line in sys.stdin.readlines():
38 |   splits = line.strip().split()
39 |   if splits[-1] == '|':
40 |     out_line = line.strip() + ' $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'
41 |   else:
42 |     out_line = '{0} cat {1} | $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'.format(splits[0], ' '.join(splits[1:]))
43 |   print (out_line)" > ${dir}/wav.scp
44 | rm $dir/wav.scp.tmp
45 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/eps2disambig.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | #                2015 Guoguo Chen
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # This script replaces epsilon with #0 on the input side only, of the G.fst
19 | # acceptor.  
20 | 
# Stream the FST text lines from the files named on the command line (or from
# stdin), rewriting <eps> in the third column to #0.
while (my $line = <>) {
  # A "#0" token surrounded by whitespace means the LM itself uses the
  # symbol that is reserved for disambiguation: refuse to continue.
  if ($line =~ m/\s+#0\s+/) {
    print STDERR "$0: ERROR: LM has word #0, which is reserved as disambiguation symbol\n";
    exit 1;
  }
  # Only the field that follows the two leading integer fields is rewritten.
  $line =~ s/^(\d+\s+\d+\s+)<eps>(\s+)/${1}#0${2}/;
  print $line;
}
30 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/filt.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Apache 2.0
 4 | 
 5 | from __future__ import print_function
 6 | import sys
 7 | 
 8 | vocab=set()
 9 | with open(sys.argv[1]) as vocabfile:
10 |     for line in vocabfile:
11 |         vocab.add(line.strip())
12 | 
13 | with open(sys.argv[2]) as textfile:
14 |     for line in textfile:
15 |         print(" ".join([word if word in vocab else '<UNK>' for word in line.strip().split()]))
16 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/fix_ctm.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | stmfile=$1
 4 | ctmfile=$2
 5 | 
 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u`
 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u`
 8 | 
 9 | segments_stm_count=`echo "$segments_stm" | wc -l `
10 | segments_ctm_count=`echo "$segments_ctm" | wc -l `
11 | 
12 | #echo $segments_stm_count
13 | #echo $segments_ctm_count
14 | 
15 | if [ "$segments_stm_count" -gt "$segments_ctm_count"  ] ; then
16 |   pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g")
17 |   (
18 |     for elem in $pp ; do
19 |       echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE"
20 |     done
21 |   ) >> $ctmfile
22 |   echo "FIXED CTM FILE"
23 |   exit 0
24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count"  ] ; then
25 |   echo "Segment STM count: $segments_stm_count"
26 |   echo "Segment CTM count: $segments_ctm_count"
27 |   echo "FAILURE FIXING CTM FILE"
28 |   exit 1
29 | else
30 |   exit 0
31 | fi
32 | 
33 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/lang/bpe/prepend_words.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # This script, prepend '|' to every words in the transcript to mark
 4 | # the beginning of the words for finding the initial-space of every word
 5 | # after decoding.
 6 | 
 7 | import sys
 8 | import io
 9 | import re
10 | 
# Runs of spaces/tabs separate words.
splitter = re.compile("[ \t]+")
# Both streams are wrapped explicitly so that the text is handled as latin-1.
reader = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1')
writer = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1')
for raw in reader:
    tokens = splitter.split(raw.strip(" \t\r\n"))
    # Mark the start of every word with '|'.
    marked = ["|" + tok for tok in tokens]
    writer.write(' '.join(marked) + '\n')
17 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/lang/bpe/reverse.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # This script, reverse all latin and digits sequences
 5 | # (including words like MP3) to put them in the right order in the images.
 6 | 
 7 | import re, os, sys, io
 8 | 
 9 | in_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
10 | out_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
11 | for line in in_stream:
12 |     out_stream.write(re.sub(r'[a-zA-Z0-9][a-zA-Z0-9\s\.\:]*[a-zA-Z0-9]',
13 |                             lambda m:m.group(0)[::-1], line))
14 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/make_absolute.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This script replaces the command readlink -f (which is not portable).
 4 | # It turns a pathname into an absolute pathname, including following soft links.
 5 | target_file=$1
 6 | 
 7 | cd $(dirname $target_file)
 8 | target_file=$(basename "$target_file")
 9 | 
10 | # Iterate down a (possible) chain of symlinks
11 | while [ -L "$target_file" ]; do
12 |     target_file=$(readlink $target_file)
13 |     cd $(dirname $target_file)
14 |     target_file=$(basename $target_file)
15 | done
16 | 
17 | # Compute the canonicalized name by finding the physical path 
18 | # for the directory we're in and appending the target file.
19 | phys_dir=$(pwd -P)
20 | result=$phys_dir/$target_file
21 | echo $result
22 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/require_argument.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # To be sourced by another script
 4 | 
 5 | set -euo pipefail
 6 | 
 7 | if [ $# -ne 1 ]; then
 8 |     echo "Usage: $0 <argument>" >&2
 9 |     echo " e.g.: $0 --data-dir" >&2
10 | fi
11 | 
12 | key=$1
13 | 
14 | name=$(sed -e s/^--// -e s/-/_/g <<< "$key")
15 | 
16 | if eval '[ -z "$'$name'" ]'; then
17 |     echo "$0: option $key is required" >&2
18 |     echo >&2
19 |     echo "$help_message" >&2
20 |     exit 1
21 | fi
22 | 
23 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/require_argument_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # To be sourced by another script
4 | 
# Run the single-option check for every option name passed to this file.
# "$@" and "$i" are quoted so that each argument is forwarded unchanged
# (no word splitting or glob expansion).
for i in "$@"; do
    . utils/require_argument.sh "$i"
done
8 | 
9 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/s2eps.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script replaces <s> and </s> with <eps> (on both input and output sides),
18 | # for the G.fst acceptor.
19 | 
# Process each arc/final-state line; fields are re-joined with tabs on output.
while (my $line = <>) {
    my @fields = split(" ", $line);
    # Lines with at least four fields carry an input (index 2) and an output
    # (index 3) symbol; sentence-boundary symbols in either become <eps>.
    if (scalar(@fields) >= 4) {
        foreach my $idx (2, 3) {
            if ($fields[$idx] eq "<s>" || $fields[$idx] eq "</s>") {
                $fields[$idx] = "<eps>";
            }
        }
    }
    print join("\t", @fields) . "\n";
}
28 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/shuffle_list.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
# Seed selection: "--srand N" picks the seed, otherwise a fixed seed of 0 is
# used so that repeated runs give the same order.
my $seed = 0;
if ($ARGV[0] eq "--srand") {
  $seed = $ARGV[1];
  $seed =~ m/\d+/ || die "Bad argument to --srand option: \"$seed\"";
  splice(@ARGV, 0, 2);  # drop the option and its value
}
srand($seed);

# More than one remaining argument, or something that looks like an unknown
# option, is a usage error.
if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) {
  print "Usage: shuffle_list.pl [--srand N] [input file]  > output\n";
  print "randomizes the order of lines of input.\n";
  exit(1);
}

# Tag every input line with a random key, in input order.
my @tagged;
while (my $line = <>) {
  push @tagged, [ rand(), $line ];
}

# Order the lines by their keys (compared as strings) and print them.
print map { $_->[1] } sort { $a->[0] cmp $b->[0] } @tagged;
45 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
# Each input line is "<speaker> <utt1> <utt2> ..."; emit one
# "<utt> <speaker>" line per utterance.
while (my $line = <>) {
    my ($spk, @utts) = split(" ", $line);
    # A valid line needs the speaker id plus at least one utterance.
    die "Invalid line in spk2utt file: $line" unless @utts > 0;
    print "$_ $spk\n" for @utts;
}
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/summarize_warnings.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 4 | 
 5 |  @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
 6 | 
 7 | $dir = $ARGV[0];
 8 | 
 9 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1;
10 | 
11 | $dir =~ s:/$::; # Remove trailing slash.
12 | 
13 | 
14 | # Group the files into categories where all have the same base-name.
15 | foreach $f (glob ("$dir/*.log")) {
16 |   $f_category = $f;
17 |   # do next expression twice; s///g doesn't work as they overlap.
18 |   $f_category =~ s:\.\d+\.:.*.:;
19 |   $f_category =~ s:\.\d+\.:.*.:;
20 |   $fmap{$f_category} .= " $f";
21 | }
22 | 
23 | sub split_hundreds { # split list of filenames into groups of 100.
24 |   my $names = shift @_;
25 |   my @A = split(" ", $names);
26 |   my @ans = ();
27 |   while (@A > 0) {
28 |     my $group = "";
29 |     for ($x = 0; $x < 100 && @A>0; $x++) {
30 |       $fname = pop @A;
31 |       $group .= "$fname ";
32 |     }
33 |     push @ans, $group;
34 |   }
35 |   return @ans;
36 | }
37 | 
38 | foreach $c (keys %fmap) {
39 |   $n = 0;
40 |   foreach $fgroup (split_hundreds($fmap{$c})) {
41 |     $n += `grep -w WARNING $fgroup | wc -l`;
42 |   }
43 |   if ($n != 0) {
44 |     print "$n warnings in $c\n"
45 |   }
46 | }
47 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/kaldi/utils/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 | 
# At most one input file may be named; with none, stdin is read.
die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt" if (@ARGV > 1);

my @spk_order;   # speakers in order of first appearance
my %utts_of;     # speaker -> reference to list of utterance ids

while (my $line = <>) {
    my @fields = split(" ", $line);
    die "Invalid line in utt2spk file: $line" unless @fields == 2;
    my ($utt, $spk) = @fields;
    # Remember a speaker the first time it is seen so that output order
    # follows the input.
    push @spk_order, $spk unless exists $utts_of{$spk};
    push @{$utts_of{$spk}}, "$utt";
}

# One line per speaker: "<speaker> <utt1> <utt2> ..."
foreach my $spk (@spk_order) {
    print "$spk " . join(' ', @{$utts_of{$spk}}) . "\n";
}


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/cli/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/dataset/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/decoder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/decoder.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/decoder.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/decoder.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow_matching.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow_matching.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow_matching.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/flow_matching.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/length_regulator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/length_regulator.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/length_regulator.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/flow/__pycache__/length_regulator.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/f0_predictor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/f0_predictor.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/f0_predictor.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/f0_predictor.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/generator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/generator.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/generator.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/hifigan/__pycache__/generator.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/llm/__pycache__/llm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/llm/__pycache__/llm.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/llm/__pycache__/llm.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/llm/__pycache__/llm.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/activation.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/activation.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/activation.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/activation.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/attention.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/attention.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/attention.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/attention.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/convolution.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/convolution.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/convolution.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/convolution.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/embedding.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/embedding.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/embedding.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/embedding.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder_layer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder_layer.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder_layer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/encoder_layer.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/label_smoothing_loss.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/label_smoothing_loss.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/label_smoothing_loss.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/label_smoothing_loss.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/positionwise_feed_forward.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/positionwise_feed_forward.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/positionwise_feed_forward.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/positionwise_feed_forward.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/subsampling.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/subsampling.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/subsampling.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/transformer/__pycache__/subsampling.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/__init__.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/__init__.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/block_mask_util.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/block_mask_util.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/block_mask_util.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/block_mask_util.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/class_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/class_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/class_utils.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/class_utils.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/common.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/common.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/common.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/common.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/mask.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/mask.cpython-310.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/mask.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/__pycache__/mask.cpython-312.pyc


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/cosyvoice/utils/block_mask_util.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def create_grid_mask(seq_length, trunck_length, fill_triangle):
 5 |     assert seq_length > 0
 6 | 
 7 |     # 先不考虑seen_length创建一个grid mask：
 8 |     if fill_triangle:
 9 |         mask = 1 - torch.triu(torch.ones(seq_length, seq_length), diagonal=1)
10 |         # 下三角与主对角线都为1
11 |     else:
12 |         mask = torch.zeros(seq_length, seq_length)
13 | 
14 |     for i in range(seq_length):
15 |         trunck_idx = i // trunck_length
16 |         trunck_start = trunck_idx * trunck_length
17 |         trunck_end = trunck_length + trunck_start
18 |         mask[i][trunck_start:trunck_end] = 1
19 | 
20 |     return mask
21 | 
22 | 
if __name__ == "__main__":
    # Demo: 8 positions split into chunks of 3, with the lower triangle filled in.
    print(create_grid_mask(seq_length=8, trunck_length=3, fill_triangle=True).int())
# Expected output:
# tensor([[1, 1, 1, 0, 0, 0, 0, 0],
#         [1, 1, 1, 0, 0, 0, 0, 0],
#         [1, 1, 1, 0, 0, 0, 0, 0],
#         [1, 1, 1, 1, 1, 1, 0, 0],
#         [1, 1, 1, 1, 1, 1, 0, 0],
#         [1, 1, 1, 1, 1, 1, 0, 0],
#         [1, 1, 1, 1, 1, 1, 1, 1],
#         [1, 1, 1, 1, 1, 1, 1, 1]], dtype=torch.int32)
34 | 
35 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/.env.example:
--------------------------------------------------------------------------------
1 | # example file for storing private and user-specific environment variables, like keys or system paths
2 | # rename it to ".env" (excluded from version control by default)
3 | # .env is loaded by train.py automatically
4 | # hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}
5 | 
6 | MY_VAR="/home/user/my/system/path"
7 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## What does this PR do?
 2 | 
 3 | <!--
 4 | Please include a summary of the change and which issue is fixed.
 5 | Please also include relevant motivation and context.
 6 | List any dependencies that are required for this change.
 7 | List all the breaking changes introduced by this pull request.
 8 | -->
 9 | 
10 | Fixes #\<issue_number>
11 | 
12 | ## Before submitting
13 | 
14 | - [ ] Did you make sure **title is self-explanatory** and **the description concisely explains the PR**?
15 | - [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
16 | - [ ] Did you list all the **breaking changes** introduced by this pull request?
17 | - [ ] Did you **test your PR locally** with `pytest` command?
18 | - [ ] Did you **run pre-commit hooks** with `pre-commit run -a` command?
19 | 
20 | ## Did you have fun?
21 | 
22 | Make sure you had fun coding 🙃
23 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/.github/codecov.yml:
--------------------------------------------------------------------------------
 1 | coverage:
 2 |   status:
 3 |     # measures overall project coverage
 4 |     project:
 5 |       default:
 6 |         threshold: 100% # maximum drop in coverage that is still reported as success
 7 | 
 8 |     # measures PR or single commit coverage
 9 |     patch:
10 |       default:
11 |         threshold: 100% # maximum drop in coverage that is still reported as success
12 | 
13 | 
14 |     # project: off
15 |     # patch: off
16 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     target-branch: "dev"
11 |     schedule:
12 |       interval: "daily"
13 |     ignore:
14 |       - dependency-name: "pytorch-lightning"
15 |         update-types: ["version-update:semver-patch"]
16 |       - dependency-name: "torchmetrics"
17 |         update-types: ["version-update:semver-patch"]
18 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/.github/release-drafter.yml:
--------------------------------------------------------------------------------
 1 | name-template: "v$RESOLVED_VERSION"
 2 | tag-template: "v$RESOLVED_VERSION"
 3 | 
 4 | categories:
 5 |   - title: "🚀 Features"
 6 |     labels:
 7 |       - "feature"
 8 |       - "enhancement"
 9 |   - title: "🐛 Bug Fixes"
10 |     labels:
11 |       - "fix"
12 |       - "bugfix"
13 |       - "bug"
14 |   - title: "🧹 Maintenance"
15 |     labels:
16 |       - "maintenance"
17 |       - "dependencies"
18 |       - "refactoring"
19 |       - "cosmetic"
20 |       - "chore"
21 |   - title: "📝️ Documentation"
22 |     labels:
23 |       - "documentation"
24 |       - "docs"
25 | 
26 | change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
27 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions
28 | 
29 | version-resolver:
30 |   major:
31 |     labels:
32 |       - "major"
33 |   minor:
34 |     labels:
35 |       - "minor"
36 |   patch:
37 |     labels:
38 |       - "patch"
39 |   default: patch
40 | 
41 | template: |
42 |   ## Changes
43 | 
44 |   $CHANGES
45 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/.project-root:
--------------------------------------------------------------------------------
1 | # this file is required for inferring the project root directory
2 | # do not delete
3 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Shivam Mehta
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.md
 2 | include LICENSE.txt
 3 | include requirements.*.txt
 4 | include *.cff
 5 | include requirements.txt
 6 | include matcha/VERSION
 7 | recursive-include matcha *.json
 8 | recursive-include matcha *.html
 9 | recursive-include matcha *.png
10 | recursive-include matcha *.md
11 | recursive-include matcha *.py
12 | recursive-include matcha *.pyx
13 | recursive-exclude tests *
14 | prune tests*
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | help:  ## Show help
 3 | 	@grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
 4 | 
 5 | clean: ## Clean autogenerated files
 6 | 	rm -rf dist
 7 | 	find . -type f -name "*.DS_Store" -ls -delete
 8 | 	find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
 9 | 	find . | grep -E ".pytest_cache" | xargs rm -rf
10 | 	find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
11 | 	rm -f .coverage
12 | 
13 | clean-logs: ## Clean logs
14 | 	rm -rf logs/**
15 | 
16 | create-package: ## Create wheel and tar gz
17 | 	rm -rf dist/
18 | 	python setup.py bdist_wheel --plat-name=manylinux1_x86_64
19 | 	python setup.py sdist
20 | 	python -m twine upload  dist/* --verbose --skip-existing
21 | 
22 | format: ## Run pre-commit hooks
23 | 	pre-commit run -a
24 | 
25 | sync: ## Merge changes from main branch to your current branch
26 | 	git pull
27 | 	git pull origin main
28 | 
29 | test: ## Run not slow tests
30 | 	pytest -k "not slow"
31 | 
32 | test-full: ## Run all tests
33 | 	pytest
34 | 
35 | train-ljspeech: ## Train the model
36 | 	python matcha/train.py experiment=ljspeech
37 | 
38 | train-ljspeech-min: ## Train the model with minimum memory
39 | 	python matcha/train.py experiment=ljspeech_min_memory
40 | 
41 | start_app: ## Start the app
42 | 	python matcha/app.py
43 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/__init__.py:
--------------------------------------------------------------------------------
1 | # this file is needed here to include configs when building project as a package
2 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/callbacks/default.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - model_checkpoint.yaml
3 |   - model_summary.yaml
4 |   - rich_progress_bar.yaml
5 |   - _self_
6 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/callbacks/model_checkpoint.yaml:
--------------------------------------------------------------------------------
 1 | # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
 2 | 
 3 | model_checkpoint:
 4 |   _target_: lightning.pytorch.callbacks.ModelCheckpoint
 5 |   dirpath: ${paths.output_dir}/checkpoints # directory to save the model file
 6 |   filename: checkpoint_{epoch:03d}  # checkpoint filename
 7 |   monitor: epoch # name of the logged metric which determines when model is improving
 8 |   verbose: False # verbosity mode
 9 |   save_last: true # additionally always save an exact copy of the last checkpoint to a file last.ckpt
10 |   save_top_k: 10 # save k best models (determined by above metric)
11 |   mode: "max" # "max" means higher metric value is better, can be also "min"
12 |   auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
13 |   save_weights_only: False # if True, then only the model’s weights will be saved
14 |   every_n_train_steps: null # number of training steps between checkpoints
15 |   train_time_interval: null # checkpoints are monitored at the specified time interval
16 |   every_n_epochs: 100 # number of epochs between checkpoints
17 |   save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
18 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/callbacks/model_summary.yaml:
--------------------------------------------------------------------------------
1 | # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html
2 | 
3 | model_summary:
4 |   _target_: lightning.pytorch.callbacks.RichModelSummary
5 |   max_depth: 3 # the maximum depth of layer nesting that the summary will include
6 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/callbacks/none.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/callbacks/none.yaml


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/callbacks/rich_progress_bar.yaml:
--------------------------------------------------------------------------------
1 | # https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html
2 | 
3 | rich_progress_bar:
4 |   _target_: lightning.pytorch.callbacks.RichProgressBar
5 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/data/hi-fi_en-US_female.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - ljspeech
 3 |   - _self_
 4 | 
 5 | # Dataset URL: https://ast-astrec.nict.go.jp/en/release/hi-fi-captain/
 6 | _target_: matcha.data.text_mel_datamodule.TextMelDataModule
 7 | name: hi-fi_en-US_female
 8 | train_filelist_path: data/filelists/hi-fi-captain-en-us-female_train.txt
 9 | valid_filelist_path: data/filelists/hi-fi-captain-en-us-female_val.txt
10 | batch_size: 32
11 | cleaners: [english_cleaners_piper]
12 | data_statistics:  # Computed for this dataset
13 |   mel_mean: -6.38385
14 |   mel_std: 2.541796
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/data/ljspeech.yaml:
--------------------------------------------------------------------------------
 1 | _target_: matcha.data.text_mel_datamodule.TextMelDataModule
 2 | name: ljspeech
 3 | train_filelist_path: data/filelists/ljs_audio_text_train_filelist.txt
 4 | valid_filelist_path: data/filelists/ljs_audio_text_val_filelist.txt
 5 | batch_size: 32
 6 | num_workers: 20
 7 | pin_memory: True
 8 | cleaners: [english_cleaners2]
 9 | add_blank: True
10 | n_spks: 1
11 | n_fft: 1024
12 | n_feats: 80
13 | sample_rate: 22050
14 | hop_length: 256
15 | win_length: 1024
16 | f_min: 0
17 | f_max: 8000
18 | data_statistics:  # Computed for ljspeech dataset
19 |   mel_mean: -5.536622
20 |   mel_std: 2.116101
21 | seed: ${seed}
22 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/data/vctk.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - ljspeech
 3 |   - _self_
 4 | 
 5 | _target_: matcha.data.text_mel_datamodule.TextMelDataModule
 6 | name: vctk
 7 | train_filelist_path: data/filelists/vctk_audio_sid_text_train_filelist.txt
 8 | valid_filelist_path: data/filelists/vctk_audio_sid_text_val_filelist.txt
 9 | batch_size: 32
10 | add_blank: True
11 | n_spks: 109
12 | data_statistics:  # Computed for vctk dataset
13 |   mel_mean: -6.630575
14 |   mel_std: 2.482914
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/debug/default.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # default debugging setup, runs 1 full epoch
 4 | # other debugging configs can inherit from this one
 5 | 
 6 | # overwrite task name so debugging logs are stored in separate folder
 7 | task_name: "debug"
 8 | 
 9 | # disable callbacks and loggers during debugging
10 | # callbacks: null
11 | # logger: null
12 | 
13 | extras:
14 |   ignore_warnings: False
15 |   enforce_tags: False
16 | 
17 | # sets level of all command line loggers to 'DEBUG'
18 | # https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
19 | hydra:
20 |   job_logging:
21 |     root:
22 |       level: DEBUG
23 | 
24 |   # use this to also set hydra loggers to 'DEBUG'
25 |   # verbose: True
26 | 
27 | trainer:
28 |   max_epochs: 1
29 |   accelerator: cpu # debuggers don't like gpus
30 |   devices: 1 # debuggers don't like multiprocessing
31 |   detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
32 | 
33 | data:
34 |   num_workers: 0 # debuggers don't like multiprocessing
35 |   pin_memory: False # disable gpu memory pin
36 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/debug/fdr.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # runs 1 train, 1 validation and 1 test step
 4 | 
 5 | defaults:
 6 |   - default
 7 | 
 8 | trainer:
 9 |   fast_dev_run: true
10 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/debug/limit.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # uses only 1% of the training data and 5% of validation/test data
 4 | 
 5 | defaults:
 6 |   - default
 7 | 
 8 | trainer:
 9 |   max_epochs: 3
10 |   limit_train_batches: 0.01
11 |   limit_val_batches: 0.05
12 |   limit_test_batches: 0.05
13 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/debug/overfit.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # overfits to 3 batches
 4 | 
 5 | defaults:
 6 |   - default
 7 | 
 8 | trainer:
 9 |   max_epochs: 20
10 |   overfit_batches: 3
11 | 
12 | # model ckpt and early stopping need to be disabled during overfitting
13 | callbacks: null
14 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/debug/profiler.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # runs with execution time profiling
 4 | 
 5 | defaults:
 6 |   - default
 7 | 
 8 | trainer:
 9 |   max_epochs: 1
10 |   # profiler: "simple"
11 |   profiler: "advanced"
12 |   # profiler: "pytorch"
13 |   accelerator: gpu
14 | 
15 |   limit_train_batches: 0.02
16 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/eval.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | defaults:
 4 |   - _self_
 5 |   - data: ljspeech # configs/data/ljspeech.yaml; choose datamodule with `test_dataloader()` for evaluation
 6 |   - model: matcha # configs/model/matcha.yaml (the template's `mnist` entries have no matching config files)
 7 |   - logger: null
 8 |   - trainer: default
 9 |   - paths: default
10 |   - extras: default
11 |   - hydra: default
12 | 
13 | task_name: "eval"
14 | 
15 | tags: ["dev"]
16 | 
17 | # passing checkpoint path is necessary for evaluation
18 | ckpt_path: ???
19 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/experiment/hifi_dataset_piper_phonemizer.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # to execute this experiment run:
 4 | # python train.py experiment=hifi_dataset_piper_phonemizer
 5 | 
 6 | defaults:
 7 |   - override /data: hi-fi_en-US_female.yaml
 8 | 
 9 | # all parameters below will be merged with parameters from default configurations set above
10 | # this allows you to overwrite only specified parameters
11 | 
12 | tags: ["hi-fi", "single_speaker", "piper_phonemizer", "en_US", "female"]
13 | 
14 | run_name: hi-fi_en-US_female_piper_phonemizer
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/experiment/ljspeech.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # to execute this experiment run:
 4 | # python train.py experiment=ljspeech
 5 | 
 6 | defaults:
 7 |   - override /data: ljspeech.yaml
 8 | 
 9 | # all parameters below will be merged with parameters from default configurations set above
10 | # this allows you to overwrite only specified parameters
11 | 
12 | tags: ["ljspeech"]
13 | 
14 | run_name: ljspeech
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/experiment/ljspeech_min_memory.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # to execute this experiment run:
 4 | # python train.py experiment=ljspeech_min_memory
 5 | 
 6 | defaults:
 7 |   - override /data: ljspeech.yaml
 8 | 
 9 | # all parameters below will be merged with parameters from default configurations set above
10 | # this allows you to overwrite only specified parameters
11 | 
12 | tags: ["ljspeech"]
13 | 
14 | run_name: ljspeech_min
15 | 
16 | 
17 | model:
18 |   out_size: 172
19 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/experiment/multispeaker.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # to execute this experiment run:
 4 | # python train.py experiment=multispeaker
 5 | 
 6 | defaults:
 7 |   - override /data: vctk.yaml
 8 | 
 9 | # all parameters below will be merged with parameters from default configurations set above
10 | # this allows you to overwrite only specified parameters
11 | 
12 | tags: ["multispeaker"]
13 | 
14 | run_name: multispeaker
15 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/extras/default.yaml:
--------------------------------------------------------------------------------
1 | # disable python warnings if they annoy you
2 | ignore_warnings: False
3 | 
4 | # ask user for tags if none are provided in the config
5 | enforce_tags: True
6 | 
7 | # pretty print config tree at the start of the run using Rich library
8 | print_config: True
9 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/hydra/default.yaml:
--------------------------------------------------------------------------------
 1 | # https://hydra.cc/docs/configure_hydra/intro/
 2 | 
 3 | # enable color logging
 4 | defaults:
 5 |   - override hydra_logging: colorlog
 6 |   - override job_logging: colorlog
 7 | 
 8 | # output directory, generated dynamically on each run
 9 | run:
10 |   dir: ${paths.log_dir}/${task_name}/${run_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
11 | sweep:
12 |   dir: ${paths.log_dir}/${task_name}/${run_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
13 |   subdir: ${hydra.job.num}
14 | 
15 | job_logging:
16 |   handlers:
17 |     file:
18 |       # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
19 |       filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
20 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/local/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/local/.gitkeep


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/aim.yaml:
--------------------------------------------------------------------------------
 1 | # https://aimstack.io/
 2 | 
 3 | # example usage in lightning module:
 4 | # https://github.com/aimhubio/aim/blob/main/examples/pytorch_lightning_track.py
 5 | 
 6 | # open the Aim UI with the following command (run in the folder containing the `.aim` folder):
 7 | # `aim up`
 8 | 
 9 | aim:
10 |   _target_: aim.pytorch_lightning.AimLogger
11 |   repo: ${paths.root_dir} # .aim folder will be created here
12 |   # repo: "aim://ip_address:port" # can instead provide IP address pointing to Aim remote tracking server which manages the repo, see https://aimstack.readthedocs.io/en/latest/using/remote_tracking.html#
13 | 
14 |   # aim allows grouping runs under an experiment name
15 |   experiment: null # any string, set to "default" if not specified
16 | 
17 |   train_metric_prefix: "train/"
18 |   val_metric_prefix: "val/"
19 |   test_metric_prefix: "test/"
20 | 
21 |   # sets the tracking interval in seconds for system usage metrics (CPU, GPU, memory, etc.)
22 |   system_tracking_interval: 10 # set to null to disable system metrics tracking
23 | 
24 |   # enable/disable logging of system params such as installed packages, git info, env vars, etc.
25 |   log_system_params: true
26 | 
27 |   # enable/disable tracking console logs (default value is true)
28 |   capture_terminal_logs: false # set to false to avoid infinite console log loop issue https://github.com/aimhubio/aim/issues/2550
29 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/comet.yaml:
--------------------------------------------------------------------------------
 1 | # https://www.comet.ml
 2 | 
 3 | comet:
 4 |   _target_: lightning.pytorch.loggers.comet.CometLogger
 5 |   api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable
 6 |   save_dir: "${paths.output_dir}"
 7 |   project_name: "lightning-hydra-template"
 8 |   rest_api_key: null
 9 |   # experiment_name: ""
10 |   experiment_key: null # set to resume experiment
11 |   offline: False
12 |   prefix: ""
13 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/csv.yaml:
--------------------------------------------------------------------------------
1 | # csv logger built in lightning
2 | 
3 | csv:
4 |   _target_: lightning.pytorch.loggers.csv_logs.CSVLogger
5 |   save_dir: "${paths.output_dir}"
6 |   name: "csv/"
7 |   prefix: ""
8 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/many_loggers.yaml:
--------------------------------------------------------------------------------
 1 | # train with many loggers at once
 2 | 
 3 | defaults:
 4 |   # - comet
 5 |   - csv
 6 |   # - mlflow
 7 |   # - neptune
 8 |   - tensorboard
 9 |   - wandb
10 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/mlflow.yaml:
--------------------------------------------------------------------------------
 1 | # https://mlflow.org
 2 | 
 3 | mlflow:
 4 |   _target_: lightning.pytorch.loggers.mlflow.MLFlowLogger
 5 |   # experiment_name: ""
 6 |   # run_name: ""
 7 |   tracking_uri: ${paths.log_dir}/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI
 8 |   tags: null
 9 |   # save_dir: "./mlruns"
10 |   prefix: ""
11 |   artifact_location: null
12 |   # run_id: ""
13 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/neptune.yaml:
--------------------------------------------------------------------------------
 1 | # https://neptune.ai
 2 | 
 3 | neptune:
 4 |   _target_: lightning.pytorch.loggers.neptune.NeptuneLogger
 5 |   api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
 6 |   project: username/lightning-hydra-template
 7 |   # name: ""
 8 |   log_model_checkpoints: True
 9 |   prefix: ""
10 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/tensorboard.yaml:
--------------------------------------------------------------------------------
 1 | # https://www.tensorflow.org/tensorboard/
 2 | 
 3 | tensorboard:
 4 |   _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
 5 |   save_dir: "${paths.output_dir}/tensorboard/"
 6 |   name: null
 7 |   log_graph: False
 8 |   default_hp_metric: True
 9 |   prefix: ""
10 |   # version: ""
11 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/logger/wandb.yaml:
--------------------------------------------------------------------------------
 1 | # https://wandb.ai
 2 | 
 3 | wandb:
 4 |   _target_: lightning.pytorch.loggers.wandb.WandbLogger
 5 |   # name: "" # name of the run (normally generated by wandb)
 6 |   save_dir: "${paths.output_dir}"
 7 |   offline: False
 8 |   id: null # pass correct id to resume experiment!
 9 |   anonymous: null # enable anonymous logging
10 |   project: "lightning-hydra-template"
11 |   log_model: False # upload lightning ckpts
12 |   prefix: "" # a string to put at the beginning of metric keys
13 |   # entity: "" # set to name of your wandb team
14 |   group: ""
15 |   tags: []
16 |   job_type: ""
17 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/model/cfm/default.yaml:
--------------------------------------------------------------------------------
1 | name: CFM
2 | solver: euler
3 | sigma_min: 1e-4
4 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/model/decoder/default.yaml:
--------------------------------------------------------------------------------
1 | channels: [256, 256]
2 | dropout: 0.05
3 | attention_head_dim: 64
4 | n_blocks: 1
5 | num_mid_blocks: 2
6 | num_heads: 2
7 | act_fn: snakebeta
8 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/model/encoder/default.yaml:
--------------------------------------------------------------------------------
 1 | encoder_type: RoPE Encoder
 2 | encoder_params:
 3 |   n_feats: ${model.n_feats}
 4 |   n_channels: 192
 5 |   filter_channels: 768
 6 |   filter_channels_dp: 256
 7 |   n_heads: 2
 8 |   n_layers: 6
 9 |   kernel_size: 3
10 |   p_dropout: 0.1
11 |   spk_emb_dim: 64
12 |   n_spks: 1
13 |   prenet: true
14 | 
15 | duration_predictor_params:
16 |   filter_channels_dp: ${model.encoder.encoder_params.filter_channels_dp}
17 |   kernel_size: 3
18 |   p_dropout: ${model.encoder.encoder_params.p_dropout}
19 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/model/matcha.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - _self_
 3 |   - encoder: default.yaml
 4 |   - decoder: default.yaml
 5 |   - cfm: default.yaml
 6 |   - optimizer: adam.yaml
 7 | 
 8 | _target_: matcha.models.matcha_tts.MatchaTTS
 9 | n_vocab: 178
10 | n_spks: ${data.n_spks}
11 | spk_emb_dim: 64
12 | n_feats: 80
13 | data_statistics: ${data.data_statistics}
14 | out_size: null # Must be divisible by 4
15 | prior_loss: true
16 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/model/optimizer/adam.yaml:
--------------------------------------------------------------------------------
1 | _target_: torch.optim.Adam
2 | _partial_: true
3 | lr: 1e-4
4 | weight_decay: 0.0
5 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/paths/default.yaml:
--------------------------------------------------------------------------------
 1 | # path to root directory
 2 | # this requires PROJECT_ROOT environment variable to exist
 3 | # you can replace it with "." if you want the root to be the current working directory
 4 | root_dir: ${oc.env:PROJECT_ROOT}
 5 | 
 6 | # path to data directory
 7 | data_dir: ${paths.root_dir}/data/
 8 | 
 9 | # path to logging directory
10 | log_dir: ${paths.root_dir}/logs/
11 | 
12 | # path to output directory, created dynamically by hydra
13 | # path generation pattern is specified in `configs/hydra/default.yaml`
14 | # use it to store all files generated during the run, like ckpts and metrics
15 | output_dir: ${hydra:runtime.output_dir}
16 | 
17 | # path to working directory
18 | work_dir: ${hydra:runtime.cwd}
19 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/trainer/cpu.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - default
3 | 
4 | accelerator: cpu
5 | devices: 1
6 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/trainer/ddp.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - default
 3 | 
 4 | strategy: ddp
 5 | 
 6 | accelerator: gpu
 7 | devices: [0,1]
 8 | num_nodes: 1
 9 | sync_batchnorm: True
10 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/trainer/ddp_sim.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - default
3 | 
4 | # simulate DDP on CPU, useful for debugging
5 | accelerator: cpu
6 | devices: 2
7 | strategy: ddp_spawn
8 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/trainer/default.yaml:
--------------------------------------------------------------------------------
 1 | _target_: lightning.pytorch.trainer.Trainer
 2 | 
 3 | default_root_dir: ${paths.output_dir}
 4 | 
 5 | max_epochs: -1
 6 | 
 7 | accelerator: gpu
 8 | devices: [0]
 9 | 
10 | # mixed precision for extra speed-up
11 | precision: 16-mixed
12 | 
13 | # perform a validation loop every N training epochs
14 | check_val_every_n_epoch: 1
15 | 
16 | # set True to ensure deterministic results
17 | # makes training slower but gives more reproducibility than just setting seeds
18 | deterministic: False
19 | 
20 | gradient_clip_val: 5.0
21 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/trainer/gpu.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - default
3 | 
4 | accelerator: gpu
5 | devices: 1
6 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/configs/trainer/mps.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - default
3 | 
4 | accelerator: mps
5 | devices: 1
6 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.5.1
2 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/data/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/data/components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/data/components/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/hifigan/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Jungil Kong
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/hifigan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/hifigan/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/hifigan/config.py:
--------------------------------------------------------------------------------
 1 | v1 = {
 2 |     "resblock": "1",
 3 |     "num_gpus": 0,
 4 |     "batch_size": 16,
 5 |     "learning_rate": 0.0004,
 6 |     "adam_b1": 0.8,
 7 |     "adam_b2": 0.99,
 8 |     "lr_decay": 0.999,
 9 |     "seed": 1234,
10 |     "upsample_rates": [8, 8, 2, 2],  # 8 * 8 * 2 * 2 = 256, the same value as "hop_size" below
11 |     "upsample_kernel_sizes": [16, 16, 4, 4],
12 |     "upsample_initial_channel": 512,
13 |     "resblock_kernel_sizes": [3, 7, 11],
14 |     "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
15 |     "resblock_initial_channel": 256,
16 |     "segment_size": 8192,
17 |     "num_mels": 80,
18 |     "num_freq": 1025,
19 |     "n_fft": 1024,
20 |     "hop_size": 256,
21 |     "win_size": 1024,
22 |     "sampling_rate": 22050,
23 |     "fmin": 0,
24 |     "fmax": 8000,
25 |     "fmax_loss": None,
26 |     "num_workers": 4,
27 |     "dist_config": {"dist_backend": "nccl", "dist_url": "tcp://localhost:54321", "world_size": 1},
28 | }
29 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/hifigan/env.py:
--------------------------------------------------------------------------------
 1 | """ from https://github.com/jik876/hifi-gan """
 2 | 
 3 | import os
 4 | import shutil
 5 | 
 6 | 
 7 | class AttrDict(dict):  # dict whose keys can also be read and written as attributes
 8 |     def __init__(self, *args, **kwargs):
 9 |         super().__init__(*args, **kwargs)
10 |         self.__dict__ = self  # the instance namespace IS the dict, so d.key and d["key"] share storage
11 | 
12 | 
13 | def build_env(config, config_name, path):  # copy the file `config` to `path/config_name`
14 |     t_path = os.path.join(path, config_name)  # destination path of the copy
15 |     if config != t_path:  # plain string comparison: skip the copy only when source and destination are spelled identically
16 |         os.makedirs(path, exist_ok=True)  # create the target directory if it does not exist yet
17 |         shutil.copyfile(config, os.path.join(path, config_name))
18 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/models/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/models/components/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/models/components/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/onnx/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/onnx/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/text/symbols.py:
--------------------------------------------------------------------------------
 1 | """ from https://github.com/keithito/tacotron
 2 | 
 3 | Defines the set of symbols used in text input to the model.
 4 | """
 5 | _pad = "_"
 6 | _punctuation = ';:,.!?¡¿—…"«»“” '
 7 | _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 8 | _letters_ipa = (
 9 |     "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
10 | )
11 | 
12 | 
13 | # Export all symbols:
14 | symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
15 | 
16 | # Special symbol ids
17 | SPACE_ID = symbols.index(" ")
18 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from matcha.utils.instantiators import instantiate_callbacks, instantiate_loggers
2 | from matcha.utils.logging_utils import log_hyperparameters
3 | from matcha.utils.pylogger import get_pylogger
4 | from matcha.utils.rich_utils import enforce_tags, print_config_tree
5 | from matcha.utils.utils import extras, get_metric_value, task_wrapper
6 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/utils/monotonic_align/__init__.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from matcha.utils.monotonic_align.core import maximum_path_c
 5 | 
 6 | 
 7 | def maximum_path(value, mask):
 8 |     """Cython optimised version.
 9 |     value: [b, t_x, t_y]
10 |     mask: [b, t_x, t_y]
11 |     """
12 |     value = value * mask  # zero out entries where the mask is 0
13 |     device = value.device  # remembered so the result can be returned on the caller's device
14 |     dtype = value.dtype  # remembered so the result can be returned in the caller's dtype
15 |     value = value.data.cpu().numpy().astype(np.float32)  # maximum_path_c is given a float32 numpy array on the CPU
16 |     path = np.zeros_like(value).astype(np.int32)  # int32 output buffer; presumably filled in place by maximum_path_c (its return value is unused)
17 |     mask = mask.data.cpu().numpy()
18 | 
19 |     t_x_max = mask.sum(1)[:, 0].astype(np.int32)  # per batch item: mask summed over axis 1, read at index 0 of the last axis
20 |     t_y_max = mask.sum(2)[:, 0].astype(np.int32)  # per batch item: mask summed over axis 2, read at index 0 of the remaining axis
21 |     maximum_path_c(path, value, t_x_max, t_y_max)
22 |     return torch.from_numpy(path).to(device=device, dtype=dtype)  # back to the original device and dtype
23 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/utils/monotonic_align/setup.py:
--------------------------------------------------------------------------------
1 | # from distutils.core import setup
2 | # from Cython.Build import cythonize
3 | # import numpy
4 | 
5 | # setup(name='monotonic_align',
6 | #       ext_modules=cythonize("core.pyx"),
7 | #       include_dirs=[numpy.get_include()])
8 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/matcha/utils/pylogger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from lightning.pytorch.utilities import rank_zero_only
 4 | 
 5 | 
 6 | def get_pylogger(name: str = __name__) -> logging.Logger:
 7 |     """Initializes a multi-GPU-friendly python command line logger.
 8 | 
 9 |     :param name: The name of the logger, defaults to ``__name__`` as evaluated in this module (not the caller's module name).
10 | 
11 |     :return: A logger object whose level methods are wrapped with ``rank_zero_only``.
12 |     """
13 |     logger = logging.getLogger(name)  # logging.getLogger returns the same object for the same name
14 | 
15 |     # this ensures all logging levels get marked with the rank zero decorator
16 |     # otherwise logs would get multiplied for each GPU process in multi-GPU setup
17 |     logging_levels = ("debug", "info", "warning", "error", "exception", "fatal", "critical")
18 |     for level in logging_levels:
19 |         setattr(logger, level, rank_zero_only(getattr(logger, level)))  # NOTE: a repeated call with the same name wraps the already-wrapped method again
20 | 
21 |     return logger
22 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/notebooks/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/notebooks/.gitkeep


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools", "wheel", "cython==0.29.35", "numpy==1.24.3", "packaging"]
 3 | 
 4 | [tool.black]
 5 | line-length = 120
 6 | target-version = ['py310']
 7 | exclude = '''
 8 | 
 9 | (
10 |   /(
11 |       \.eggs         # exclude a few common directories in the
12 |     | \.git          # root of the project
13 |     | \.hg
14 |     | \.mypy_cache
15 |     | \.tox
16 |     | \.venv
17 |     | _build
18 |     | buck-out
19 |     | build
20 |     | dist
21 |   )/
22 |   | foo.py           # also separately exclude a file named foo.py in
23 |                      # the root of the project
24 | )
25 | '''
26 | 
27 | [tool.pytest.ini_options]
28 | addopts = [
29 |   "--color=yes",
30 |   "--durations=0",
31 |   "--strict-markers",
32 |   "--doctest-modules",
33 | ]
34 | filterwarnings = [
35 |   "ignore::DeprecationWarning",
36 |   "ignore::UserWarning",
37 | ]
38 | log_cli = "True"
39 | markers = [
40 |   "slow: slow tests",
41 | ]
42 | minversion = "6.0"
43 | testpaths = "tests/"
44 | 
45 | [tool.coverage.report]
46 | exclude_lines = [
47 |     "pragma: nocover",
48 |     "raise NotImplementedError",
49 |     "raise NotImplementedError()",
50 |     "if __name__ == .__main__.:",
51 | ]
52 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/requirements.txt:
--------------------------------------------------------------------------------
 1 | # --------- pytorch --------- #
 2 | torch>=2.0.0
 3 | torchvision>=0.15.0
 4 | lightning>=2.0.0
 5 | torchmetrics>=0.11.4
 6 | 
 7 | # --------- hydra --------- #
 8 | hydra-core==1.3.2
 9 | hydra-colorlog==1.2.0
10 | hydra-optuna-sweeper==1.2.0
11 | 
12 | # --------- loggers --------- #
13 | # wandb
14 | # neptune-client
15 | # mlflow
16 | # comet-ml
17 | # aim>=3.16.2  # no lower than 3.16.2, see https://github.com/aimhubio/aim/issues/2550
18 | 
19 | # --------- others --------- #
20 | rootutils       # standardizing the project root setup
21 | pre-commit      # hooks for applying linters on commit
22 | rich            # beautiful text formatting in terminal
23 | pytest          # tests
24 | # sh            # for running bash commands in some tests (linux/macos only)
25 | phonemizer      # phonemization of text
26 | tensorboard
27 | librosa
28 | Cython
29 | numpy
30 | einops
31 | inflect
32 | Unidecode
33 | scipy
34 | torchaudio
35 | matplotlib
36 | pandas
37 | conformer==0.3.2
38 | diffusers==0.25.0
39 | notebook
40 | ipywidgets
41 | gradio==3.43.2
42 | gdown
43 | wget
44 | seaborn
45 | piper_phonemize
46 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/GLM4V/third_party/Matcha-TTS/scripts/schedule.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Schedule execution of many runs
3 | # Run from root folder with: bash scripts/schedule.sh
4 | # Run 1: passes trainer.max_epochs=5 and logger=csv to src/train.py (NOTE(review): confirm this path exists from the root folder).
5 | python src/train.py trainer.max_epochs=5 logger=csv
6 | # Run 2: same script with trainer.max_epochs=10; starts once run 1 has exited, whatever its exit status.
7 | python src/train.py trainer.max_epochs=10 logger=csv
8 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/MimiCodec/mimi_config.yaml:
--------------------------------------------------------------------------------
1 | generator:
2 |   name: MimiCodec
3 |   config:
4 |     encoder_rates: [8, 6, 5, 4]
5 |     codebook_size: 2048
6 |     codebook_dim: 256
7 |     rvq_layers: 8


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/MimiCodec/model/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/MimiCodec/model/models/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/MimiCodec/model/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/MimiCodec/model/modules/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/MimiCodec/model/quantization/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """RVQ."""
11 | # flake8: noqa
12 | from .vq import ResidualVectorQuantizer, SplitResidualVectorQuantizer
13 | from .base import BaseQuantizer, DummyQuantizer, QuantizedResult
14 | 


--------------------------------------------------------------------------------
/MLLM_v2/tools/tokenizer/MimiCodec/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/MLLM_v2/tools/tokenizer/MimiCodec/model/utils/__init__.py


--------------------------------------------------------------------------------
/MLLM_v2/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Kyutai, all rights reserved.
 2 | # This source code is licensed under the license found in the
 3 | # LICENSE file in the root directory of this source tree.
 4 | 
 5 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 6 | # All rights reserved.
 7 | #
 8 | # This source code is licensed under the license found in the
 9 | # LICENSE file in the root directory of this source tree.
10 | """Utilities."""
11 | 


--------------------------------------------------------------------------------
/RSTnet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/RSTnet.pdf


--------------------------------------------------------------------------------
/RSTnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/RSTnet.png


--------------------------------------------------------------------------------
/demos/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/.DS_Store


--------------------------------------------------------------------------------
/demos/tts/setence_level_text_audio_interleaved_1272-128104-0006_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/tts/setence_level_text_audio_interleaved_1272-128104-0006_sample.wav


--------------------------------------------------------------------------------
/demos/tts/setence_level_text_audio_interleaved_1272-141231-0011_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/tts/setence_level_text_audio_interleaved_1272-141231-0011_sample.wav


--------------------------------------------------------------------------------
/demos/tts/setence_level_text_audio_interleaved_174-168635-0014_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/tts/setence_level_text_audio_interleaved_174-168635-0014_sample.wav


--------------------------------------------------------------------------------
/demos/tts/setence_level_text_audio_interleaved_251-137823-0008_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/tts/setence_level_text_audio_interleaved_251-137823-0008_sample.wav


--------------------------------------------------------------------------------
/demos/tts/setence_level_text_audio_interleaved_652-129742-0018_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/tts/setence_level_text_audio_interleaved_652-129742-0018_sample.wav


--------------------------------------------------------------------------------
/demos/tts/setence_level_text_audio_interleaved_777-126732-0080_sample.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yangdongchao/RSTnet/5f01e1fbefe947342ac404e80fed8fbe5cc3da03/demos/tts/setence_level_text_audio_interleaved_777-126732-0080_sample.wav


--------------------------------------------------------------------------------