For old ESPnet (<=0.10.1)
89 |
90 | ### ASR
91 |
92 | ```python
93 | import soundfile
94 | from espnet_model_zoo.downloader import ModelDownloader
95 | from espnet2.bin.asr_inference import Speech2Text
96 | d = ModelDownloader()
97 | speech2text = Speech2Text(
98 | **d.download_and_unpack("model_name"),
99 | # Decoding parameters are not included in the model file
100 | maxlenratio=0.0,
101 | minlenratio=0.0,
102 | beam_size=20,
103 | ctc_weight=0.3,
104 | lm_weight=0.5,
105 | penalty=0.0,
106 | nbest=1
107 | )
108 | ```
109 |
110 | ### TTS
111 |
112 | ```python
113 | import soundfile
114 | from espnet_model_zoo.downloader import ModelDownloader
115 | from espnet2.bin.tts_inference import Text2Speech
116 | d = ModelDownloader()
117 | text2speech = Text2Speech(**d.download_and_unpack("model_name"))
118 | ```
119 |
120 | ### Speech separation
121 |
122 | ```python
123 | import soundfile
124 | from espnet_model_zoo.downloader import ModelDownloader
125 | from espnet2.bin.enh_inference import SeparateSpeech
126 | d = ModelDownloader()
127 | separate_speech = SeparateSpeech(
128 | **d.download_and_unpack("model_name"),
129 | # for segment-wise processing of long speech
130 | segment_size=2.4,
131 | hop_size=0.8,
132 | normalize_segment_scale=False,
133 | show_progressbar=True,
134 | ref_channel=None,
135 | normalize_output_wav=True,
136 | )
137 | ```
138 |
139 |
140 |
141 | ## Instruction for ModelDownloader
142 |
143 | ```python
144 | from espnet_model_zoo.downloader import ModelDownloader
145 | d = ModelDownloader("~/.cache/espnet") # Specify cachedir
146 | d = ModelDownloader() #