├── audioldm
│   ├── requirements.txt
│   └── handler.py
├── musicgen
│   ├── requirements.txt
│   └── handler.py
├── README.md
└── bark
    ├── requirements.txt
    └── handler.py

/audioldm/requirements.txt:
--------------------------------------------------------------------------------
diffusers
transformers
--------------------------------------------------------------------------------
/musicgen/requirements.txt:
--------------------------------------------------------------------------------
transformers==4.31.0
accelerate>=0.20.3
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deploy audio ML models on HF Inference Endpoints

Custom inference handlers for deploying audio models on Hugging Face Inference Endpoints:

- `audioldm`: text-to-audio generation with AudioLDM (via `diffusers`)
- `musicgen`: text-to-music generation with MusicGen (via `transformers`)
- `bark`: text-to-speech generation with Bark (via `transformers`)
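Each handler accepts the same payload shape: a text prompt under `inputs` and an optional `parameters` dict that is forwarded verbatim to the underlying `generate()`/pipeline call, so valid keys depend on the model. A minimal request sketch follows; the endpoint URL, token, and parameter values are placeholders to swap for your own:

```python
import requests

ENDPOINT_URL = "https://YOUR-ENDPOINT.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder: a token with access to the endpoint

response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json={
        "inputs": "a calming piano melody with soft strings",
        # forwarded as generation kwargs, e.g. max_new_tokens for musicgen/bark
        "parameters": {"max_new_tokens": 256},
    },
)
response.raise_for_status()
prediction = response.json()  # [{"generated_audio": ...}]
```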
--------------------------------------------------------------------------------
/bark/requirements.txt:
--------------------------------------------------------------------------------
git+https://github.com/huggingface/transformers.git
git+https://github.com/huggingface/optimum.git
git+https://github.com/huggingface/accelerate.git
--------------------------------------------------------------------------------
/audioldm/handler.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List

import torch
from diffusers import AudioLDMPipeline


class EndpointHandler:
    def __init__(self, path=""):
        # load the text-to-audio pipeline from the model repository
        self.pipe = AudioLDMPipeline.from_pretrained(path, torch_dtype=torch.float16).to("cuda")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data (Dict[str, Any]):
                The payload with the text prompt and generation parameters.
        """
        # process input
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # forward the prompt with any generation kwargs from the payload
        if parameters is not None:
            outputs = self.pipe(inputs, **parameters)
        else:
            outputs = self.pipe(inputs)

        # postprocess: the pipeline returns a batch of numpy waveforms
        prediction = outputs.audios[0]

        return [{"generated_audio": prediction}]
--------------------------------------------------------------------------------
/bark/handler.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List

import torch
from transformers import AutoProcessor, BarkModel


class EndpointHandler:
    def __init__(self, path="suno/bark"):
        # load model and processor from the model repository
        self.processor = AutoProcessor.from_pretrained(path)
        self.model = BarkModel.from_pretrained(path, torch_dtype=torch.float16).to("cuda")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data (Dict[str, Any]):
                The payload with the text prompt and generation parameters.
        """
        # process input
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # preprocess: tokenize the prompt and move it to the GPU
        inputs = self.processor(
            text=[inputs],
            padding=True,
            return_tensors="pt",
        ).to("cuda")

        # forward the prompt with any generation kwargs from the payload
        if parameters is not None:
            outputs = self.model.generate(**inputs, **parameters)
        else:
            outputs = self.model.generate(**inputs)

        # postprocess the first waveform in the batch into numpy
        prediction = outputs[0].cpu().numpy()

        return [{"generated_audio": prediction}]
--------------------------------------------------------------------------------
/musicgen/handler.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List

import torch
from transformers import AutoProcessor, MusicgenForConditionalGeneration


class EndpointHandler:
    def __init__(self, path=""):
        # load model and processor from the model repository
        self.processor = AutoProcessor.from_pretrained(path)
        self.model = MusicgenForConditionalGeneration.from_pretrained(path, torch_dtype=torch.float16).to("cuda")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data (Dict[str, Any]):
                The payload with the text prompt and generation parameters.
        """
        # process input
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # preprocess: tokenize the prompt and move it to the GPU
        inputs = self.processor(
            text=[inputs],
            padding=True,
            return_tensors="pt",
        ).to("cuda")

        # generate under autocast so the fp16 weights mix safely with fp32 ops
        with torch.autocast("cuda"):
            if parameters is not None:
                outputs = self.model.generate(**inputs, **parameters)
            else:
                outputs = self.model.generate(**inputs)

        # postprocess the first waveform in the batch into numpy
        prediction = outputs[0].cpu().numpy()

        return [{"generated_audio": prediction}]
--------------------------------------------------------------------------------
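Before deploying, a handler can be smoke-tested locally from the repository root. A minimal sketch for the Bark handler, whose default path already points at `suno/bark`; the prompt is illustrative, and a CUDA GPU is required since the handlers hard-code `.to("cuda")`:

from bark.handler import EndpointHandler

handler = EndpointHandler()  # defaults to path="suno/bark"
result = handler({"inputs": "Hello, my name is Suno."})
audio = result[0]["generated_audio"]  # numpy array of audio samples
print(audio.shape, audio.dtype)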