├── tools ├── cpp-example │ ├── CManeLists.txt │ └── lalalai_upload.cpp ├── nodejs-example │ └── lalalai-upload.js └── api │ ├── README.md │ ├── lalalai_demuser.py │ ├── lalalai_voice_converter.py │ └── lalalai_splitter.py ├── .gitignore └── readme.md /tools/cpp-example/CManeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.0) 2 | project(lalalai_upload VERSION 0.1.0) 3 | 4 | include(CTest) 5 | enable_testing() 6 | 7 | find_package(CURL REQUIRED) 8 | 9 | 10 | add_executable(lalalai_upload lalalai_upload.cpp) 11 | 12 | if(CURL_FOUND) 13 | target_include_directories(lalalai_upload PRIVATE ${CURL_INCLUDE_DIRS}) 14 | target_link_libraries(lalalai_upload ${CURL_LIBRARIES}) 15 | else() 16 | message(FATAL_ERROR "CURL library not found") 17 | endif() 18 | 19 | target_compile_features(lalalai_upload PRIVATE cxx_std_17) 20 | 21 | set(CPACK_PROJECT_NAME ${PROJECT_NAME}) 22 | set(CPACK_PROJECT_VERSION ${PROJECT_VERSION}) 23 | include(CPack) 24 | -------------------------------------------------------------------------------- /tools/nodejs-example/lalalai-upload.js: -------------------------------------------------------------------------------- 1 | const axios = require('axios'); 2 | const fs = require('fs'); 3 | 4 | const fileName = '~/file.mp3'; 5 | 6 | try { 7 | const data = fs.readFileSync(fileName); 8 | try { 9 | (axios.post('https://www.lalal.ai/api/upload/', data, { 10 | headers: { 11 | 'Content-Disposition': 'attachment; filename=file.mp3', 12 | 'Authorization': 'license ' 13 | } 14 | })).then(res => console.log('Result:', res.data)) 15 | } 16 | catch (error) { 17 | console.error('Error:', error); 18 | } 19 | } catch (error) { 20 | console.error('Error:', error); 21 | } 22 | 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # macOS 2 | .DS_Store 3 | .DS_Store? 4 | ._* 5 | .Spotlight-V100 6 | .Trashes 7 | ehthumbs.db 8 | Thumbs.db 9 | 10 | # Python 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | *.so 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | .dmypy.json 109 | dmypy.json 110 | -------------------------------------------------------------------------------- /tools/cpp-example/lalalai_upload.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | size_t WriteCallback(char *contents, size_t size, size_t nmemb, void *userp) 8 | { 9 | ((std::string *)userp)->append((char *)contents, size * nmemb); 10 | return size * nmemb; 11 | } 12 | 13 | void uploadData(std::string data) 14 | { 15 | CURL *curl; 16 | CURLcode res; 17 | 18 | curl_global_init(CURL_GLOBAL_DEFAULT); 19 | curl = curl_easy_init(); 20 | 21 | if (curl) 22 | { 23 | curl_easy_setopt(curl, CURLOPT_URL, "https://www.lalal.ai/api/upload/"); 24 | curl_easy_setopt(curl, CURLOPT_POST, 1L); 25 | 26 | struct curl_slist *list = NULL; 27 | list = curl_slist_append(list, "Content-Disposition: attachment; filename=file.mp3"); 28 | list = curl_slist_append(list, "Authorization: license "); // TODO: PASTE LICENSE HERE 29 | curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); 30 | 31 | curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, data.size()); 32 | curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data.c_str()); 33 | 34 | std::string readBuffer; 35 | curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); 36 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer); 37 | 38 | res = curl_easy_perform(curl); 39 | 40 | std::cout << "[" << readBuffer << "]" << std::endl; 41 | 42 | if (res != CURLE_OK) 43 | fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res)); 44 | 45 | curl_easy_cleanup(curl); 46 | curl_slist_free_all(list); 47 | } 48 | 49 | curl_global_cleanup(); 50 | } 51 | 52 | int main() 53 | { 54 | const std::string fileName = "~/file.mp3"; // TODO: PASTE FILENAME HERE 55 | 56 | std::ifstream inFile; 57 | inFile.open(fileName, std::ios::binary); 58 | 59 | if (!inFile) 60 | { 61 | std::cerr << "Error: Unable to open file " << fileName << std::endl; 62 | return 1; 63 | } 64 | 65 | std::stringstream strStream; 66 | strStream << inFile.rdbuf(); 67 | auto str = strStream.str(); 68 | 69 | try 70 | { 71 | uploadData(std::move(str)); 72 | } 73 | catch (const std::exception &e) 74 | { 75 | std::cerr << "Error: " << e.what() << '\n'; 76 | } 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # [LALAL.AI](https://www.lalal.ai/) 2 | 3 | Extract vocal, accompaniment and various instruments from any audio and video 4 | High-quality stem splitting based on the world's #1 AI-powered technology. 5 | 6 | ### About 7 | We are a team of specialists in the fields of artificial intelligence, machine learning, mathematical optimization, and digital signal processing. **Our goal is to make working with audio and video easier** for musicians, sound producers, music engineers, video bloggers, streamers, transcribers, translators, journalists, and many other professionals and creatives. 8 | 9 | In 2020, we developed a unique neural network called **Rocknet** using 20TB of training data to extract instrumentals and voice tracks from songs. In 2021, we created Cassiopeia, a next-generation solution superior to Rocknet that allowed improved splitting results with significantly fewer audio artifacts. 10 | 11 | Starting as a 2-stem splitter, LALAL.AI has grown significantly during 2021. In addition to **vocal and instrumental**, the service was enhanced with the capability to extract musical instruments – **drums, bass, acoustic guitar, electric guitar, piano, and synthesizer**. As a result of this upgrade, LALAL.AI became the [world’s first 8-stem splitter](https://www.lalal.ai/blog/lalal-ai-adds-the-8th-stem-for-separation-synthesizer/). In the same year, we also presented [business solutions](https://www.lalal.ai/business-solutions/), enabling owners of sites, services and applications to integrate our stem-splitting technology into their environments via API. 12 | 13 | Only available in English prior to 2021, LALAL.AI was translated into 7 other languages – Chinese, French, German, Italian, Japanese, Korean, and Spanish. Furthermore, we added new payment methods to make LALAL.AI easier to acquire and more accessible to people worldwide. 14 | 15 | In 2022, we created and released [Phoenix](https://www.lalal.ai/blog/phoenix-neural-network-vocal-separation/), a state-of-the-art audio source separation technology. In terms of stem-splitting accuracy, it surpassed not only our previous neural networks but also all other solutions on the market. 16 | 17 | Although Phoenix exclusively handled vocal/instrumental isolation at first, its powerful technology allowed us to continually introduce new stems on a regular basis. Throughout the year we trained Phoenix to extract all musical instruments that Cassiopeia supported. 18 | 19 | We also added two brand new stems, wind and string instruments, which no other service offered. With that update, LALAL.AI broke the record again and became the [world’s first 10-stem splitter](https://www.lalal.ai/blog/wind-string-instruments/). 20 | 21 | LALAL.AI’s innovative technologies are used not only for stem splitting. In July of 2022, we introduced [Voice Cleaner](https://www.lalal.ai/blog/voice-cleaner/), a noise cancellation solution that removes background music, mic rumble, vocal plosives, and many other types of extraneous noises from video and audio recordings. 22 | 23 | At the end of 2022, we created a [desktop version of LALAL.AI](https://www.lalal.ai/blog/lalalai-desktop-app/). The application enabled users to split audio and videos into stems in one convenient place on their Windows, macOS and Linux computers. 24 | 25 | In the two years since LALAL.AI was created, the project has grown tremendously, as has our workforce. Since the Rocknet neural network launch in 2020, the LALAL.AI team has doubled in size. We work hard to create unique high-quality solutions and we always have a lot of ideas and developments in store. Keep your eyes peeled for new possibilities and improvements! 26 | 27 | ### Legal Entity 28 | OmniSale GmbH 29 | Rigistrasse 3, 6300, Zug, Switzerland. 30 | 31 | ### Examples of API usage 32 | * [Python tool](/tools/api/) 33 | * [Node-js uploading example](/tools/nodejs-example/) 34 | * [C++ uploading example](/tools/cpp-example/) 35 | 36 | ### Forks and third party tools 37 | 38 | * Modified Python tool for extract multiple stems for only one upload https://github.com/lehenbauer/lalalai (by @lehenbauer) 39 | * GUI frontend for Python script. Currently for Mac only https://github.com/lehenbauer/unmixer (by @lehenbauer) 40 | 41 | 42 | -------------------------------------------------------------------------------- /tools/api/README.md: -------------------------------------------------------------------------------- 1 | # LALAL.AI API Examples 2 | 3 | This directory contains examples of interacting with the LALAL.AI API as described in [https://www.lalal.ai/api/help/](https://www.lalal.ai/api/help/) 4 | 5 | ## Scripts 6 | 7 | - `lalalai_splitter.py` - Audio source separation using various stems and neural networks 8 | - `lalalai_voice_converter.py` - Voice conversion using different voice packs 9 | - `lalalai_demuser.py` - Clean voice from background music 10 | 11 | ## lalalai_splitter.py Usage 12 | 13 | ```bash 14 | % python3 lalalai_splitter.py --license \ 15 | --input \ 16 | [--output ] \ 17 | [--stem ] \ 18 | [--filter ] \ 19 | [--splitter ] \ 20 | [--enhanced-processing ] \ 21 | [--noise-cancelling ] 22 | 23 | Parameters: 24 | --license User license key (required) 25 | --input Input directory or file (required) 26 | --output Output directory (default: current script directory) 27 | --stem Stem to extract (default: "vocals") 28 | choices: vocals, drum, bass, piano, electric_guitar, 29 | acoustic_guitar, synthesizer, voice, strings, wind 30 | Note: Different neural networks support different sets of stems 31 | --filter Post-processing filter intensity (default: 1) 32 | choices: 0 (mild), 1 (normal), 2 (aggressive) 33 | --splitter Neural network type (default: auto - selects most effective for stem) 34 | choices: phoenix, orion, perseus 35 | Auto selection priority: Perseus > Orion > Phoenix 36 | - Perseus: vocals, voice, drum, piano, bass, electric_guitar, acoustic_guitar 37 | - Orion: vocals, voice, drum, piano, bass, electric_guitar, acoustic_guitar 38 | - Phoenix: vocals, voice, drum, piano, bass, electric_guitar, acoustic_guitar, synthesizer, strings, wind 39 | --enhanced-processing Enable enhanced processing (default: false) 40 | Available for all stems except "voice" 41 | --noise-cancelling Noise cancelling level for "voice" stem only (default: 1) 42 | choices: 0 (mild), 1 (normal), 2 (aggressive) 43 | ``` 44 | 45 | ## lalalai_voice_converter.py Usage 46 | 47 | ```bash 48 | % python3 lalalai_voice_converter.py --license \ 49 | [--input ] \ 50 | [--uploaded_file_id ] \ 51 | [--output ] \ 52 | [--voice_pack_id ] \ 53 | [--accent_enhance <0.0-1.0>] \ 54 | [--pitch_shifting ] \ 55 | [--dereverb_enabled ] \ 56 | [--list] 57 | 58 | Parameters: 59 | --license User license key (required) 60 | --input Input directory or file (optional if using --uploaded_file_id) 61 | --uploaded_file_id Previously uploaded file ID (optional) 62 | --output Output directory (default: current script directory) 63 | --voice_pack_id Voice pack ID (default: "ALEX_KAYE") 64 | Available voice packs: https://www.lalal.ai/api/voice_packs/list/ 65 | --accent_enhance Accent processing strength (default: 1.0) 66 | Range: 0.0-1.0, where 0.0 = keep original accent, 1.0 = match target voice accent 67 | --pitch_shifting Tonality/pitch processing (default: true) 68 | true: Match target voice tonality 69 | false: Keep original tone 70 | --dereverb_enabled Echo/reverb processing (default: false) 71 | true: Remove echo/reverb from audio 72 | false: Restore original echo/reverb 73 | --list List available voice packs and exit 74 | ``` 75 | 76 | ## lalalai_demuser.py Usage 77 | 78 | ```bash 79 | % python3 lalalai_demuser.py --license \ 80 | --input \ 81 | [--output ] \ 82 | 83 | Parameters: 84 | --license User license key (required) 85 | --input Input directory or file (required) 86 | --output Output directory (default: current script directory) 87 | ``` 88 | -------------------------------------------------------------------------------- /tools/api/lalalai_demuser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2025 LALAL.AI 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | import sys 24 | 25 | if sys.version_info >= (3, 13): 26 | raise RuntimeError("This script requires Python 3.12 or earlier for cgi module compatibility. Actual version is: ", sys.version_info) 27 | 28 | 29 | import cgi 30 | import json 31 | import os 32 | import time 33 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, SUPPRESS 34 | from urllib.parse import quote, unquote, urlencode 35 | from urllib.request import urlopen, Request 36 | 37 | 38 | URL_API = "https://www.lalal.ai/api/" 39 | 40 | 41 | def update_percent(pct): 42 | pct = str(pct) 43 | sys.stdout.write("\b" * len(pct)) 44 | sys.stdout.write(" " * len(pct)) 45 | sys.stdout.write("\b" * len(pct)) 46 | sys.stdout.write(pct) 47 | sys.stdout.flush() 48 | 49 | 50 | def make_content_disposition(filename, disposition='attachment'): 51 | try: 52 | filename.encode('ascii') 53 | file_expr = f'filename="{filename}"' 54 | except UnicodeEncodeError: 55 | quoted = quote(filename) 56 | file_expr = f"filename*=utf-8''{quoted}" 57 | return f'{disposition}; {file_expr}' 58 | 59 | 60 | def upload_file(file_path, license): 61 | url_for_upload = URL_API + "upload/" 62 | _, filename = os.path.split(file_path) 63 | headers = { 64 | "Content-Disposition": make_content_disposition(filename), 65 | "Authorization": f"license {license}", 66 | } 67 | with open(file_path, 'rb') as f: 68 | request = Request(url_for_upload, f, headers) 69 | with urlopen(request) as response: 70 | upload_result = json.load(response) 71 | if upload_result["status"] == "success": 72 | return upload_result["id"] 73 | else: 74 | raise RuntimeError(upload_result["error"]) 75 | 76 | 77 | def split_file(file_id, license): 78 | url_for_split = URL_API + "split/" 79 | headers = { 80 | "Authorization": f"license {license}", 81 | } 82 | query_args = { 83 | 'id': file_id, 84 | 'stem': 'music', 85 | 'splitter': 'lyra', 86 | } 87 | 88 | # What you send to server 89 | print("Split task request body:", query_args) 90 | 91 | encoded_args = urlencode(query_args).encode('utf-8') 92 | request = Request(url_for_split, encoded_args, headers=headers) 93 | with urlopen(request) as response: 94 | split_result = json.load(response) 95 | if split_result["status"] == "error": 96 | raise RuntimeError(split_result["error"]) 97 | 98 | 99 | def check_file(file_id): 100 | url_for_check = URL_API + "check/?" 101 | query_args = {'id': file_id} 102 | encoded_args = urlencode(query_args) 103 | 104 | is_queueup = False 105 | 106 | while True: 107 | with urlopen(url_for_check + encoded_args) as response: 108 | check_result = json.load(response) 109 | 110 | if check_result["status"] == "error": 111 | raise RuntimeError(check_result["error"]) 112 | 113 | task_state = check_result["task"]["state"] 114 | 115 | if task_state == "success": 116 | update_percent("Progress: 100%\n") 117 | return check_result["split"] 118 | 119 | elif task_state == "error": 120 | raise RuntimeError(check_result["task"]["error"]) 121 | 122 | elif task_state == "progress": 123 | progress = int(check_result["task"]["progress"]) 124 | if progress == 0 and not is_queueup: 125 | if 'presets' in check_result and 'split' in check_result['presets']: 126 | # Settings extracted by server 127 | print("Using settings", check_result['presets']['split']) 128 | print("Queue up...") 129 | is_queueup = True 130 | elif progress > 0: 131 | update_percent(f"Progress: {progress}%") 132 | 133 | else: 134 | raise NotImplementedError('Unknown track state', task_state) 135 | 136 | time.sleep(15) 137 | 138 | 139 | def get_filename_from_content_disposition(header): 140 | _, params = cgi.parse_header(header) 141 | filename = params.get('filename') 142 | if filename: 143 | return filename 144 | filename = params.get('filename*') 145 | if filename: 146 | encoding, quoted = filename.split("''") 147 | unquoted = unquote(quoted, encoding) 148 | return unquoted 149 | raise ValueError('Invalid header Content-Disposition') 150 | 151 | 152 | def download_file(url_for_download, output_path): 153 | with urlopen(url_for_download) as response: 154 | filename = get_filename_from_content_disposition(response.headers["Content-Disposition"]) 155 | file_path = os.path.join(output_path, filename) 156 | with open(file_path, 'wb') as f: 157 | while (chunk := response.read(8196)): 158 | f.write(chunk) 159 | return file_path 160 | 161 | 162 | def batch_process_for_file(license, input_path, output_path): 163 | try: 164 | print(f'Uploading the file "{input_path}"...') 165 | file_id = upload_file(file_path=input_path, license=license) 166 | print(f'The file "{input_path}" has been successfully uploaded (file id: {file_id})') 167 | 168 | print(f'Processing the file "{input_path}"...') 169 | split_file(file_id, license) 170 | split_result = check_file(file_id) 171 | 172 | for url in (split_result['stem_track'], split_result['back_track']): 173 | print(f'Downloading the track file "{url}"...') 174 | downloaded_file = download_file(url, output_path) 175 | print(f'The track file has been downloaded to "{downloaded_file}"') 176 | 177 | print(f'The file "{input_path}" has been successfully split') 178 | except Exception as err: 179 | print(f'Cannot process the file "{input_path}": {err}') 180 | 181 | 182 | def batch_process(license, input_path, output_path): 183 | if os.path.isfile(input_path): 184 | batch_process_for_file(license, input_path, output_path) 185 | else: 186 | for path in os.listdir(input_path): 187 | full_path = os.path.join(input_path, path) 188 | if os.path.isfile(full_path): 189 | batch_process_for_file(license, full_path, output_path) 190 | 191 | 192 | def main(): 193 | parser = ArgumentParser(description='Lalalai splitter', formatter_class=ArgumentDefaultsHelpFormatter) 194 | parser.add_argument('--license', required=True, type=str, default=SUPPRESS, help='license key') 195 | parser.add_argument('--input', required=True, type=str, default=SUPPRESS, help='input directory or a file') 196 | parser.add_argument('--output', type=str, default=os.path.dirname(os.path.realpath(__file__)), help='output directory') 197 | 198 | args = parser.parse_args() 199 | 200 | os.makedirs(args.output, exist_ok=True) 201 | batch_process(args.license, args.input, args.output) 202 | 203 | 204 | if __name__ == '__main__': 205 | try: 206 | main() 207 | except Exception as err: 208 | print(err) 209 | -------------------------------------------------------------------------------- /tools/api/lalalai_voice_converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2025 LALAL.AI 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | import sys 24 | 25 | if sys.version_info >= (3, 13): 26 | raise RuntimeError("This script requires Python 3.12 or earlier for cgi module compatibility. Actual version is: ", sys.version_info) 27 | 28 | import cgi 29 | import json 30 | import os 31 | 32 | import time 33 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, SUPPRESS 34 | from urllib.parse import quote, unquote, urlencode 35 | from urllib.request import urlopen, Request 36 | 37 | from dataclasses import dataclass 38 | 39 | URL_API = "https://www.lalal.ai/api/" 40 | 41 | @dataclass 42 | class VoiceChangeParameters: 43 | voice_pack_id: str 44 | accent_enhance: float 45 | pitch_shifting: int 46 | dereverb_enabled: bool 47 | 48 | 49 | def upload_file(file_path, license): 50 | url_for_upload = URL_API + "upload/" 51 | _, filename = os.path.split(file_path) 52 | headers = { 53 | "Content-Disposition": _make_content_disposition(filename), 54 | "Authorization": f"license {license}", 55 | } 56 | with open(file_path, 'rb') as f: 57 | request = Request(url_for_upload, f, headers) 58 | with urlopen(request) as response: 59 | upload_result = json.load(response) 60 | if upload_result["status"] == "success": 61 | return upload_result["id"] 62 | else: 63 | raise RuntimeError(upload_result["error"]) 64 | 65 | 66 | def change_voice(file_id, license, params: VoiceChangeParameters): 67 | url = URL_API + "change_voice/" 68 | headers = { 69 | "Authorization": f"license {license}", 70 | } 71 | 72 | query_args = { 73 | 'id': file_id, 74 | 'voice': params.voice_pack_id, 75 | 'accent_enhance': params.accent_enhance, 76 | 'pitch_shifting': params.pitch_shifting, 77 | 'dereverb_enabled': params.dereverb_enabled 78 | } 79 | print("Voice convert request body:", query_args) 80 | 81 | encoded_args = urlencode(query_args).encode('utf-8') 82 | request = Request(url, encoded_args, headers=headers) 83 | with urlopen(request) as response: 84 | split_result = json.load(response) 85 | if split_result["status"] == "error": 86 | raise RuntimeError(split_result["error"]) 87 | print(f"Start Voice convert task {split_result['task_id']}") 88 | 89 | 90 | def check_file(file_id): 91 | url_for_check = URL_API + "check/?" 92 | query_args = {'id': file_id} 93 | encoded_args = urlencode(query_args) 94 | 95 | preparation_phase = True 96 | 97 | while True: 98 | with urlopen(url_for_check + encoded_args) as response: 99 | check_result = json.load(response) 100 | 101 | if check_result["status"] == "error": 102 | raise RuntimeError(check_result["error"]) 103 | 104 | task_state = check_result["task"]["state"] 105 | 106 | if task_state == "success": 107 | _update_percent("Progress: 100%\n") 108 | return check_result["split"] 109 | 110 | elif task_state == "error": 111 | raise RuntimeError(check_result["task"]["error"]) 112 | 113 | elif task_state == "progress": 114 | progress = int(check_result["task"]["progress"]) 115 | if progress == 0 and preparation_phase: 116 | if 'presets' in check_result and 'split' in check_result['presets']: 117 | print("Using settings", check_result['presets']['split']) 118 | print("Queue up...") 119 | preparation_phase = False 120 | elif progress > 0: 121 | _update_percent(f"Progress: {progress}%") 122 | 123 | else: 124 | raise NotImplementedError('Unknown track state', task_state) 125 | 126 | time.sleep(15) 127 | 128 | 129 | def download_file(url_for_download, output_path): 130 | with urlopen(url_for_download) as response: 131 | filename = _get_filename_from_content_disposition(response.headers["Content-Disposition"]) 132 | file_path = os.path.join(output_path, filename) 133 | with open(file_path, 'wb') as f: 134 | while (chunk := response.read(8196)): 135 | f.write(chunk) 136 | return file_path 137 | 138 | 139 | def process_file(license, file_id, output_path, params: VoiceChangeParameters): 140 | try: 141 | print(f'Processing the file "{file_id}"...') 142 | change_voice(file_id, license, params) 143 | processing_result = check_file(file_id) 144 | 145 | print(f'Downloading the converted file "{processing_result['back_track']}"...') 146 | downloaded_file = download_file(processing_result['back_track'], output_path) 147 | print(f'The track file has been downloaded to "{downloaded_file}"') 148 | except Exception as err: 149 | print(f'Cannot process the file "{file_id}": {err}') 150 | raise 151 | 152 | 153 | def list_voice_packs(license): 154 | """List available voice packs for the user""" 155 | url = URL_API + "voice_packs/list/" 156 | headers = { 157 | "Authorization": f"license {license}", 158 | } 159 | 160 | request = Request(url, headers=headers) 161 | with urlopen(request) as response: 162 | result = json.load(response) 163 | if result["status"] == "error": 164 | raise RuntimeError(result["error"]) 165 | 166 | # Filter only ready_to_use packs 167 | ready_packs = [pack for pack in result["packs"] if pack["ready_to_use"]] 168 | 169 | # Print table header 170 | print(f"{'pack_id':<50} {'name':<50}") 171 | print("-" * 105) 172 | 173 | # Print each pack 174 | for pack in ready_packs: 175 | pack_id = pack["pack_id"] 176 | name = pack["name"] 177 | 178 | print(f"{pack_id:<50} {name:<50}") 179 | 180 | 181 | def _update_percent(pct): 182 | pct = str(pct) 183 | sys.stdout.write("\b" * len(pct)) 184 | sys.stdout.write(" " * len(pct)) 185 | sys.stdout.write("\b" * len(pct)) 186 | sys.stdout.write(pct) 187 | sys.stdout.flush() 188 | 189 | 190 | def _make_content_disposition(filename, disposition='attachment'): 191 | try: 192 | filename.encode('ascii') 193 | file_expr = f'filename="{filename}"' 194 | except UnicodeEncodeError: 195 | quoted = quote(filename) 196 | file_expr = f"filename*=utf-8''{quoted}" 197 | return f'{disposition}; {file_expr}' 198 | 199 | 200 | def _strtobool(val: str) -> bool: 201 | """Convert a string representation of truth to true (1) or false (0). 202 | 203 | True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values 204 | are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if 205 | 'val' is anything else. 206 | """ 207 | val = val.lower() 208 | if val in ('y', 'yes', 't', 'true', 'on', '1'): 209 | return True 210 | elif val in ('n', 'no', 'f', 'false', 'off', '0'): 211 | return False 212 | else: 213 | raise ValueError(f"invalid bool value {val!r}") 214 | 215 | 216 | def _get_filename_from_content_disposition(header): 217 | _, params = cgi.parse_header(header) 218 | filename = params.get('filename') 219 | if filename: 220 | return filename 221 | filename = params.get('filename*') 222 | if filename: 223 | encoding, quoted = filename.split("''") 224 | unquoted = unquote(quoted, encoding) 225 | return unquoted 226 | raise ValueError('Invalid header Content-Disposition') 227 | 228 | 229 | def main(): 230 | parser = ArgumentParser(description='Lalalai voice changer', formatter_class=ArgumentDefaultsHelpFormatter) 231 | parser.add_argument('--license', required=True, type=str, default=SUPPRESS, help='license key') 232 | parser.add_argument('--input', required=False, type=str, default=None, help='input directory or a file') 233 | parser.add_argument('--uploaded_file_id', required=False, type=str, default=None, help='uploaded file id') 234 | parser.add_argument('--output', type=str, default=os.path.dirname(os.path.realpath(__file__)), help='output directory') 235 | parser.add_argument('--voice_pack_id', type=str, default="ALEX_KAYE", help='pack_id in status "ready_to_use", choose from https://www.lalal.ai/api/voice_packs/list/ (must be logged in)') 236 | parser.add_argument('--accent_enhance', type=float, default=1, help='enable accent enhance (0.0-1.0, 1.0 by default)') 237 | parser.add_argument('--pitch_shifting', type=lambda x: bool(_strtobool(x)), default=True, choices=[True, False],) 238 | parser.add_argument('--dereverb_enabled', type=lambda x: bool(_strtobool(x)), default=False, choices=[True, False], help='remove echo') 239 | parser.add_argument('--list', action='store_true', help='list available voice packs and exit') 240 | 241 | args = parser.parse_args() 242 | 243 | # Handle list command first 244 | if args.list: 245 | list_voice_packs(args.license) 246 | return 247 | 248 | if args.uploaded_file_id and args.input: 249 | raise ValueError("You cannot specify both --uploaded_file_id and --input. Use one of them.") 250 | if not args.uploaded_file_id and not args.input: 251 | raise ValueError("You must specify either --uploaded_file_id or --input.") 252 | 253 | if args.input: 254 | print('Uploading the file', args.input) 255 | file_id = upload_file(file_path=args.input, license=args.license) 256 | print(f'The file "{args.input}" has been successfully uploaded (file id: {file_id})') 257 | else: 258 | print('Use uploaded file', args.uploaded_file_id) 259 | file_id = args.uploaded_file_id 260 | 261 | params = VoiceChangeParameters( 262 | voice_pack_id=args.voice_pack_id, 263 | accent_enhance=args.accent_enhance, 264 | pitch_shifting=args.pitch_shifting, 265 | dereverb_enabled=args.dereverb_enabled 266 | ) 267 | 268 | os.makedirs(args.output, exist_ok=True) 269 | process_file(args.license, file_id, args.output, params) 270 | 271 | 272 | if __name__ == '__main__': 273 | main() 274 | -------------------------------------------------------------------------------- /tools/api/lalalai_splitter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2025 LALAL.AI 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | import sys 24 | 25 | if sys.version_info >= (3, 13): 26 | raise RuntimeError("This script requires Python 3.12 or earlier for cgi module compatibility. Actual version is: ", sys.version_info) 27 | 28 | 29 | import cgi 30 | import json 31 | import os 32 | import time 33 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, SUPPRESS 34 | from urllib.parse import quote, unquote, urlencode 35 | from urllib.request import urlopen, Request 36 | 37 | 38 | URL_API = "https://www.lalal.ai/api/" 39 | 40 | _andromeda_stems = ('vocals', 'voice') 41 | _perseus_stems = ('vocals', 'voice', 'drum', 'piano', 'bass', 'electric_guitar', 'acoustic_guitar') 42 | _orion_stems = ('vocals', 'voice', 'drum', 'piano', 'bass', 'electric_guitar', 'acoustic_guitar') 43 | _phoenix_stems = ('vocals', 'voice', 'drum', 'piano', 'bass', 'electric_guitar', 'acoustic_guitar', 'synthesizer', 'strings', 'wind') 44 | 45 | 46 | def update_percent(pct): 47 | pct = str(pct) 48 | sys.stdout.write("\b" * len(pct)) 49 | sys.stdout.write(" " * len(pct)) 50 | sys.stdout.write("\b" * len(pct)) 51 | sys.stdout.write(pct) 52 | sys.stdout.flush() 53 | 54 | 55 | def make_content_disposition(filename, disposition='attachment'): 56 | try: 57 | filename.encode('ascii') 58 | file_expr = f'filename="{filename}"' 59 | except UnicodeEncodeError: 60 | quoted = quote(filename) 61 | file_expr = f"filename*=utf-8''{quoted}" 62 | return f'{disposition}; {file_expr}' 63 | 64 | 65 | def upload_file(file_path, license): 66 | url_for_upload = URL_API + "upload/" 67 | _, filename = os.path.split(file_path) 68 | headers = { 69 | "Content-Disposition": make_content_disposition(filename), 70 | "Authorization": f"license {license}", 71 | } 72 | with open(file_path, 'rb') as f: 73 | request = Request(url_for_upload, f, headers) 74 | with urlopen(request) as response: 75 | upload_result = json.load(response) 76 | if upload_result["status"] == "success": 77 | return upload_result["id"] 78 | else: 79 | raise RuntimeError(upload_result["error"]) 80 | 81 | 82 | def split_file(file_id, license, stem, splitter, enhanced_processing, noise_cancelling, dereverb_enabled): 83 | url_for_split = URL_API + "split/" 84 | headers = { 85 | "Authorization": f"license {license}", 86 | } 87 | query_args = { 88 | 'id': file_id, 89 | 'stem': stem, 90 | 'splitter': splitter, 91 | 'dereverb_enabled': dereverb_enabled, 92 | } 93 | 94 | if enhanced_processing is not None: 95 | query_args['enhanced_processing_enabled'] = enhanced_processing 96 | if noise_cancelling is not None: 97 | query_args['noise_cancelling_level'] = noise_cancelling 98 | 99 | # What you send to server 100 | print("Split task request body:", query_args) 101 | 102 | encoded_args = urlencode(query_args).encode('utf-8') 103 | request = Request(url_for_split, encoded_args, headers=headers) 104 | with urlopen(request) as response: 105 | split_result = json.load(response) 106 | if split_result["status"] == "error": 107 | raise RuntimeError(split_result["error"]) 108 | 109 | 110 | def check_file(file_id): 111 | url_for_check = URL_API + "check/?" 112 | query_args = {'id': file_id} 113 | encoded_args = urlencode(query_args) 114 | 115 | is_queueup = False 116 | 117 | while True: 118 | with urlopen(url_for_check + encoded_args) as response: 119 | check_result = json.load(response) 120 | 121 | if check_result["status"] == "error": 122 | raise RuntimeError(check_result["error"]) 123 | 124 | task_state = check_result["task"]["state"] 125 | 126 | if task_state == "success": 127 | update_percent("Progress: 100%\n") 128 | return check_result["split"] 129 | 130 | elif task_state == "error": 131 | raise RuntimeError(check_result["task"]["error"]) 132 | 133 | elif task_state == "progress": 134 | progress = int(check_result["task"]["progress"]) 135 | if progress == 0 and not is_queueup: 136 | if 'presets' in check_result and 'split' in check_result['presets']: 137 | # Settings extracted by server 138 | print("Using settings", check_result['presets']['split']) 139 | print("Queue up...") 140 | is_queueup = True 141 | elif progress > 0: 142 | update_percent(f"Progress: {progress}%") 143 | 144 | else: 145 | raise NotImplementedError('Unknown track state', task_state) 146 | 147 | time.sleep(15) 148 | 149 | 150 | def _strtobool(val: str) -> bool: 151 | """Convert a string representation of truth to true (1) or false (0). 152 | 153 | True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values 154 | are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if 155 | 'val' is anything else. 156 | """ 157 | val = val.lower() 158 | if val in ('y', 'yes', 't', 'true', 'on', '1'): 159 | return True 160 | elif val in ('n', 'no', 'f', 'false', 'off', '0'): 161 | return False 162 | else: 163 | raise ValueError(f"invalid bool value {val!r}") 164 | 165 | 166 | def get_filename_from_content_disposition(header): 167 | _, params = cgi.parse_header(header) 168 | filename = params.get('filename') 169 | if filename: 170 | return filename 171 | filename = params.get('filename*') 172 | if filename: 173 | encoding, quoted = filename.split("''") 174 | unquoted = unquote(quoted, encoding) 175 | return unquoted 176 | raise ValueError('Invalid header Content-Disposition') 177 | 178 | 179 | def download_file(url_for_download, output_path): 180 | with urlopen(url_for_download) as response: 181 | filename = get_filename_from_content_disposition(response.headers["Content-Disposition"]) 182 | file_path = os.path.join(output_path, filename) 183 | with open(file_path, 'wb') as f: 184 | while (chunk := response.read(8196)): 185 | f.write(chunk) 186 | return file_path 187 | 188 | 189 | def batch_process_for_file(license, input_path, output_path, stem, splitter, enhanced_processing, noise_cancelling, dereverb_enabled): 190 | try: 191 | print(f'Uploading the file "{input_path}"...') 192 | file_id = upload_file(file_path=input_path, license=license) 193 | print(f'The file "{input_path}" has been successfully uploaded (file id: {file_id})') 194 | 195 | print(f'Processing the file "{input_path}"...') 196 | split_file(file_id, license, stem, splitter, enhanced_processing, noise_cancelling, dereverb_enabled) 197 | split_result = check_file(file_id) 198 | 199 | for url in (split_result['stem_track'], split_result['back_track']): 200 | print(f'Downloading the track file "{url}"...') 201 | downloaded_file = download_file(url, output_path) 202 | print(f'The track file has been downloaded to "{downloaded_file}"') 203 | 204 | print(f'The file "{input_path}" has been successfully split') 205 | except Exception as err: 206 | print(f'Cannot process the file "{input_path}": {err}') 207 | 208 | 209 | def batch_process(license, input_path, output_path, stem, splitter, enhanced_processing, noise_cancelling, dereverb_enabled): 210 | if os.path.isfile(input_path): 211 | batch_process_for_file(license, input_path, output_path, stem, splitter, enhanced_processing, noise_cancelling, dereverb_enabled) 212 | else: 213 | for path in os.listdir(input_path): 214 | full_path = os.path.join(input_path, path) 215 | if os.path.isfile(full_path): 216 | batch_process_for_file(license, full_path, output_path, stem, splitter, enhanced_processing, noise_cancelling, dereverb_enabled) 217 | 218 | 219 | def _validate_stem(args): 220 | if args.splitter == 'andromeda' and args.stem not in _andromeda_stems: 221 | raise ValueError(f'{args.splitter} splitter does not support stem "{args.stem}". Should be one of {_andromeda_stems}') 222 | if args.splitter == 'perseus' and args.stem not in _perseus_stems: 223 | raise ValueError(f'{args.splitter} splitter does not support stem "{args.stem}". Should be one of {_perseus_stems}') 224 | if args.splitter == 'orion' and args.stem not in _orion_stems: 225 | raise ValueError(f'{args.splitter} splitter does not support stem "{args.stem}". Should be one of {_orion_stems}') 226 | if args.splitter == 'phoenix' and args.stem not in _phoenix_stems: 227 | raise ValueError(f'{args.splitter} splitter does not support stem "{args.stem}". Should be one of {_phoenix_stems}') 228 | 229 | 230 | def _get_latest_available_splitter(stem): 231 | if stem in _andromeda_stems: 232 | return 'andromeda' 233 | if stem in _perseus_stems: 234 | return 'perseus' 235 | if stem in _orion_stems: 236 | return 'orion' 237 | return 'phoenix' 238 | 239 | 240 | splitter_help = f''' 241 | The type of neural network used to split audio. 242 | Possible values are 'phoenix', 'orion', 'perseus' or 'andromeda'. 243 | If parameter is not provided - automatically choose most effective splitter for selected stem. 244 | Andromeda stems: {_andromeda_stems}. 245 | Perseus stems: {_perseus_stems}. 246 | Orion stems: {_orion_stems}. 247 | Phoenix stems: {_phoenix_stems}.''' 248 | 249 | 250 | def main(): 251 | parser = ArgumentParser(description='Lalalai splitter', formatter_class=ArgumentDefaultsHelpFormatter) 252 | parser.add_argument('--license', required=True, type=str, default=SUPPRESS, help='license key') 253 | parser.add_argument('--input', required=True, type=str, default=SUPPRESS, help='input directory or a file') 254 | parser.add_argument('--output', type=str, default=os.path.dirname(os.path.realpath(__file__)), help='output directory') 255 | parser.add_argument('--splitter', type=str, choices=['phoenix', 'orion', 'perseus', 'andromeda'], help=splitter_help) 256 | parser.add_argument('--stem', type=str, default='vocals', help='List of comma-separated stem options. One of ("vocals", "voice", "drum", "bass", "piano", "electric_guitar, "acoustic_guitar", "synthesizer", "strings", "wind")') 257 | parser.add_argument('--enhanced-processing', type=lambda x: bool(_strtobool(x)), default=False, choices=[True, False], help='all stems, except "voice". Andromeda splitter ignores this parameter.') 258 | parser.add_argument('--noise-cancelling', type=int, default=1, choices=[0, 1, 2], help='noise cancelling level for "voice" stem: (0: mild, 1: normal, 2: aggressive)') 259 | parser.add_argument('--dereverb_enabled', type=lambda x: bool(_strtobool(x)), default=False, choices=[True, False], help='remove echo') 260 | 261 | args = parser.parse_args() 262 | 263 | _validate_stem(args) 264 | args.splitter = args.splitter or _get_latest_available_splitter(args.stem) 265 | 266 | if args.stem == 'voice': 267 | args.enhanced_processing = None 268 | else: 269 | args.noise_cancelling = None 270 | 271 | os.makedirs(args.output, exist_ok=True) 272 | batch_process(args.license, args.input, args.output, args.stem, args.splitter, args.enhanced_processing, args.noise_cancelling, args.dereverb_enabled) 273 | 274 | 275 | if __name__ == '__main__': 276 | try: 277 | main() 278 | except Exception as err: 279 | print(err) 280 | --------------------------------------------------------------------------------