├── .gitignore ├── requirements.txt ├── README.md ├── LICENSE ├── bm2dx.py ├── bytebuffer.py └── extractor.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bs4 2 | lxml 3 | ifstools 4 | tqdm -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SDVX Song Extractor 2 | 3 | MUST USE PYTHON 3 4 | `pip install -r requirements.txt` 5 | 6 | Edit FOLDERS, OUT in `extractor.py` as desired. Afterwards: 7 | `py -3 extractor.py` 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Will 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bm2dx.py: -------------------------------------------------------------------------------- 1 | import wave 2 | import audioop 3 | from io import BytesIO 4 | 5 | from bytebuffer import ByteBuffer 6 | 7 | class BM2DX(object): 8 | class Track(object): 9 | def __init__(self, buff): 10 | assert buff.get('4s') == b'2DX9' 11 | assert buff.get_u32() == 24 12 | wav_size = buff.get_u32() 13 | unk1 = buff.get_u16() 14 | self.id = buff.get_u16() 15 | unk2 = buff.get_u16() 16 | self.attenuation = buff.get_u16() 17 | self.loop_point = buff.get_u32() 18 | self.data = buff.get_bytes(wav_size) 19 | 20 | def __str__(self): 21 | return 'Track ID:{} attenuation:{} loop:{}'.format(self.id, self.attenuation, self.loop_point) 22 | 23 | 24 | def __init__(self, path): 25 | with open(path, 'rb') as f: 26 | self.contents = ByteBuffer(f.read(), endian = '<') 27 | self.name = self.contents.get('16s') 28 | self.header_size = self.contents.get_u32() 29 | self.filecount = self.contents.get_u32() 30 | self.contents.offset += 48 # random padding/flags/title bytes 31 | 32 | offsets = [] 33 | for _ in range(self.filecount): 34 | offsets.append(self.contents.get_u32()) 35 | 36 | self.tracks = [] 37 | for off in offsets: 38 | self.contents.offset = off 39 | self.tracks.append(self.Track(self.contents)) 40 | 41 | def __str__(self): 42 | ret = '2dx: "{}", {} track(s)'.format(self.name, len(self.tracks)) 43 | for t in self.tracks: 44 | ret += '\n\t{}'.format(str(t)) 45 | return ret 46 | -------------------------------------------------------------------------------- /bytebuffer.py: -------------------------------------------------------------------------------- 1 | from struct import * 2 | 3 | class ByteBuffer(): 4 | def __init__(self, input = b'', offset = 0, endian = '>'): 5 | # so multiple ByteBuffers can hold on to one set of underlying data 6 | # this is useful for writers in multiple locations 7 | if isinstance(input, bytearray): 8 | self.data = input 9 | else: 10 | if not isinstance(input, bytes): 11 | input = input.encode('utf-8') 12 | self.data = bytearray(input) 13 | self.endian = endian 14 | self.offset = offset 15 | self.end = len(self.data) 16 | 17 | def _format_type(self, type, count): 18 | if count is None: 19 | return self.endian + type 20 | else: 21 | return self.endian + str(count) + type 22 | 23 | def get_bytes(self, count): 24 | start = self.offset 25 | self.offset += count 26 | return self.data[start:self.offset] 27 | 28 | def get(self, type, count = None): 29 | ret = self.peek(type, count) 30 | size = calcsize(type) 31 | if count is not None: 32 | size *= count 33 | self.offset += size 34 | return ret 35 | 36 | def peek(self, type, count = None): 37 | fmt = self._format_type(type, count) 38 | ret = unpack_from(fmt, self.data, self.offset) 39 | return ret[0] if count is None else ret 40 | 41 | def append_bytes(self, data): 42 | self.data.extend(data) 43 | self.offset += len(data) 44 | 45 | def append(self, data, type, count = None): 46 | fmt = self._format_type(type, count) 47 | self.offset += calcsize(fmt) 48 | try: 49 | self.data.extend(pack(fmt, *data)) 50 | except TypeError: 51 | self.data.extend(pack(fmt, data)) 52 | 53 | def set(self, data, offset, type, count = None): 54 | fmt = self._format_type(type, count) 55 | try: 56 | pack_into(fmt, self.data, offset, *data) 57 | except TypeError: 58 | pack_into(fmt, self.data, offset, data) 59 | self.offset += calcsize(fmt) 60 | 61 | def hasData(self): 62 | return self.offset < self.end 63 | 64 | def realign_writes(self, size = 4): 65 | while len(self) % size: 66 | self.append_u8(0) 67 | 68 | def realign_reads(self, size = 4): 69 | while self.offset % size: 70 | self.offset += 1 71 | 72 | def __len__(self): 73 | return len(self.data) 74 | 75 | typeMap = { 76 | 's8' : 'b', 77 | 's16' : 'h', 78 | 's32' : 'i', 79 | 's64' : 'q', 80 | 'u8' : 'B', 81 | 'u16' : 'H', 82 | 'u32' : 'I', 83 | 'u64' : 'Q' 84 | } 85 | 86 | def _make_get(fmt): 87 | def _method(self): 88 | return self.get(fmt) 89 | return _method 90 | 91 | def _make_peek(fmt): 92 | def _method(self): 93 | return self.peek(fmt) 94 | return _method 95 | 96 | def _make_append(fmt): 97 | def _method(self, data): 98 | return self.append(data, fmt) 99 | return _method 100 | 101 | def _make_set(fmt): 102 | def _method(self, data, offset): 103 | return self.set(data, offset, fmt) 104 | return _method 105 | 106 | for name, fmt in typeMap.items(): 107 | _get = _make_get(fmt) 108 | _peek = _make_peek(fmt) 109 | _append = _make_append(fmt) 110 | _set = _make_set(fmt) 111 | setattr(ByteBuffer, 'get_' + name, _get) 112 | setattr(ByteBuffer, 'peek_' + name, _peek) 113 | setattr(ByteBuffer, 'append_' + name, _append) 114 | setattr(ByteBuffer, 'set_' + name, _set) 115 | -------------------------------------------------------------------------------- /extractor.py: -------------------------------------------------------------------------------- 1 | # `pip -r requirements.txt` 2 | # Edit FOLDERS, OUT as desired 3 | 4 | import sys 5 | if sys.version_info < (3,0): 6 | raise NotImplementedError('Python 2 is not supported, please use Python 3') 7 | 8 | from multiprocessing.dummy import Pool 9 | import os, subprocess, errno 10 | import tempfile 11 | import shutil 12 | from collections import OrderedDict 13 | 14 | from bs4 import BeautifulSoup 15 | from tqdm import tqdm 16 | from PIL import Image 17 | from ifstools import IFS 18 | 19 | from bm2dx import BM2DX 20 | 21 | # If you only want one format, change the unwanted ones to false 22 | # the source wavs are lossy (ADPCM) so a FLAC is not created 23 | PROCESS = { 24 | '320' : False, 25 | 'V0' : True, 26 | } 27 | 28 | # the raw files are quite quiet. Normalise them? 29 | AMPLIFY = True 30 | 31 | # add as many as you need to get all deleted songs 32 | # recommended: old releases first, as new versions may add new audio 33 | FOLDERS = ["D:\\Users\\Will\\Rhythm\\Sound Voltex II -Infinite Infection-", 34 | "D:\\Users\\Will\\Rhythm\\Sound Voltex III -Gravity Wars- Final", 35 | "D:\\Users\\Will\\Rhythm\\Sound Voltex IV - Heavenly Haven", 36 | ] 37 | 38 | #OUT = "Sound Voltex GST" 39 | OUT = r"D:\Users\Will\Rhythm\SongExtractor\Sound Voltex IV Extracted Soundtrack" 40 | ALBUM_NAME = "Sound Voltex Soundtrack" 41 | 42 | LAME = r"D:\Users\Will\Rhythm\SongExtractor\lame.exe" 43 | SOX = r"D:\Users\Will\Rhythm\SongExtractor\sox.exe" 44 | 45 | # do any filtering here 46 | def filter_func(songs): 47 | for id, song in list(songs.items()): 48 | if song.ver != 4: 49 | songs.pop(id) 50 | #pass 51 | 52 | # can ignore below this line 53 | # ----------------------------------------------------------------------------- 54 | 55 | if not os.path.isfile(LAME): 56 | raise OSError('lame encoder exe not found') 57 | if not os.path.isfile(SOX): 58 | raise OSError('sox converter exe not found') 59 | 60 | MAX_DIFF = 5 # NOV/ADV/EXH/INF/MXM 61 | 62 | OUTV0 = os.path.join(OUT, OUT + ' (V0)') 63 | OUT320 = os.path.join(OUT, OUT + ' (320)') 64 | 65 | DB_PATH = "data/others/music_db.xml" 66 | JACKET_PATH = "data/graphics/jk" 67 | SONG_PATH = "data/sound" 68 | 69 | TEMP = tempfile.gettempdir() 70 | 71 | def quote(thing): 72 | return '"' + thing.replace('"', '\\"') + '"' 73 | 74 | class Song(object): 75 | dx_versions = [ 76 | '1n', 77 | '2a', 78 | '3e', 79 | '4i', 80 | '5m', 81 | ] 82 | 83 | diff_map = { 84 | 'NOV' : 1, 85 | 'ADV' : 2, 86 | 'EXH' : 3, 87 | 'INF' : 4, 88 | 'MXM' : 5, 89 | } 90 | 91 | diff_strings = {v:k for k, v in diff_map.items()} 92 | 93 | def __init__(self, folder, all_2dx, all_jackets, xml): 94 | self.folder = folder 95 | self.all_2dx = all_2dx 96 | self.all_jackets = all_jackets 97 | 98 | self.name = fixBrokenChars(xml.title_name.text) 99 | self.id = int(xml['id']) 100 | self.ver = int(xml.version.text) 101 | self.infVer = int(xml.inf_ver.text) if xml.inf_ver else 0 102 | self.label = xml.label.text 103 | self.asciiName = xml.ascii.text 104 | self.artist = fixBrokenChars(xml.artist_name.text) 105 | self.minBpm = int(xml.bpm_min.text) / 100.0 106 | self.maxBpm = int(xml.bpm_max.text) / 100.0 107 | self.volume = int(xml.volume.text) / 127.0 108 | 109 | self.find_2dx() 110 | self.find_jackets() 111 | 112 | @property 113 | def infname(self): 114 | return {2: 'INF', 3: 'GRV', 4: 'HVN'}.get(self.infVer, 'INF') 115 | 116 | def find_2dx(self): 117 | try: 118 | # standard 119 | filename = "{:03d}_{:04d}_{}".format(self.ver, int(self.label), self.asciiName) 120 | except ValueError: 121 | # booth 122 | filename = "{}_{}".format(self.label, self.asciiName) 123 | # because some songs are still annoying 124 | if not self.dx_test(filename): 125 | filename = "{:03d}_{:04d}_{}".format(self.ver, self.id, self.asciiName) 126 | 127 | self.dx = [ 128 | self.dx_test(filename) 129 | ] 130 | for ver in self.dx_versions: 131 | ver_name = '{}_{}'.format(filename, ver) 132 | self.dx.append(self.dx_test(ver_name)) 133 | 134 | if all(x is None for x in self.dx): 135 | raise KeyError('Song {} has no music files'.format(self.id)) 136 | 137 | def find_jackets(self): 138 | jack_fmt = "jk_{:03d}_{:04d}_{}_b" 139 | self.jackets = [] 140 | for i in range(MAX_DIFF): 141 | jack = jack_fmt.format(self.ver, self.id, i+1) 142 | self.jackets.append(self.jacket_test(jack)) 143 | 144 | if all(x is None for x in self.jackets): 145 | raise KeyError('Song {} has no jacket files'.format(self.id)) 146 | 147 | def dx_test(self, dx): 148 | dx += '.2dx' 149 | return dx if dx in self.all_2dx else None 150 | 151 | def jacket_test(self, jacket): 152 | jacket += '.ifs' 153 | return jacket if jacket in self.all_jackets else None 154 | 155 | def get_jacket(self, diff = None): 156 | if diff is None: 157 | diff = self.diff_map['EXH'] 158 | 159 | diff -= 1 160 | if self.jackets[diff]: 161 | return self.jackets[diff] 162 | 163 | # go down until we find something, wrap if required 164 | test = list(range(diff, 0, -1)) + list(range(MAX_DIFF+1-diff, diff, -1)) 165 | for t in test: 166 | t -= 1 167 | if self.jackets[t]: 168 | return self.jackets[t] 169 | 170 | return None 171 | 172 | def extract_jacket(self, diff = None): 173 | path = os.path.join(self.folder, JACKET_PATH, self.get_jacket(diff)) 174 | ifs = IFS(path) 175 | textures = ifs.tree.folders['tex'].files.values() 176 | jacket = next(x for x in textures if x.name.endswith('.png')) 177 | dest = os.path.join(TEMP, jacket.name) 178 | with open(dest, 'wb') as f: 179 | f.write(jacket.load()) 180 | return dest 181 | 182 | @property 183 | def sanitized(self): 184 | sanitized = self.name 185 | # strip bad chars that windows won't allow 186 | homoglyphs = { 187 | '\\' : '\', 188 | '/' : '⁄', 189 | ':' : '։', 190 | '*' : '⁎', 191 | '?' : '?', 192 | '"' : "''", 193 | '<' : '‹', 194 | '>' : '›', 195 | '|' : 'ǀ', 196 | } 197 | for bad, good in homoglyphs.items(): 198 | sanitized = sanitized.replace(bad, good) 199 | return sanitized 200 | 201 | def load_2dx(self, dx): 202 | return BM2DX(os.path.join(self.folder, SONG_PATH, dx)).tracks 203 | 204 | def _lame_enc(self, jacket, wav, mp3, quality): 205 | bpmStr = twoDecimals(self.minBpm) 206 | if self.minBpm != self.maxBpm: 207 | bpmStr += '-' + twoDecimals(self.maxBpm) 208 | 209 | run(LAME, ' '.join([ 210 | quality, 211 | quote(wav), 212 | quote(mp3), 213 | '--tt', quote(self.name), 214 | '--ta', quote(self.artist), 215 | '--tl', quote(ALBUM_NAME), 216 | '--tn', quote(str(self.id)), 217 | '--tv', 'TPE2="Various Artists"', 218 | '--tv', 'TPOS=' + str(self.ver), 219 | '--tv', 'TBPM="' + bpmStr + '"', 220 | '--ti', quote(jacket), 221 | ])) 222 | 223 | def _convert(self, dx, diff = None): 224 | jacket = self.extract_jacket(diff) 225 | wav = os.path.join(TEMP, '{}.wav'.format(self.id)) 226 | out_wav = os.path.join(TEMP, '{}_out.wav'.format(self.id)) 227 | out_mp3 = "{:04d} - {}".format(self.id, self.sanitized) 228 | if diff is not None: 229 | diff = self.diff_strings[diff] 230 | if diff == 'INF': 231 | diff = self.infname 232 | out_mp3 += ' [{}]'.format(diff) 233 | out_mp3 += '.mp3' 234 | 235 | with open(wav, 'wb') as f: 236 | f.write(dx.data) 237 | 238 | # wavs from 2dx files aren't liked by lame 239 | sox_args = '-R "{}" -e signed-integer "{}"'.format(wav, out_wav) 240 | if AMPLIFY: 241 | sox_args = '--norm ' + sox_args 242 | run(SOX, sox_args) 243 | 244 | if PROCESS['320']: 245 | self._lame_enc(jacket, out_wav, os.path.join(OUT320, out_mp3), '-b320') 246 | if PROCESS['V0']: 247 | self._lame_enc(jacket, out_wav, os.path.join(OUTV0, out_mp3), '-V0') 248 | 249 | os.remove(jacket) 250 | os.remove(wav) 251 | os.remove(out_wav) 252 | 253 | def encode(self): 254 | for i in range(1, MAX_DIFF+1): 255 | if self.dx[i]: 256 | tracks = self.load_2dx(self.dx[i]) 257 | self._convert(tracks[0], i) 258 | 259 | # default map 260 | if self.dx[0]: 261 | tracks = self.load_2dx(self.dx[0]) 262 | 263 | # doesn't work since IV uses effect tracks again 264 | #if self.ver > 1 and len(tracks) > 1: 265 | 266 | # literally the only song that does this 267 | if self.id == 691: 268 | self._convert(tracks[1], self.diff_map['INF']) 269 | self._convert(tracks[0]) 270 | 271 | class Tutorial(Song): 272 | def __init__(self, folder, xml, all_jackets): 273 | Song.__init__(self, folder, None, None, xml) 274 | 275 | # finding the latest tutorial jacket, since the dll lies 276 | dummies = [x for x in all_jackets if x.endswith('dummy_b.ifs')] 277 | if len(dummies) == 1: 278 | dummy = dummies[0] 279 | try: 280 | self.ver = int(dummy[3:6]) 281 | except ValueError: # II is just jk_dummy 282 | self.ver = 2 283 | else: 284 | self.ver = max(int(x[3:6]) for x in dummies) 285 | dummy = 'jk_{:03d}_dummy_b.ifs'.format(self.ver) 286 | 287 | self.id = -self.ver 288 | self.artist = 'Konami' 289 | self.infVer = 0 290 | 291 | # for infinite infection 292 | for r in ['[sz:10]', '[sz:12]']: 293 | self.name = self.name.replace(r, '') 294 | 295 | self.dx = ['__tutorial.2dx'] 296 | self.dx.extend([None]*MAX_DIFF) 297 | 298 | self.jackets = [dummy] 299 | self.jackets.extend([None]*MAX_DIFF) 300 | 301 | def find_2dx(self): 302 | pass 303 | 304 | def find_jackets(self): 305 | pass 306 | 307 | def run(exe, arg): 308 | args = ' '.join((exe, arg)) 309 | FNULL = open(os.devnull, 'w') 310 | ret = subprocess.call(args, shell=False, stdout=FNULL, stderr=FNULL) 311 | 312 | if ret: 313 | raise OSError('Called process returned error') 314 | 315 | return ret 316 | 317 | def mkdir_p(path): 318 | try: 319 | os.makedirs(path) 320 | except OSError as exc: # Python >2.5 321 | if exc.errno == errno.EEXIST and os.path.isdir(path): 322 | pass 323 | else: 324 | raise 325 | 326 | def fixBrokenChars(name): 327 | # a bunch of chars get mapped oddly - bemani specific fuckery 328 | # MISSING: © 329 | replacements = [ 330 | ['\u203E', '~'], 331 | ['\u301C', '~'], 332 | ['\u49FA', 'ê'], 333 | ['\u5F5C', 'ū'], 334 | ['\u66E6', 'à'], 335 | ['\u66E9', 'è'], 336 | ['\u8E94', '🐾'], 337 | ['\u9A2B', 'á'], 338 | ['\u9A69', 'Ø'], 339 | ['\u9A6B', 'ā'], 340 | ['\u9A6A', 'ō'], 341 | ['\u9AAD', 'ü'], 342 | ['\u9B2F', 'ī'], 343 | ['\u9EF7', 'ē'], 344 | ['\u9F63', 'Ú'], 345 | ['\u9F67', 'Ä'], 346 | ['\u973B', '♠'], 347 | ['\u9F6A', '♣'], 348 | ['\u9448', '♦'], 349 | ['\u9F72', '♥'], 350 | ['\u9F76', '♡'], 351 | ['\u9F77', 'é'], 352 | ] 353 | for rep in replacements: 354 | name = name.replace(rep[0], rep[1]) 355 | return name 356 | 357 | def twoDecimals(num): 358 | return "{0:.2f}".format(num).rstrip('0').rstrip('.') 359 | 360 | def processTutorial(folder, songs, all_jackets): 361 | dll_path = os.path.join(folder, 'soundvoltex.dll') 362 | with open(dll_path,'rb') as f: 363 | dll = f.read() 364 | pattern = b'