├── .DS_Store ├── .gitignore ├── DemoTools ├── __init__.py ├── demoRejoiner.py ├── demoSplitter.py ├── demoTextExtractor.py ├── demoTextInjector.py └── demoTextTesting.py ├── Old vers ├── ExtractRadioDatV0.4.1.py ├── ExtractRadioDatV0.4.py ├── ExtractTextsV0.1.py ├── ExtractTextsV0.2.py ├── ExtractTextsV0.3..py ├── ExtractTextsV0.3.5 copy.py ├── ExtractTextsV0.3.5.py ├── ExtractTextsV0.3.6.py ├── ExtractTextsV0.3.8 Line by line.py ├── ExtractTextsV0.3.9.py ├── REMOVEDCHARS.PY ├── RadioDatTools--preXML.py ├── RadioDatToolsv0.4.5 backupcopy.py ├── characters_old.py ├── demoTextInjector_old.py ├── main.py ├── stageCalls-before-0A.ods ├── vagToWav.py ├── vagToWav2.py └── xmltest.py ├── README.md ├── RadioDatRecompiler.py ├── RadioDatTools.py ├── StageDirTools ├── Notes.txt ├── analyzeStageDirFiles.sh ├── assmembleDar.py ├── callsInStageDirFinder.py ├── extractDar.py └── stageDirFileExtractor.py ├── audioTools ├── sub-test-2.py ├── subtitle display test.py └── vagAudioTools.py ├── common └── structs.py ├── creditsHacking ├── creditsHacking.py ├── decryptionDiagram.md ├── imageComparison.sh ├── imageEncoder.py ├── imhex patterns 00eae8rar.txt ├── lz77-test.py ├── lzss-test.py ├── newCompressionTest.py ├── scra.py └── scratchpad.py ├── demoClasses.py ├── demoManager.py ├── graphicsExport ├── KanjiStillMissing.txt └── contextList.txt ├── insertVox.py ├── itemDescriptionFinder.py ├── jsonTools.py ├── quickTranslate.py ├── radioModule.py ├── radioTools ├── __init__.py ├── callExtactor.py ├── callInsertor.py └── jsonToCSV.py ├── requirements.txt ├── testing ├── compareDemos.sh ├── convertImage.sh ├── demoBinChecker.sh ├── exportAndAnalyze.sh ├── extractALLmaterials.sh ├── extractAllCalls.sh ├── findEndings.py ├── goblin.bat ├── incorrectRecompileCheck.py ├── patternChecker.py ├── radioDatUSAChecker.sh ├── runJpnBuildTest.sh ├── runusaBuildTest.sh ├── testAllRadioFiles.sh └── testRecompileAll.sh ├── translation ├── Output.txt ├── characters.py ├── combine.py ├── 
graphicShower.py ├── kanji.txt ├── radioDict.py └── unique graphics ├── voxTools ├── vagOutput.py ├── voxRejoiner.py ├── voxSplit.py ├── voxTextExtractor.py └── voxTextInjector.py ├── xmlModifierTools.py └── zmovieTools └── movieSplitter.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ./RADIO*.DAT 2 | .DS* 3 | RADIO-jpn.DAT 4 | RADIO-usa.DAT 5 | Scratchpad.py 6 | mgs1-undub-docs/.obsidian 7 | mgs1-undub-docs/.trash 8 | __pycache__/ 9 | -------------------------------------------------------------------------------- /DemoTools/__init__.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | 3 | # Necessary to find all modules in this package... doesnt really work as expected. 
import os, sys
sys.path.append(os.path.abspath('./myScripts'))
import re
import glob
import struct
import progressbar
import translation.radioDict as RD
import json

import DemoTools.demoTextExtractor as DTE

# Rebuilds DEMO.DAT by concatenating the split .dmo files in numeric order.
# Whenever a file with the same name exists in the newBins directory, that
# version is used instead of the original one from bins.

version = "usa"
version = "jpn"  # the last assignment wins; comment one out to switch region
disc = 1

# Toggles
debug = True


def _demoNumber(path: str) -> int:
    """Return the number after the last '-' in a 'demo-NN.dmo' path (sort key)."""
    return int(path.split('-')[-1].split('.')[0])


# Directory configs
inputDir = f'workingFiles/{version}-d{disc}/demo/bins'
outputDir = f'workingFiles/{version}-d{disc}/demo/newBins'
outputDemoFile = f'workingFiles/{version}-d{disc}/demo/new-DEMO.DAT'
os.makedirs(outputDir, exist_ok=True)

origBinFiles = glob.glob(os.path.join(inputDir, '*.dmo'))
origBinFiles.sort(key=_demoNumber)

newBinFiles = glob.glob(os.path.join(outputDir, '*.dmo'))
# The original sorted origBinFiles a second time here and left this list unsorted.
newBinFiles.sort(key=_demoNumber)
replacementPaths = set(newBinFiles)  # O(1) membership checks in the loop below

chunks = []
for file in origBinFiles:
    candidate = file.replace('bins', 'newBins')
    # os.path.basename handles both '/' and '\\' separators returned by glob.
    basename = os.path.basename(file).split(".")[0]
    if candidate in replacementPaths:
        file = candidate
        print(f'{basename}: Using new version of the demo...')
    else:
        print(f'{basename}: Using old file...\r', end="")
    with open(file, 'rb') as demoBytes:
        chunks.append(demoBytes.read())

# Join once instead of repeatedly extending a bytes object.
newDemoBytes = b''.join(chunks)

with open(outputDemoFile, 'wb') as f:
    f.write(newDemoBytes)

print(f'{outputDemoFile} was written!')
import os, struct
# import progressbar, time

version = "usa"
disc = 1
filename = f"build-src/{version}-d{disc}/MGS/DEMO.DAT"
outputDir = f"workingFiles/{version}-d{disc}/demo/bins"

# Read the whole archive up front and release the handle immediately.
with open(filename, 'rb') as demoFile:
    demoData = demoFile.read()

offsets = []
os.makedirs(outputDir, exist_ok=True)
opening = b'\x10\x08\x00\x00'
# opening = b'\x10\x08\x00\x00\x05\x00\x00\x00'


def findDemoOffsets():
    """
    Scan demoData for the 4-byte `opening` marker and record each hit in `offsets`.

    Only positions that are multiples of 2048 (0x800) are checked, because all
    demo files are aligned to 0x800 (credit to Green Goblin). Found offsets are
    appended to the module-level `offsets` list and printed.
    """
    for offset in range(0, len(demoData) - 8, 2048):
        if demoData[offset:offset + 4] == opening:
            print(f'Offset found at offset {offset}!')
            offsets.append(offset)

    print(f'Ending! {len(offsets)} offsets found:')
    for offset in offsets:
        print(offset.to_bytes(4, 'big').hex())


def splitDemoFiles():
    """
    Write one demo-NN.dmo file per entry in `offsets`, plus demoOffsets.txt.

    Each demo runs from its own offset up to the next offset; the last one
    runs to the end of demoData. demoOffsets.txt gets one line per demo with
    its start, end and length.
    """
    ends = offsets[1:] + [len(demoData)]
    with open(f'{outputDir}/demoOffsets.txt', 'w') as offsetFile:
        for number, (start, end) in enumerate(zip(offsets, ends), start=1):
            offsetFile.write(f'{number:02}: {start:08x} - {end:08x}, length: {end - start}\n')
            with open(f'{outputDir}/demo-{number:02}.dmo', 'wb') as f:
                f.write(demoData[start:end])
            print(f'Demo {number} written!')

    print(f'{len(offsets)} demo files written!')


if __name__ == '__main__':
    findDemoOffsets()
    splitDemoFiles()
4 | 5 | # Script for working with Metal Gear Solid data 6 | # 7 | # Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/) 8 | # 9 | # Permission to use, copy, modify, and/or distribute this software for any 10 | # purpose with or without fee is hereby granted, provided that the above 11 | # copyright notice and this permission notice appear in all copies. 12 | 13 | """ 14 | 15 | import os, sys 16 | sys.path.append(os.path.abspath('./myScripts')) 17 | import re 18 | import glob 19 | import struct 20 | import progressbar 21 | import translation.radioDict as RD 22 | import json 23 | 24 | demoScriptData: dict = {} 25 | 26 | bar = progressbar.ProgressBar() 27 | 28 | version = "usa" 29 | version = "jpn" 30 | disc = 1 31 | 32 | # Create a directory to store the extracted texts 33 | # Get the files from the folder directory 34 | inputDir = f'workingFiles/{version}-d{disc}/demo/bins' 35 | outputDir = f'workingFiles/{version}-d{disc}/demo/texts' 36 | os.makedirs(outputDir, exist_ok=True) 37 | outputJsonFile = f"workingFiles/{version}-d{disc}/demo/demoText-{version}.json" 38 | 39 | # Grab all files in the directory and sort into order. 40 | bin_files = glob.glob(os.path.join(inputDir, '*.dmo')) 41 | bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0])) 42 | 43 | # flags 44 | debug = True 45 | 46 | # List of files to skip (Ex: 005.bin does not contain texts) 47 | skipFilesListD1 = [ 48 | 'demo-05', 49 | 'demo-06', 50 | 'demo-31', 51 | 'demo-33', 52 | 'demo-35', 53 | 'demo-63', 54 | 'demo-67', 55 | 'demo-71', 56 | 'demo-72', 57 | ] 58 | 59 | # Set up progress bar 60 | bar.maxval = len(bin_files) 61 | barCount = 0 62 | bar.start() 63 | 64 | # DEBUG 65 | # if debug: 66 | # print(f'Only doing demo-1.bin!') 67 | # bin_files = [f'demoWorkingDir/{version}/bins/demo-25.bin'] 68 | 69 | def getTextHexes(textToAnalyze: bytes) -> tuple[list, bytes, list]: 70 | """ 71 | This just grabs all the text from each sector of the text area. 
72 | We just grab the hex and return it. We also return the custom 73 | character bytes at the end, which should always make a dictionary. 74 | """ 75 | global debug 76 | 77 | #startingPoint = struct.unpack(" list: 124 | """ 125 | This is awful, but it should to a certain degree find demo offset spots. 126 | If there's a better way to do this lmk, but it's not too inefficient. 127 | """ 128 | patternA = b"\x03..." + b"...\x00" + b"....\x10\x00" # Figured out the universal pattern. 129 | # 03 ?? ?? ?? ?? ?? ?? 00 ?? ?? ?? ?? 10 00 14 00 >> For IMHEX usage 130 | # patternB = bytes.fromhex("FF FF FF 7F 10 00") 131 | # This is actually the indication a dialogue area runs to end of demo (until frame 0x7FFFFF) 132 | 133 | matches = re.finditer(patternA, demoData, re.DOTALL) 134 | offsets = [match.start() for match in matches] 135 | 136 | finalMatches = [] 137 | for offset in offsets: 138 | # Extract size of the area 139 | length = struct.unpack(' list: 160 | global debug 161 | global filename 162 | global version 163 | 164 | dialogue = [] 165 | 166 | if graphicsData is not None and filename is not None: 167 | demoDict = RD.makeCallDictionary(filename, graphicsData) 168 | else: 169 | demoDict = {} 170 | 171 | # Loop for all text, offsets, etc. 
def textToDict(dialogue: list) -> dict:
    """
    Number the items of `dialogue` starting at 1.

    Keys are the position formatted with at least two digits ('01', '02', ...),
    values are the items themselves.
    """
    return {f'{i:02}': text for i, text in enumerate(dialogue, start=1)}


def writeTextToFile(filename: str, dialogue: list) -> None:
    """Write every item of `dialogue` to `filename` as UTF-8, one per line."""
    with open(filename, 'w', encoding='utf8') as f:
        f.writelines(f'{text}\n' for text in dialogue)


def findOffsets(byteData: bytes, pattern: bytes) -> list:
    """
    Return the start offset of every occurrence of `pattern` in `byteData`.

    Overlapping occurrences are included. An empty pattern yields an empty list.

    The previous version restarted the search at the offset it had just found,
    so it never terminated once there was a match, and it collected copies of
    `pattern` instead of the offsets.
    """
    if not pattern:
        return []
    foundOffsets = []
    offset = byteData.find(pattern)
    while offset != -1:
        foundOffsets.append(offset)
        # Resume one byte past the hit so the search always moves forward.
        offset = byteData.find(pattern, offset + 1)
    return foundOffsets


if __name__ == "__main__":
    # Loop through each .bin file in the folder
    for bin_file in bin_files:
        filename = os.path.basename(bin_file)
        basename = filename.split('.')[0]

        # Manual override to skip certain demos.
        # The skip list holds names without extension ('demo-05'), so compare
        # against basename; comparing against filename never matched.
        if basename in skipFilesListD1:
            continue

        if debug:
            print(f"Processing file: {bin_file}")

        # Open the binary file for reading in binary mode
        with open(bin_file, 'rb') as binary_file:
            demoData = binary_file.read()

        textOffsets = getTextAreaOffsets(demoData)

        print(f'{os.path.basename(bin_file)}: {textOffsets}')

        texts = []
        timings = []  # list of timings (start time, duration)

        for offset in textOffsets:
            subset = getTextAreaBytes(offset, demoData)
            textHexes, graphicsBytes, coords = getTextHexes(subset)
            texts.extend(getDialogue(textHexes, graphicsBytes))
            timings.extend(coords)

        demoScriptData[basename] = [textToDict(texts), textToDict(timings)]
        writeTextToFile(f'{outputDir}/{basename}.txt', texts)
        # writeTextToFile(f'{outputDir}/{basename}-timings.txt', timings)

    # ensure_ascii=False emits raw non-ASCII text, so pin the encoding instead
    # of relying on the platform default (matches writeTextToFile).
    with open(outputJsonFile, 'w', encoding='utf8') as f:
        f.write(json.dumps(demoScriptData, ensure_ascii=False))
12 | 13 | """ 14 | 15 | import os, sys 16 | sys.path.append(os.path.abspath('./myScripts')) 17 | import re 18 | import glob 19 | import struct 20 | import progressbar 21 | import translation.radioDict as RD 22 | import json 23 | 24 | import DemoTools.demoTextExtractor as DTE 25 | from common.structs import subtitle 26 | 27 | version = "usa" 28 | version = "jpn" 29 | disc = 1 30 | 31 | # Toggles 32 | debug = True 33 | 34 | # Directory configs 35 | inputDir = f'workingFiles/{version}-d{disc}/demo/bins' 36 | outputDir = f'workingFiles/{version}-d{disc}/demo/newBins' 37 | injectJson = f'build-proprietary/demo/demoText-{version}-undub.json' 38 | os.makedirs(outputDir, exist_ok=True) 39 | 40 | # Collect files to use 41 | bin_files = glob.glob(os.path.join(inputDir, '*.dmo')) 42 | bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0])) 43 | 44 | # Collect source json to inject 45 | injectTexts = json.load(open(injectJson, 'r')) 46 | 47 | """class subtitle: 48 | text: str 49 | startFrame: int 50 | duration: int 51 | 52 | def __init__(self, dialogue, b, c) -> None: 53 | self.text = dialogue 54 | self.startFrame = int(b) 55 | self.duration = int(c) 56 | 57 | return 58 | 59 | def __str__(self) -> str: 60 | a = f'Subtitle contents: Start: {self.startFrame} Duration: {self.duration} Text: {self.text}' 61 | return a 62 | 63 | def __bytes__(self) -> bytes: 64 | 65 | # Simple. Encodes the dialogue as bytes. 66 | # Adds the buffer we need to be divisible by 4... 67 | # Return the new bytes. 
def assembleTitles(texts: dict, timings: dict) -> "list[subtitle]":
    """
    Build one subtitle object per entry in `texts`.

    Entries are looked up by their 1-based position formatted with at least
    two digits ('01', '02', ...). Each timing value is a comma-separated
    string whose first two fields are passed to subtitle() as start and
    duration.
    """
    subsList = []
    for i in range(len(texts)):
        index = "{:02}".format(i + 1)
        fields = timings.get(index).split(",")  # split once, use both fields
        subsList.append(subtitle(texts.get(index), fields[0], fields[1]))

    return subsList

# TODO:
# - change key to int
# - make sure range hits all texts

skipFilesListD1 = [
    'demo-05',
    'demo-06',
    'demo-31',
    'demo-33',
    'demo-35',
    'demo-63',
    'demo-67',
    'demo-71',
    'demo-72',
]

def genSubBlock(subs: "list[subtitle]") -> bytes:
    """
    Serialise a list of subtitles into one contiguous block of bytes.

    Every subtitle except the last is prefixed with a 4-byte length
    (struct format "I") equal to its encoded size plus 4. The last
    subtitle is prefixed with four zero bytes instead.

    Raises ValueError when `subs` is empty, since there is no last entry to
    write. The previous version failed with a bare IndexError here.
    NOTE(review): whether an empty dialogue section should instead produce an
    empty block is a format question for the caller to decide.
    """
    if not subs:
        raise ValueError("genSubBlock needs at least one subtitle")

    # Encode each subtitle exactly once; bytes(sub) was previously evaluated
    # twice for every non-final entry.
    encoded = [bytes(sub) for sub in subs]

    parts = []
    for entry in encoded[:-1]:
        parts.append(struct.pack("I", len(entry) + 4))
        parts.append(entry)

    # Add the last one
    parts.append(bytes(4))
    parts.append(encoded[-1])

    return b''.join(parts)
132 | """ 133 | timings = int(timing.split(',')) 134 | start = timings[0] 135 | duration = timings[1] 136 | 137 | subtitleBytes: bytes = struct.pack("III", start, duration, 0) 138 | subtitleBytes += RD.encodeJapaneseHex(text)[0] 139 | bufferNeeded = 4 - (len(subtitleBytes) % 4) 140 | for j in range(bufferNeeded): 141 | newBytes += b'\x00' 142 | j += 1 143 | 144 | return subtitleBytes 145 | 146 | 147 | 148 | newBytes = b"" 149 | firstLengthBytes = originalBinary[18:20] 150 | firstLength = struct.unpack(' bytes: 188 | """ 189 | Returns the header portion only for a given dialogue section. 190 | """ 191 | headerLength = struct.unpack("H", data[14:16])[0] + 4 192 | return data[:headerLength] 193 | 194 | # if debug: 195 | # print(f'Only injecting Demo 25!') 196 | # bin_files = ['demoWorkingDir/usa/bins/demo-25.dmo'] 197 | 198 | if __name__ == "__main__": 199 | """ 200 | Main logic is here. 201 | """ 202 | for file in bin_files: 203 | print(os.path.basename(f"{file}: "), end="") 204 | filename = os.path.basename(file) 205 | basename = filename.split(".")[0] 206 | 207 | # if debug: 208 | # print(f'Processing {basename}') 209 | 210 | if basename in skipFilesListD1: 211 | if debug: 212 | print(f'{basename} in skip list. Continuing... ') 213 | continue 214 | 215 | # if injectTexts[basename] is None: 216 | if basename not in injectTexts: 217 | print(f'{basename} was not in the json. Skipping...\r', end="") 218 | continue 219 | 220 | # Initialize the demo data and the dictionary we're using to replace it. 221 | origDemoData = open(file, 'rb').read() 222 | origBlocks = len(origDemoData) // 0x800 # Use this later to check we hit the same length! 223 | demoDict: dict = injectTexts[basename][0] 224 | demoTimings: dict = injectTexts[basename][1] 225 | 226 | subtitles = assembleTitles(demoDict, demoTimings) 227 | 228 | offsets = DTE.getTextAreaOffsets(origDemoData) 229 | # nextStart = 1 # index of subtitle to encode. No longer needed. 
230 | newDemoData = origDemoData[0 : offsets[0]] # UNTIL the header 231 | 232 | for Num in range(len(offsets)): 233 | oldHeader = getDemoDiagHeader(origDemoData[offsets[Num]:]) 234 | oldLength = struct.unpack("H", oldHeader[1:3])[0] 235 | frameStart = struct.unpack("I", oldHeader[4:8])[0] 236 | frameLimit = struct.unpack("I", oldHeader[8:12])[0] 237 | # Get only subtitles in this section. 238 | subsForSection = [] 239 | for sub in subtitles: 240 | if frameStart <= sub.startFrame < frameLimit: 241 | subsForSection.append(sub) 242 | newSubBlock = genSubBlock(subsForSection) # TODO: CODE THIS DEF 243 | newLength = len(oldHeader) + len(newSubBlock) 244 | 245 | newHeader = bytes.fromhex("03") + struct.pack("H", newLength) + bytes(1) + struct.pack("II", frameStart, frameLimit) + oldHeader[12:16] + struct.pack("I", len(oldHeader) + len(newSubBlock) - 4) + oldHeader[20:] 246 | newDemoData += newHeader + newSubBlock 247 | # Add the rest of the data from this to the next offset OR until end of original demo. 248 | if Num < len(offsets) - 1: # if it is NOT the last... 249 | newDemoData += origDemoData[offsets[Num] + oldLength: offsets[Num + 1]] 250 | else: 251 | newDemoData += origDemoData[offsets[Num] + oldLength: ] 252 | # if debug: 253 | # print(newSubBlock.hex(sep=" ", bytes_per_sep=4)) 254 | 255 | """# Buffer the demo to 0x800 block 256 | if len(newDemoData) % 0x800 != 0: 257 | if len(newDemoData) // 0x800 < len(origDemoData) // 0x800: 258 | newDemoData += bytes(len(newDemoData) % 0x800) 259 | else: 260 | checkBytes = newDemoData[len(newDemoData) - len(origDemoData):] 261 | if checkBytes == bytes(len(checkBytes)): 262 | newDemoData = newDemoData[:len(newDemoData) - len(checkBytes)]""" 263 | 264 | # Adjust length to match original file. 
265 | if len(newDemoData) == len(origDemoData): 266 | print("Alignment correct!") 267 | elif len(newDemoData) < len(origDemoData): # new demo shorter 268 | newDemoData += bytes(len(origDemoData) - len(newDemoData)) 269 | if len(newDemoData) % 0x800 == 0: 270 | print("Alignment correct!") 271 | else: 272 | checkBytes = newDemoData[len(newDemoData) - len(origDemoData):] 273 | if checkBytes == bytes(len(checkBytes)): 274 | newDemoData = newDemoData[:len(newDemoData) - len(checkBytes)] 275 | else: 276 | print(f'CRITICAL ERROR! New demo cannot be truncated to original length!') 277 | exit() 278 | 279 | newBlocks = len(newDemoData) // 0x800 280 | if newBlocks != origBlocks: 281 | print(f"{len(newDemoData)} / {len(origDemoData)}") 282 | print(f'BLOCK MISMATCH!\nNew data is {newBlocks} blocks, old was {origBlocks} blocks.\nTHERE COULD BE PROBLEMS IN RECOMPILE!!') 283 | 284 | # Finished work! Write the new file. 285 | newFile = open(f'{outputDir}/{basename}.dmo', 'wb') 286 | newFile.write(newDemoData) 287 | newFile.close() 288 | print(f'New Demo Files have been injected!') 289 | exit(0) -------------------------------------------------------------------------------- /DemoTools/demoTextTesting.py: -------------------------------------------------------------------------------- 1 | import DemoTools.demoTextExtractor as DTE 2 | import re, struct 3 | 4 | inputFile = 'demoWorkingDir/jpn/bins/demo-6.dmo' 5 | demoFile = open(inputFile, 'rb') 6 | demoData = demoFile.read() 7 | 8 | 9 | -------------------------------------------------------------------------------- /Old vers/ExtractTextsV0.1.py: -------------------------------------------------------------------------------- 1 | ##!/bin/python3 2 | 3 | # Assumes RADIO.DAT for filename 4 | 5 | import os 6 | import struct 7 | 8 | filename = "RADIO.DAT" 9 | offset = 0 10 | 11 | radioFile = open(filename, 'rb') 12 | 13 | """ 14 | freq = struct.unpack('>h', radioFile.read(2)) 15 | 16 | print(type(freq)) 17 | if 14000 < freq[0] < 14300: 
def checkIsFreq(checkByte):
    """
    Return True when the two bytes in `checkByte` look like a codec frequency.

    The bytes are read as a big-endian signed 16-bit integer and divided by
    100; the result is printed and counts as a frequency when it lies
    strictly between 140 and 143.
    """
    freq = struct.unpack('>h', checkByte)[0] / 100
    print(freq)
    return 140 < freq < 143
def getFreq(offsetCheck):
    """
    Read the big-endian signed 16-bit value at `offsetCheck` in radioFile
    and return it divided by 100.
    """
    radioFile.seek(offsetCheck)
    raw = radioFile.read(2)  # renamed from `bytes`, which shadowed the builtin
    return struct.unpack('>h', raw)[0] / 100


def getCallLength(offset):
    """
    Return the big-endian signed 16-bit value stored 9 bytes after `offset`
    in radioFile.
    """
    radioFile.seek(offset + 9)
    return struct.unpack('>h', radioFile.read(2))[0]
import os
import struct

filename = "RADIO-usa.DAT"
#filename = "RADIO-jpn.DAT"

offset = 0
# offset = 293536 # Freq 140.85

radioFile = open(filename, 'rb')
output = open("output.txt", '+a')

# This script never defines radioData, so the old `radioData.__len__()`
# raised NameError before anything ran. Ask the filesystem for the size.
fileSize = os.path.getsize(filename)

# print(fileSize) # Result is 1776859!

def checkFreq(offsetCheck):
    """
    Return True when the big-endian signed 16-bit value at `offsetCheck` in
    radioFile lies strictly between 14000 and 14300.
    """
    radioFile.seek(offsetCheck)
    freq = struct.unpack('>h', radioFile.read(2))[0]
    return 14000 < freq < 14300

def getFreq(offsetCheck):
    """
    Return the big-endian signed 16-bit value at `offsetCheck` in radioFile,
    divided by 100.
    """
    radioFile.seek(offsetCheck)
    return struct.unpack('>h', radioFile.read(2))[0] / 100

def getCallLength(offset):
    """
    Return the big-endian signed 16-bit value stored 9 bytes after `offset`.
    """
    # Call length is after 8 bytes, then 0x80, then the length of the script?
    radioFile.seek(offset + 9)
    return struct.unpack('>h', radioFile.read(2))[0]
def getBytesAtOffset(offset):
    """Return the single byte stored at `offset` in radioFile."""
    radioFile.seek(offset)
    return radioFile.read(1)

def handleCall(offsetCheck):  # Assume call is just an 8 byte header for now
    """
    Read the 8-byte call header at `offsetCheck` and log its four 2-byte
    fields to the output file.
    """
    # Seek to the parameter; the old code used the global `offset` instead.
    radioFile.seek(offsetCheck)
    header = radioFile.read(8)

    # Separate the header
    Freq = header[0:2]
    unk0 = header[2:4]
    unk1 = header[4:6]
    unk2 = header[6:8]
    output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2} ')

    return

def handleCommand(offsetCheck):
    """
    Log the command byte at `offsetCheck` and return a short label for it.

    Returns "Switch Op?\\n" for byte 0x31 and "UNKNOWN!\\n" for anything else.
    """
    output.write(f'Handling the command...\n')
    radioFile.seek(offsetCheck)
    commandByte = radioFile.read(1)
    output.write(f'command is {commandByte.hex()}\n')

    # Compare the raw byte. The old code matched the hex *string* against a
    # bytes pattern, so the 0x31 case could never be selected.
    if commandByte == b'\x31':
        return "Switch Op?\n"
    return "UNKNOWN!\n"
#!/bin/python

# Assumes RADIO.DAT for filename
"""
We can't get all the way through, so let's try parsing some calls.

Switching commands as I go to use the radioData as that would be in memory...
"""

import os
import struct

filename = "RADIO-usa.DAT"
#filename = "RADIO-jpn.DAT"

offset = 0
# offset = 293536 # Freq 140.85

# Read the whole file into memory and close the handle straight away.
with open(filename, 'rb') as radioFile:
    radioData = radioFile.read()
fileSize = len(radioData)

# Opened (and therefore created) as before, but nothing is written in this
# scratch script, so close it again immediately.
output = open("output.txt", '+a')
output.close()

Header = radioData[offset : offset + 8]
print(type(Header))
print(Header)

freq = struct.unpack('>h', Header[0:2])[0]
print(freq)
print(hex(freq))

command = b'\x80'
# b'\x80' is not valid UTF-8, so decoding it fails. bytes.hex() gives the
# printable form, and a distinct name avoids shadowing the builtin hex().
commandHex = command.hex()
print(f'Command: {command}, hex = {commandHex}')
commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ',
                   b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME',
                   b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH',
                   b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'ANIMATION', b'\x00':'NULL'
                   }

def commandToEnglish(hex):
    """
    Return the English name for a one-byte command, or a fixed
    "BYTE WAS NOT DEFINED!!!!" message when the byte is not in the table.
    """
    try:
        return commandNamesEng[hex]
    except (KeyError, TypeError):
        # KeyError: unknown byte. TypeError: unhashable input such as a
        # bytearray. Anything else is a real error and should surface.
        return "BYTE WAS NOT DEFINED!!!!"


# print(fileSize) # Result is 1776859!

def checkFreq(offsetCheck):
    """
    Return True when the big-endian signed 16-bit value at `offsetCheck` in
    radioData lies strictly between 14000 and 14300.

    Returns False when fewer than two bytes remain at that position.
    """
    # Read from the position the caller asked about; the old code sliced at
    # the global `offset` and ignored its parameter.
    raw = radioData[offsetCheck : offsetCheck + 2]
    if len(raw) < 2:
        return False
    freq = struct.unpack('>h', raw)[0]  # INT from two bytes
    return 14000 < freq < 14300

def getFreq(offsetCheck):
    """
    Return the big-endian signed 16-bit value at `offsetCheck` in radioData,
    divided by 100.
    """
    # Same fix as checkFreq: use the parameter. The unused seek/read on
    # radioFile, whose result shadowed the builtin `bytes`, is gone.
    freq = struct.unpack('>h', radioData[offsetCheck : offsetCheck + 2])[0]
    return freq / 100
64 | 65 | lengthBytes = radioFile.read(2) 66 | lengthT = struct.unpack('>h', lengthBytes) 67 | return lengthT[0] 68 | 69 | 70 | def getBytesAtOffset(offset): 71 | global radioFile 72 | radioFile.seek(offset) 73 | byte = radioFile.read(1) 74 | return byte 75 | 76 | def handleCall(offsetCheck): # Assume call is just an 8 byte header for now 77 | global radioFile 78 | global output 79 | radioFile.seek(offset) 80 | header = radioFile.read(8) 81 | 82 | # Separate the header 83 | Freq = header[0:2] 84 | unk0 = header[2:4] 85 | unk1 = header[4:6] 86 | unk2 = header[6:8] 87 | output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2} \n') 88 | return 89 | 90 | def handleCommand(offsetCheck): 91 | global radioFile 92 | global output 93 | 94 | output.write(f'Handling the command...\n') 95 | commandByte = radioData[offsetCheck].to_bytes() 96 | output.write(f'command is {commandByte}\n') 97 | 98 | match commandByte: 99 | case b'\x80': 100 | offsetCheck += 1 101 | length = struct.unpack('>h', radioData[ offsetCheck : offsetCheck + 2])[0] 102 | output.write(f'Length of command is {length}\n') 103 | 104 | return length + 1 105 | case _: 106 | return 8 107 | 108 | 109 | while offset < fileSize: 110 | offsetHex = hex(offset) 111 | output.write(f'Loop start! Offset is currently {offset} or {offsetHex}\n') 112 | if offset == fileSize: 113 | print("Offset and fileSize match!!!\n END PROGRAM") 114 | break 115 | if checkFreq(offset): 116 | freq = getFreq(offset) 117 | print(f"Call found! 
Frequency is {freq}\n") 118 | handleCall(offset) 119 | offset += 8 120 | else: 121 | byte = getBytesAtOffset(offset) 122 | thisCommand = commandToEnglish(byte) 123 | print(thisCommand + " is the command to handle with value: " + str(byte)) 124 | byteInt = byte[0] 125 | 126 | 127 | commandToEnglish(byte) 128 | length = handleCommand(offset) 129 | offset += length 130 | 131 | 132 | output.close() -------------------------------------------------------------------------------- /Old vers/ExtractTextsV0.3.6.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # Assumes RADIO.DAT for filename 4 | """ 5 | We can't get all the way through, so let's try parsing some calls. 6 | 7 | v0.3.6: Adding a "Chunk pull" and "chunk analyzer" 8 | """ 9 | 10 | 11 | import os 12 | import struct 13 | 14 | filename = "/Users/solidmixer/projects/mgs1-undub/RADIO-usa.DAT" 15 | #filename = "RADIO-jpn.DAT" 16 | 17 | offset = 0 18 | # offset = 293536 # Freq 140.85 19 | 20 | radioFile = open(filename, 'rb') 21 | output = open("output.txt", 'w') 22 | 23 | offset = 0 24 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 25 | fileSize = radioData.__len__() 26 | 27 | 28 | 29 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ', 30 | b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME', 31 | b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH', 32 | b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'ANIMATION', b'\x00':'NULL' 33 | } 34 | 35 | def commandToEnglish(hex): 36 | try: 37 | commandNamesEng[hex] 38 | return commandNamesEng[hex] 39 | except: 40 | return "BYTE WAS NOT DEFINED!!!!" 41 | 42 | 43 | # print(fileSize) # Result is 1776859! 44 | 45 | def checkFreq(offsetCheck): # Checks if the next two bytes are a codec number or not. Returns True or False. 
46 | global radioData 47 | freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes 48 | 49 | if 14000 < freq < 14300: 50 | return True 51 | else: 52 | return False 53 | 54 | def getFreq(offsetCheck): # If freq is at offset, return frequency as 140.15 55 | global radioFile 56 | 57 | radioFile.seek(offsetCheck) 58 | bytes = radioFile.read(2) 59 | 60 | freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] 61 | return freq / 100 62 | 63 | def getCallLength(offset): # Returns the length of the call, offset must be at the freq bytes 64 | global radioFile 65 | radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script in 2x bytes, then FF 66 | 67 | lengthBytes = radioFile.read(2) 68 | lengthT = struct.unpack('>h', lengthBytes) 69 | return lengthT[0] 70 | 71 | def getLength(offsetCheck): # Returns the length of the command, offset must be at the freq bytes 72 | global radioData 73 | 74 | lengthBytes = radioData[offsetCheck + 1: offsetCheck + 3] 75 | lengthT = struct.unpack('>H', lengthBytes)[0] 76 | return lengthT 77 | 78 | def getByteAtOffset(offsetCheck): # Returns a single byte, probably redundant 79 | global radioData 80 | return radioData[offsetCheck] 81 | 82 | def handleCallHeader(offsetCheck): # Assume call is just an 8 byte header for now 83 | global radioFile 84 | global output 85 | radioFile.seek(offset) 86 | header = radioFile.read(12) 87 | 88 | # Separate the header 89 | Freq = header[0:2] 90 | unk0 = header[2:4] 91 | unk1 = header[4:6] 92 | unk2 = header[6:8] 93 | 94 | if header[8:9] == b'\x80': 95 | callLength = header[9:11] 96 | numBytes = struct.unpack('>h', callLength) 97 | else: 98 | output.write(f'ERROR AT HEX {callLength}! ') 99 | 100 | # Quick check we ended with an FF 101 | if header[11] == b'\xFF': 102 | output.write('Call intro nded with FF successfully\n') 103 | else: 104 | output.write(f'Call header DID NOT end in FF! 
Check hex at {callLength}') 105 | 106 | output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2}, Call is {numBytes[0]} bytes long, hex {callLength}:\n') 107 | return 108 | 109 | def handleCommand(offsetCheck): # We get through the file! But needs refinement... We're not ending evenly and lengths are too long. 110 | # global radioFile 111 | global radioData 112 | global output 113 | 114 | output.write(f'Handling the command... ') 115 | commandByte = radioData[offsetCheck] #.to_bytes()? 116 | output.write(f'Command is {commandByte}\n') 117 | 118 | if commandByte == b'\x00': 119 | return 1 120 | 121 | length = getLength(offsetCheck) 122 | output.write(f'Length of command is {length}\n') 123 | commandBytes = radioData[offset : offset + length + 2] 124 | print(commandByte, ": Offset: ", offsetCheck, " // Content: ", commandBytes, end="\n\n") 125 | return length + 2 126 | """ 127 | match commandByte: 128 | case b'\x80': 129 | offsetCheck += 1 130 | length = getLength(offsetCheck) 131 | output.write(f'Length of command is {length}\n') 132 | commandBytes = radioData[offset:offset + length + 1] 133 | print(commandBytes, end="\n") 134 | return length + 1 135 | case _: 136 | return 8 # We'll hope whatever we run into is just 8 bytes long. """ 137 | 138 | def getChunk(offsetCheck): # THIS IS NOT RETURNING A SUBSET OF THE BYTES! WTF! 139 | global radioFile 140 | global fileSize 141 | 142 | start = offsetCheck 143 | radioFile.seek(offsetCheck) 144 | for byte in radioFile.read(): 145 | if byte == '\xFF': 146 | end = offsetCheck 147 | return radioData[start : end +1] 148 | else: 149 | offsetCheck += 1 150 | return b'\x00' 151 | 152 | 153 | while offset < fileSize: 154 | offsetHex = hex(offset) 155 | output.write(f'Loop start! Offset is currently {offset} or {offsetHex}\n') 156 | if offset == fileSize: 157 | print("Offset and fileSize match!!!\n END PROGRAM") 158 | break 159 | if checkFreq(offset): 160 | freq = getFreq(offset) 161 | output.write(f"Call found! 
Frequency is {freq}\n") 162 | callLength = getCallLength(offset) 163 | output.write(f'Call is {callLength} bytes long') 164 | handleCallHeader(offset) 165 | offset += 12 166 | else: 167 | # phrase = getChunk(offset) 168 | offset += handleCommand(offset) 169 | 170 | 171 | # Close output file 172 | output.close() -------------------------------------------------------------------------------- /Old vers/ExtractTextsV0.3.8 Line by line.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # Assumes RADIO.DAT for filename 4 | """ 5 | We can't get all the way through, so let's try parsing some calls. 6 | 7 | v0.3.6: Adding a "Chunk pull" and "chunk analyzer" 8 | """ 9 | 10 | 11 | import os, struct, re 12 | 13 | filename = "/home/solidmixer/projects/mgs1-undub/RADIO-usa.DAT" 14 | #filename = "RADIO-jpn.DAT" 15 | 16 | offset = 0 17 | # offset = 293536 # Freq 140.85 18 | 19 | radioFile = open(filename, 'rb') 20 | output = open("output.txt", 'w') 21 | 22 | offset = 0 23 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 24 | fileSize = radioData.__len__() 25 | 26 | 27 | 28 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ', 29 | b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME', 30 | b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH', 31 | b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'END_LINE', b'\x00':'NULL' 32 | } 33 | 34 | def commandToEnglish(hex): 35 | try: 36 | commandNamesEng[hex] 37 | return commandNamesEng[hex] 38 | except: 39 | return "BYTE WAS NOT DEFINED!!!!" 40 | 41 | 42 | # print(fileSize) # Result is 1776859! 43 | 44 | def checkFreq(offsetCheck): # Checks if the next two bytes are a codec number or not. Returns True or False. 
45 | global radioData 46 | freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes 47 | 48 | if 14000 < freq < 14200: 49 | return True 50 | else: 51 | return False 52 | 53 | def getFreq(offsetCheck): # If freq is at offset, return frequency as 140.15 54 | global radioFile 55 | 56 | radioFile.seek(offsetCheck) 57 | bytes = radioFile.read(2) 58 | 59 | freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] 60 | return freq / 100 61 | 62 | def getCallLength(offset): # Returns the length of the call, offset must be at the freq bytes 63 | global radioFile 64 | radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script in 2x bytes, then FF 65 | 66 | lengthBytes = radioFile.read(2) 67 | lengthT = struct.unpack('>h', lengthBytes)[0] 68 | return lengthT 69 | 70 | def getLength(offsetCheck): # Returns the length of the command, offset must be at the freq bytes 71 | global radioData 72 | 73 | lengthBytes = radioData[offsetCheck + 1: offsetCheck + 3] 74 | lengthT = struct.unpack('>H', lengthBytes)[0] 75 | return lengthT 76 | 77 | def getByteAtOffset(offsetCheck): # Returns a single byte, probably redundant 78 | global radioData 79 | return radioData[offsetCheck] 80 | 81 | def handleCallHeader(offsetCheck): # Assume call is just an 8 byte header for now 82 | global radioData 83 | global output 84 | header = radioData[offset: offset + 12 ] 85 | 86 | # Separate the header 87 | Freq = header[0:2] 88 | unk0 = header[2:4] 89 | unk1 = header[4:6] 90 | unk2 = header[6:8] 91 | callLength = header[9:11] 92 | numBytes = 0 93 | 94 | if header[8].to_bytes() == b'\x80': 95 | numBytes = struct.unpack('>h', callLength)[0] 96 | else: 97 | output.write(f'ERROR AT byte {offset}! 
Call length is reading as {numBytes} \n') 98 | 99 | # Quick check we ended with an FF 100 | if header[11].to_bytes() == b'\xff': 101 | output.write('Call intro ended with FF successfully\n') 102 | else: 103 | output.write(f'Call header DID NOT end in FF! Check hex at {offset + 11}') 104 | 105 | output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2}, Call is {numBytes} bytes long, hex {callLength}:\n') 106 | return 107 | 108 | def handleCommand(offsetCheck): # We get through the file! But needs refinement... We're not ending evenly and lengths are too long. 109 | # global radioFile 110 | global radioData 111 | global output 112 | commandByte = radioData[offsetCheck].to_bytes() 113 | 114 | match commandByte: 115 | case b'\x00': # AKA A null 116 | output.write('NULL!\n') 117 | return offsetCheck + 1 118 | case b'\x01': 119 | output.write('Dialogue! -- ') 120 | length = getLength(offsetCheck) 121 | while radioData[offsetCheck + length + 1].to_bytes() != b'\xff': 122 | output.write('We have a long one! Length is not FF, adding 1...\n') 123 | length += 1 124 | line = radioData[offsetCheck: offsetCheck + length + 3] 125 | unk1 = line[3:5] 126 | unk2 = line[5:7] 127 | unk3 = line[7:9] 128 | dialogue = line[9: length + 1] 129 | # output.write(f'Last byte in line is {line[length + 1].to_bytes()}\n') ## Should always end in FF! 130 | 131 | if b'\x80\x23\x80\x4e' in dialogue: 132 | dialogue = dialogue.replace(b'\x80\x23\x80\x4e', b'\x5c\x72\x5c\x6e') 133 | output.write('Dialogue new line replaced! \n') 134 | 135 | writeToFile = f'Length (int) = {length}, UNK1 = {unk1.hex()}, UNK2 = {unk2.hex()}, UNK3 = {unk3.hex()}, Text: {str(dialogue)}\n' 136 | output.write(writeToFile) 137 | return offsetCheck + length + 2 138 | case _: 139 | output.write('Command is not cased! 
-- ') 140 | start = offset 141 | while radioData[offsetCheck].to_bytes() != b'\xFF': 142 | offsetCheck += 1 143 | line = radioData[start : offsetCheck + 1] 144 | writeToFile = str(commandByte) + ": Offset: " + str(offsetCheck) + " // Content: " + str(line.hex()) + "\n\n" 145 | output.write(writeToFile) 146 | return offsetCheck + 1 147 | 148 | """ 149 | match commandByte: 150 | case b'\x80': 151 | offsetCheck += 1 152 | length = getLength(offsetCheck) 153 | output.write(f'Length of command is {length}\n') 154 | commandBytes = radioData[offset:offset + length + 1] 155 | print(commandBytes, end="\n") 156 | return length + 1global output 157 | case _: 158 | return 8 # We'll hope whatever we run into is just 8 bytes long. """ 159 | 160 | """ 161 | def getChunk(offsetCheck): # THIS IS NOT RETURNING A SUBSET OF THE BYTES! WTF! 162 | global radioFile 163 | global fileSize 164 | 165 | start = offsetCheck 166 | radioFile.seek(offsetCheck) 167 | for byte in radioFile.read(): 168 | if byte == '\xFF': 169 | end = offsetCheck 170 | return radioData[start : end +1] 171 | else: 172 | offsetCheck += 1 173 | return b'\x00' 174 | """ 175 | 176 | while offset < fileSize: 177 | offsetHex = hex(offset) 178 | perc = offset / fileSize * 100 179 | print(f'We are at {perc}% through the file') 180 | if offset >= fileSize - 1: 181 | print("Reached end of file!!!\n END PROGRAM") 182 | break 183 | if checkFreq(offset): 184 | freq = getFreq(offset) 185 | output.write(f"Call found! 
Frequency is {freq}\n") 186 | callLength = getCallLength(offset) 187 | output.write(f'Call is {callLength} bytes long') 188 | handleCallHeader(offset) 189 | offset += 12 190 | start = offset 191 | else: 192 | offset = handleCommand(offset) 193 | """ 194 | else: 195 | if radioData[offset].to_bytes() == b'\x80': # Expressing FF as a byte string wasnt working :| 196 | output.write("We matched an FF\n") 197 | line = radioData[start : offset + 1] 198 | output.write(line.hex()) 199 | output.write('\n') 200 | print('Wrote line to file!\n') 201 | offset += 1 202 | start = offset 203 | else: 204 | offset += 1 205 | """ 206 | # Close output file 207 | output.close() -------------------------------------------------------------------------------- /Old vers/ExtractTextsV0.3.9.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # Assumes RADIO.DAT for filename 4 | 5 | """ 6 | We can't get all the way through, so let's try parsing some calls. 7 | 8 | v0.3.6: Adding a "Chunk pull" and "chunk analyzer" 9 | v0.3.9: Removed Chunk pull 10 | """ 11 | 12 | 13 | import os, struct, re 14 | import radioDict 15 | 16 | #filename = "/home/solidmixer/projects/mgs1-undub/RADIO-usa.DAT" 17 | filename = "RADIO-usa.DAT" 18 | #filename = "RADIO-jpn.DAT" 19 | 20 | # We'll do a better check for this later. 21 | if filename.__contains__('jpn'): 22 | jpn = True 23 | else: 24 | jpn = False 25 | 26 | offset = 0 27 | # offset = 293536 # Freq 140.85 Hex 0x47AA0 28 | # Offset = 1773852 # Deepthroat 140.48 Hex 0x1B111C 29 | 30 | radioFile = open(filename, 'rb') 31 | output = open("output.txt", 'w') 32 | 33 | offset = 0 34 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 35 | fileSize = radioData.__len__() 36 | 37 | # print(fileSize) # Result is 1776859! 
38 | 39 | # A lot of this is work in progress or guessing 40 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ', 41 | b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME', 42 | b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH', 43 | b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'CMD_HEDR', b'\x00':'NULL' 44 | } 45 | 46 | def commandToEnglish(hex): 47 | try: 48 | commandNamesEng[hex] 49 | return commandNamesEng[hex] 50 | except: 51 | return "BYTE WAS NOT DEFINED!!!!" 52 | 53 | def checkFreq(offsetCheck): # Checks if the next two bytes are a codec number or not. Returns True or False. 54 | global radioData 55 | freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes 56 | 57 | if 14000 < freq < 14200: 58 | return True 59 | else: 60 | return False 61 | 62 | def getFreq(offsetCheck): # If freq is at offset, return frequency as 140.15 63 | global radioFile 64 | 65 | radioFile.seek(offsetCheck) 66 | bytes = radioFile.read(2) 67 | 68 | freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] 69 | return freq / 100 70 | 71 | def getCallLength(offset): # Returns the length of the call, offset must be at the freq bytes 72 | global radioFile 73 | radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script in 2x bytes, then FF 74 | 75 | lengthBytes = radioFile.read(2) 76 | lengthT = struct.unpack('>h', lengthBytes)[0] 77 | return lengthT 78 | 79 | def getLength(offsetCheck): # Returns the length of the command, offset must be at the freq bytes 80 | global radioData 81 | 82 | lengthBytes = radioData[offsetCheck + 1: offsetCheck + 3] 83 | lengthT = struct.unpack('>H', lengthBytes)[0] 84 | return lengthT 85 | 86 | def getByteAtOffset(offsetCheck): # Returns a single byte, probably redundant 87 | global radioData 88 | return radioData[offsetCheck] 89 | 90 | def handleCallHeader(offsetCheck): # Assume call is 
just an 8 byte header for now 91 | global radioData 92 | global output 93 | header = radioData[offset: offset + 12 ] 94 | 95 | # Separate the header 96 | Freq = header[0:2] 97 | unk0 = header[2:4] 98 | unk1 = header[4:6] 99 | unk2 = header[6:8] 100 | callLength = header[9:11] 101 | numBytes = 0 102 | 103 | if header[8].to_bytes() == b'\x80': 104 | numBytes = struct.unpack('>h', callLength)[0] 105 | else: 106 | output.write(f'ERROR AT byte {offset}! Call length is reading as {numBytes} \n') 107 | 108 | # Quick check we ended with an FF 109 | if header[11].to_bytes() == b'\xff': 110 | output.write('Call intro ended with FF successfully\n') 111 | else: 112 | output.write(f'Call header DID NOT end in FF! Check hex at {offset + 11}') 113 | 114 | output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2}, Call is {numBytes} bytes long, hex {callLength}\n') 115 | return 116 | 117 | def handleCommand(offsetCheck): # We get through the file! But needs refinement... We're not ending evenly and lengths are too long. 118 | # global radioFile 119 | global radioData 120 | global output 121 | commandByte = radioData[offsetCheck].to_bytes() 122 | 123 | match commandByte: 124 | case b'\x00': # AKA A null 125 | output.write('NULL in Command check!\n') 126 | return offsetCheck + 1 127 | case b'\x01': 128 | output.write('Dialogue! -- ') 129 | length = getLength(offsetCheck) 130 | while radioData[offsetCheck + length + 1].to_bytes() != b'\xff': 131 | print(f'We have a long one at offset {offsetCheck}! Length is not FF, adding 1...\n') 132 | length += 1 133 | line = radioData[offsetCheck: offsetCheck + length + 3] 134 | unk1 = line[3:5] 135 | unk2 = line[5:7] 136 | unk3 = line[7:9] 137 | dialogue = line[9: length + 1] 138 | # output.write(f'Last byte in line is {line[length + 1].to_bytes()}\n') ## Should always end in FF! 
139 | 140 | if b'\x80\x23\x80\x4e' in dialogue: # this replaces the in-game hex for new line with a \\r\\n 141 | dialogue = dialogue.replace(b'\x80\x23\x80\x4e', b'\x5c\x72\x5c\x6e') 142 | output.write('Dialogue new line replaced! \n') 143 | 144 | if jpn: 145 | dialogue = translateJapaneseHex(dialogue) 146 | writeToFile = f'Length (int) = {length}, UNK1 = {unk1.hex()}, UNK2 = {unk2.hex()}, UNK3 = {unk3.hex()}, Text: {str(dialogue.hex())}\n' 147 | else: 148 | writeToFile = f'Length (int) = {length}, UNK1 = {unk1.hex()}, UNK2 = {unk2.hex()}, UNK3 = {unk3.hex()}, Text: {str(dialogue)}\n' 149 | # Write to file 150 | output.write(writeToFile) 151 | return offsetCheck + length + 2 152 | case _: 153 | output.write('Command is not cased! -- ') 154 | start = offset 155 | while radioData[offsetCheck].to_bytes() != b'\xFF': 156 | offsetCheck += 1 157 | line = radioData[start : offsetCheck + 1] 158 | writeToFile = str(commandByte) + ": Offset: " + str(offsetCheck) + " // Content: " + str(line.hex()) + "\n\n" 159 | output.write(writeToFile) 160 | return offsetCheck + 1 161 | 162 | def translateJapaneseHex(bytestring): 163 | i = 0 164 | messageString = '' 165 | 166 | while i < len(bytestring) - 1: 167 | messageString += radioDict.getRadioChar(bytestring[i:i+2].hex()) 168 | i += 2 169 | return messageString 170 | 171 | if __name__ == '__main__': 172 | while offset < fileSize: 173 | offsetHex = hex(offset) 174 | perc = offset / fileSize * 100 175 | print(f'We are at {perc}% through the file') 176 | if offset >= fileSize - 1: 177 | print("Reached end of file!!!\n END PROGRAM") 178 | break 179 | if checkFreq(offset): 180 | freq = getFreq(offset) 181 | output.write(f"Call found! 
Frequency is {freq}\n") 182 | callLength = getCallLength(offset) 183 | output.write(f'Call is {callLength} bytes long') 184 | handleCallHeader(offset) 185 | offset += 12 186 | start = offset 187 | else: 188 | offset = handleCommand(offset) 189 | # Close output file 190 | output.close() -------------------------------------------------------------------------------- /Old vers/demoTextInjector_old.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from Green Goblins scripts. 3 | This is really heavily based on his awesome work. 4 | 5 | Script for working with Metal Gear Solid data 6 | 7 | Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/) 8 | 9 | Permission to use, copy, modify, and/or distribute this software for any 10 | purpose with or without fee is hereby granted, provided that the above 11 | copyright notice and this permission notice appear in all copies. 12 | 13 | """ 14 | 15 | import os, sys 16 | sys.path.append(os.path.abspath('./myScripts')) 17 | import re 18 | import glob 19 | import struct 20 | import progressbar 21 | import translation.radioDict as RD 22 | import json 23 | 24 | import DemoTools.demoTextExtractor as DTE 25 | 26 | version = "usa" 27 | # version = "jpn" 28 | 29 | # Toggles 30 | debug = True 31 | 32 | 33 | # Directory configs 34 | inputDir = f'demoWorkingDir/{version}/bins' 35 | outputDir = f'demoWorkingDir/{version}/newBins' 36 | injectJson = f'demoWorkingDir/{version}/demoText-{version}.json' 37 | os.makedirs(outputDir, exist_ok=True) 38 | 39 | bin_files = glob.glob(os.path.join(inputDir, '*.bin')) 40 | bin_files.sort(key=lambda f: int(f.split('-')[1].split('.')[0])) 41 | 42 | injectTexts = json.load(open(injectJson, 'r')) 43 | 44 | skipFilesListD1 = [ 45 | 'demo-05', 46 | 'demo-06', 47 | 'demo-31', 48 | 'demo-33', 49 | 'demo-35', 50 | 'demo-63', 51 | 'demo-67', 52 | 'demo-71', 53 | 'demo-72', 54 | ] 55 | 56 | def injectSubtitles(originalBinary: bytes, newTexts: dict, 
startingNum: int = 1, timings: dict = None) -> tuple [bytes, int]: 57 | """ 58 | Injects the new text to the original data, returns the bytes. 59 | Also returns the index we were at when we finished. 60 | """ 61 | 62 | def encodeNewText(text: str): 63 | """ 64 | Simple. Encodes the text as bytes. 65 | Adds the buffer we need to be divisible by 4... 66 | Return the new bytes. 67 | """ 68 | newBytes: bytes = RD.encodeJapaneseHex(text)[0] 69 | bufferNeeded = 4 - (len(newBytes) % 4) 70 | for j in range(bufferNeeded): 71 | newBytes += b'\x00' 72 | j += 1 73 | 74 | return newBytes 75 | 76 | newBytes = b"" 77 | firstLengthBytes = originalBinary[18:20] 78 | firstLength = struct.unpack(' None: 14 | offset = 0 15 | nullCount = 0 16 | 17 | while offset < fileSize - 1: # We might need to change this to Case When... as well. 18 | # Offset Tracking 19 | if debugOutput: 20 | print(f'Main loop: offset is {offset}') 21 | 22 | if nullCount == 4: 23 | RDT.output.write(f'ALERT!!! We just had 4x Nulls in a row at offset {offset}\n') 24 | nullCount = 0 25 | 26 | # MAIN LOGIC 27 | if radioData[offset].to_bytes() == b'\x00': # Add logic to tally the nulls for reading ease 28 | RDT.indentLines() 29 | if radioData[offset + 1].to_bytes() == b'\x31': # For some reason switch statements don't have an FF 30 | length = RDT.handleCommand(offset) 31 | else: 32 | RDT.output.write(f"Null! 
(Main loop) offset = {offset}\n") 33 | nullCount += 1 34 | if layerNum > 0: 35 | layerNum -= 1 36 | length = 1 37 | elif radioData[offset].to_bytes() == b'\xFF': # Commands start with FF 38 | nullCount = 0 39 | length = RDT.handleCommand(offset) 40 | elif RDT.checkFreq(offset): # If we're at the start of a call 41 | nullCount = 0 42 | RDT.handleCallHeader(offset) 43 | length = 11 # In this context, we only want the header 44 | layerNum = 1 45 | else: # Something went wrong, we need to kinda reset 46 | length = RDT.handleUnknown(offset) # This will go until we find a call frequency 47 | offset += length 48 | 49 | RDT.output.close() 50 | 51 | def extractRadioCallHeaders(filename: str) -> None: 52 | offset = 0 53 | global jpn 54 | global indentToggle 55 | global debugOutput 56 | global fileSize 57 | 58 | # Handle inputting radio file: 59 | global radioFile 60 | global radioData 61 | """ 62 | radioFile = open(filename, 'rb') 63 | radioData = radioFile.read() 64 | fileSize = len(radioData) 65 | """ 66 | RDT.setOutputFile(filename) 67 | 68 | while offset < fileSize - 1: # We might need to change this to Case When... as well. 69 | # Offset Tracking 70 | if debugOutput: 71 | print(f'offset is {offset}') 72 | 73 | # MAIN LOGIC 74 | if radioData[offset].to_bytes() == b'\x00': # Add logic to tally the nulls for reading ease 75 | length = 1 76 | elif RDT.checkFreq(offset): 77 | length = RDT.handleCallHeader(offset) 78 | else: 79 | length = 1 80 | offset += length 81 | if offset == fileSize: 82 | print(f'File was parsed successfully! Written to {filename}') 83 | break 84 | 85 | RDT.output.close() 86 | 87 | def main(): 88 | # Parser logic 89 | parser = argparse.ArgumentParser(description=f'Parse a binary file for Codec call GCL. Ex. script.py ') 90 | 91 | parser.add_argument('filename', required=False, type=str, help="The call file to parse. 
Can be RADIO.DAT or a portion of it.") 92 | parser.add_argument('-o', '--output', type=str, required=False, help="(Optional) Provides an output file (.txt)") 93 | 94 | parser.add_argument('-v', '--verbose', action='store_true', help="Write any errors to stdout for help parsing the file") 95 | parser.add_argument('-j', '--japanese', action='store_true', help="Toggles translation for Japanese text strings") 96 | parser.add_argument('-i', '--indent', action='store_true', help="Indents container blocks, WORK IN PROGRESS!") 97 | args = parser.parse_args() 98 | 99 | if not args.filename: 100 | args.filename = os.read(f'Please provide filename: ') 101 | if args.verbose: 102 | debugOutput = True 103 | 104 | if args.japanese: 105 | jpn = True 106 | 107 | if args.indent: 108 | indentToggle = True 109 | 110 | if args.output: 111 | output = open(args.output, 'w') 112 | outputFilename = args.output 113 | 114 | # Handle inputting radio file: 115 | global radioFile 116 | global radioData 117 | global fileSize 118 | 119 | radioFile = open(args.filename, 'rb') 120 | #radioFile = open(filename, 'rb') 121 | radioData = radioFile.read() 122 | fileSize = len(radioData) 123 | 124 | extractRadioCallHeaders('headers.txt') 125 | analyzeRadioFile() 126 | 127 | 128 | 129 | 130 | 131 | # This doesn't work because i did not code with contextual variables in mind >:O 132 | if __name__ == '__main__': 133 | # We should get args from user. Using argParse 134 | # main() 135 | 136 | # Parser logic 137 | parser = argparse.ArgumentParser(description=f'Parse a binary file for Codec call GCL. Ex. script.py ') 138 | 139 | parser.add_argument('filename', type=str, help="The call file to parse. 
Can be RADIO.DAT or a portion of it.") 140 | parser.add_argument('-o', '--output', type=str, required=False, help="(Optional) Provides an output file (.txt)") 141 | 142 | parser.add_argument('-v', '--verbose', action='store_true', help="Write any errors to stdout for help parsing the file") 143 | parser.add_argument('-j', '--japanese', action='store_true', help="Toggles translation for Japanese text strings") 144 | parser.add_argument('-i', '--indent', action='store_true', help="Indents container blocks, WORK IN PROGRESS!") 145 | args = parser.parse_args() 146 | 147 | if not args.filename: 148 | args.filename = os.read(f'Please provide filename: ') 149 | if args.verbose: 150 | debugOutput = True 151 | 152 | if args.japanese: 153 | jpn = True 154 | 155 | if args.indent: 156 | indentToggle = True 157 | 158 | if args.output: 159 | output = open(args.output, 'w') 160 | outputFilename = args.output 161 | 162 | # Handle inputting radio file: 163 | global radioFile 164 | global radioData 165 | fileSize 166 | 167 | radioFile = open(args.filename, 'rb') 168 | #radioFile = open(filename, 'rb') 169 | radioData = radioFile.read() 170 | fileSize = len(radioData) 171 | 172 | extractRadioCallHeaders('headers.txt') 173 | analyzeRadioFile() -------------------------------------------------------------------------------- /Old vers/stageCalls-before-0A.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/Old vers/stageCalls-before-0A.ods -------------------------------------------------------------------------------- /Old vers/vagToWav.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basically had LLM generate this from ColdSauce's VAG to WAV converter 3 | https://github.com/ColdSauce/psxsdk/blob/master/tools/vag2wav.c 4 | """ 5 | import struct 6 | import wave 7 | 8 | # Constants for VAG and WAV files 9 
SAMPLE_RATE = 22050  # Sample rate written to the WAV header
CHANNELS = 1  # Mono audio
BITS_PER_SAMPLE = 16  # WAV files typically use 16-bit PCM
BLOCK_SIZE = 16  # One VAG block: predictor/shift byte, flag byte, 14 data bytes
VAG_HEADER_SIZE = 48  # Audio blocks start after the 48-byte "VAGp" header
END_OF_STREAM_FLAG = 7  # Flag byte value that terminates decoding

# Coefficients for the ADPCM decoding (numerators over 64)
ADPCM_COEFFICIENTS = [
    (0, 0),
    (60, 0),
    (115, -52),
    (98, -55),
    (122, -60)
]


def adpcm_decode(vag_data):
    """
    Decode raw VAG ADPCM blocks into a list of signed 16-bit PCM samples.

    Each 16-byte block is laid out as:
        byte 0      high nibble = predictor index, low nibble = shift factor
        byte 1      flags (END_OF_STREAM_FLAG stops decoding)
        bytes 2-15  28 four-bit samples, low nibble first

    vag_data: bytes-like object holding the blocks that follow the VAG header.
    Returns a list of ints clamped to [-32768, 32767]. A trailing partial
    block is ignored, so empty or short input yields an empty list.
    Raises ValueError if a block names a predictor outside ADPCM_COEFFICIENTS.
    """
    pcm_data = []
    # Predictor history is kept unclamped and unrounded between samples.
    s_1, s_2 = 0.0, 0.0

    for i in range(0, len(vag_data) - BLOCK_SIZE + 1, BLOCK_SIZE):
        block = vag_data[i:i + BLOCK_SIZE]

        predictor_number = (block[0] >> 4) & 0x0F
        shift_factor = block[0] & 0x0F

        if block[1] == END_OF_STREAM_FLAG:
            break

        if predictor_number >= len(ADPCM_COEFFICIENTS):
            raise ValueError(
                f"Invalid ADPCM predictor {predictor_number} in block at offset {i}"
            )
        f1, f2 = ADPCM_COEFFICIENTS[predictor_number]

        for data_byte in block[2:]:
            # Low nibble is the earlier sample, high nibble the later one.
            for nibble in (data_byte & 0x0F, data_byte >> 4):
                # Sign-extend the 4-bit value
                if nibble >= 8:
                    nibble -= 16

                # Scale the nibble to 16 bits, apply the block shift, then add
                # the prediction from the previous two samples.
                sample = ((nibble << 12) >> shift_factor) + (f1 * s_1 + f2 * s_2) / 64

                s_2, s_1 = s_1, sample

                # Clamp only the emitted value to the 16-bit signed range
                pcm_data.append(max(-32768, min(32767, round(sample))))

    return pcm_data


def vag_to_wav(vag_file_path, wav_file_path):
    """
    Convert a mono VAG file to a 16-bit PCM WAV file.

    Reads the big-endian data size at header offset 12, decodes that many
    bytes of ADPCM data following the 48-byte header, and writes the result
    at SAMPLE_RATE Hz.

    Returns 0 on success, or -1 if the input is not a usable VAG file.
    """
    with open(vag_file_path, "rb") as vag_file:
        header = vag_file.read(VAG_HEADER_SIZE)

        if header[:4] != b'VAGp':
            print(f"{vag_file_path} is not in VAG format. Aborting.")
            return -1

        if len(header) < VAG_HEADER_SIZE:
            print(f"{vag_file_path} has a truncated VAG header. Aborting.")
            return -1

        data_size = struct.unpack(">I", header[12:16])[0]

        print(f"Data Size: {data_size} bytes")

        # The file position is already just past the header.
        vag_data = vag_file.read(data_size)

    # Decode ADPCM to PCM
    pcm_data = adpcm_decode(vag_data)

    # Write the WAV file
    with wave.open(wav_file_path, "wb") as wav_file:
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(BITS_PER_SAMPLE // 8)
        wav_file.setframerate(SAMPLE_RATE)

        # WAV stores 16-bit PCM little-endian, hence '<'.
        pcm_bytes = struct.pack(f'<{len(pcm_data)}h', *pcm_data)
        wav_file.writeframes(pcm_bytes)

    print(f"Converted {vag_file_path} to {wav_file_path}")
    return 0


# Example usage
if __name__ == "__main__":
    vag_file_path = "workingFiles/vag-examples/00042.vag"
    wav_file_path = "workingFiles/vag-examples/00042.wav"
    vag_to_wav(vag_file_path, wav_file_path)
flags == 7: 58 | break 59 | 60 | for i in range(0, 28, 2): 61 | d = struct.unpack("B", vag.read(1))[0] 62 | s = (d & 0xf) << 12 63 | if s & 0x8000: 64 | s |= 0xffff0000 65 | samples[i] = int(s >> shift_factor) 66 | 67 | s = (d & 0xf0) << 8 68 | if s & 0x80000: 69 | s |= 0xffff0000 70 | samples[i + 1] = int(s >> shift_factor) 71 | 72 | for i in range(28): 73 | samples[i] += s_1 * f[predict_nr][0] + s_2 * f[predict_nr][1] 74 | s_2 = s_1 75 | s_1 = samples[i] 76 | 77 | # Clamp to 16-bit signed integer range 78 | sample_value = max(-32768, min(32767, int(samples[i] + 0.5))) 79 | pcm.write(struct.pack(" ") 85 | sys.exit(1) 86 | 87 | vag_path = sys.argv[1] 88 | wav_path = sys.argv[2] 89 | vag2wav(vag_path, wav_path) 90 | -------------------------------------------------------------------------------- /Old vers/xmltest.py: -------------------------------------------------------------------------------- 1 | import os, xml.etree.ElementTree as ET 2 | 3 | root = ET.Element("library") 4 | 5 | books = [ 6 | {"title": "The Catcher in the Rye", "author": "J.D. 
Salinger", "edition": "First", "publication_year": "1951"}, 7 | {"title": "1984", "author": "George Orwell", "edition": "Second", "publication_year": "1949"}, 8 | {"title": "To Kill a Mockingbird", "author": "Harper Lee", "edition": "Third", "publication_year": "1960"} 9 | ] 10 | 11 | for book in books: 12 | book_element = ET.SubElement(root, "book", { 13 | "edition": book["edition"], 14 | "publication_year": book["publication_year"] 15 | }) 16 | title_element = ET.SubElement(book_element, "title") 17 | title_element.text = book['title'] 18 | author_element = ET.SubElement(book_element, "author") 19 | author_element.text = book['author'] 20 | 21 | # Convert the tree to a string 22 | tree = ET.ElementTree(root) 23 | tree.write("library.xml") 24 | 25 | # Pretty print XML for viewing 26 | from xml.dom.minidom import parseString 27 | xmlstr = parseString(ET.tostring(root)).toprettyxml(indent=" ") 28 | print(xmlstr) 29 | 30 | ##### 31 | 32 | xml_data = """ 33 | 34 | 35 | 36 | 37 | 38 | """ 39 | 40 | # Parse the XML data 41 | root = ET.fromstring(xml_data) 42 | 43 | # Find the book with the title "1984" 44 | book = root.find(".//book[@title='1984']") 45 | 46 | if book is not None: 47 | # Change the edition attribute 48 | book.set("edition", "Revised Edition") 49 | 50 | # If you want to add new attributes or modify child elements, do so here 51 | # For example, changing the author: 52 | # book.set("author", "New Author Name") 53 | 54 | # Print out the modified XML 55 | xmlstr = ET.tostring(root, encoding='unicode') 56 | print(xmlstr) 57 | else: 58 | print("Book not found") 59 | 60 | 61 | import xml.etree.ElementTree as ET 62 | 63 | # Create the root element 64 | root = ET.Element("library") 65 | 66 | # Add multiple books using SubElement 67 | ET.SubElement(root, "book", {"title": "Book One", "author": "Author A"}) 68 | ET.SubElement(root, "book", {"title": "Book Two", "author": "Author B"}) 69 | 70 | # Print the resulting XML 71 | tree = ET.ElementTree(root) 72 | xmlstr = 
ET.tostring(root, encoding='unicode') 73 | print(xmlstr) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mgs1-scripts 2 | Reverse engineering scripts for MGS1. 3 | So far, mostly scoped on RADIO.DAT extraction. 4 | 5 | # Project Goals 6 | 7 | I started this to finally have an un-dubbed version of Metal Gear Solid to play with. Hopefully once we can inject english subtitles into the Japanese version, we'll be able to experience the original VA performance and see the subtleties between versions released in the US and JPN. 8 | 9 | You can find additional details on my blog at ![mgs-undubbed.io](https://blog.mgs-undubbed.io) 10 | 11 | # Next steps: 12 | 13 | Most of the tooling is done. There are some finishing touches for demo and zmovie still before the code is complete. Then it's onto translation. 14 | 15 | # Usage 16 | 17 | Most of the scripts will have command-line based arguments. I do have some directories still hard coded. 18 | 19 | ## Directories and Flow 20 | 21 | Each of the files have several scripts to help with editing. I do want to explain how my scripts are used, though. Most everything has a command line usage with arguments. There are some instances where things still need to be hard coded. For that I've tried to keep all of the variables in the top section so they can be easily changed. 22 | 23 | The top level of my project folder looks like this: 24 | 25 | ``` 26 | [Project Folder] 27 | ├── build 28 | │   ├── jpn-d1 29 | │   │   ├── DUMMY3M.DAT 30 | │   │   ├── MGS 31 | │   │   ├── SYSTEM.CNF 32 | │   │   ├── license_data.dat 33 | │   │   └── rebuild.xml 34 | │   ├── jpn-d2 ... 
35 | │   ├── usa-d1 36 | │   └── usa-d2 37 | ├── build-proprietary 38 | │   ├── README.md 39 | │   ├── US Version Text.txt 40 | │   ├── demo 41 | │   │   ├── demoText-jpn-modified.json 42 | │   │   └── demoText-jpn-undub.json 43 | │   ├── itemDesc-inject.json 44 | │   ├── itemDesc-jpn.json 45 | │   ├── jsonCompare.py 46 | │   ├── radio 47 | │   │   ├── codecWindowPreview.py 48 | │   │   ├── dialogueLineReplace.json 49 | │   │   ├── dialogueSwap.py 50 | │   │   ├── output.json 51 | │   │   └── storyCalls.json 52 | │   ├── vox 53 | │   │   ├── vox-jpn-d1.json 54 | │   │   └── voxText-jpn-d1.json 55 | │   └── zmovie 56 | ├── build-src 57 | │   ├── jpn-d1 58 | │   │   ├── DUMMY3M.DAT 59 | │   │   ├── MGS 60 | │   │   ├── SYSTEM.CNF 61 | │   │   ├── license_data.dat 62 | │   │   └── rebuild.xml 63 | │   ├── jpn-d2 ... 64 | │   ├── usa-d1 ... 65 | │   └── usa-d2 ... 66 | ├── myScripts 67 | │   ├── DemoTools 68 | │   ├── ... etc 69 | ├── workingFiles 70 | ``` 71 | 72 | For each of the top level directories here's my description: 73 | - **build-src** : All original iso files and the rebuild.xml generated by dumpsxiso are written here. 74 | - **build** : These are the modified files that are rebuilt using mkpsxiso for testing 75 | - **build-proprietary** : These files contain my script files that are injected. 76 | - **myScripts** : (this repository of code) 77 | - **workingFiles** : I should have everything extracted by script going here. This was a recent change, not everything is there. 78 | 79 | The reasons I had for splitting them this way was: 80 | 1. Common file paths that could have [*version*] or [*disk*] swapped out easily. 81 | 2. This keeps an unmodified version of the disk's original contents so that i can replace it when i want to revert changes. 82 | 3. the build-proprietary folder now holds my working files so that they are not included. 
The scripts themselves have no copyrighted material, but as the "source" files for my modifications contain the US version texts, I wanted to make a separate git repository for both collaboration and tracking changes to my mods. 83 | 84 | Hopefully this helps with the scripts. Once I revisit front-end and UX, I'll look at doing a master config file so that it's clear where things are being saved. For now, my goal for each script will be to function similarly to the radioTools.py script where an input/output file is specified in the command line usage. 85 | 86 | ## Recompilation workflow 87 | 88 | For a working example of how everything is compiled, check this script as it is what I use for running all of the scripts in tandem. 89 | myScripts/testing/runJpnBuildTest.sh 90 | 91 | This script... 92 | 1. Starts with the unmodified files 93 | 2. Extracts them to the working directory 94 | 3. Modifies them with my new dialogues from build-proprietary 95 | 4. Compiles the new dat files and moves them to build/ (If something is excluded or skipped we replace it with the original) 96 | 5. Also runs any fixes (for example, stage.dir offset adjustments) 97 | 6. Compiles a new iso with mkpsxiso, and 98 | 7. Opens the resulting iso file in Duckstation for testing. 99 | 100 | I do want to note for testing: do not carry savestates over between builds, as that has led to crashes/instability for me. Use them only within the same iso; don't save a state and then load it with a new build. 101 | 102 | # The scripts 103 | 104 | ## Radio.dat 105 | 106 | Quick overview: 107 | 108 | 1. RadioDatTools.py -- Extract game text in xml and json format 109 | 2. xmlModifierTools.py -- Imports adjusted json dialogue into the XML file. Recomputes lengths of all calls as needed 110 | 3. RadioDatRecompiler.py -- Takes an XML Radio data and creates a .dat file. Can run the recompiler and also adjust stage.dir values (using -s and -S flags) 111 | 112 | Most scripts have an arg parser, use -h for help. 
113 | 114 | ex: 115 | 116 | `RadioDatTools.py -h` for help. 117 | 118 | ``` 119 | usage: RadioDatTools.py [-h] [-v] [-i] [-s] [-H] [-g] [-x] [-z] filename [output] 120 | 121 | Parse a binary file for Codec call GCL. Ex. script.py 122 | 123 | positional arguments: 124 | filename The call file to parse. Can be RADIO.DAT or a portion of it. 125 | output Output Filename (.txt) 126 | 127 | options: 128 | -h, --help show this help message and exit 129 | -v, --verbose Write any errors to stdout for help parsing the file 130 | -i, --indent Indents container blocks, WORK IN PROGRESS! 131 | -s, --split Split calls into individual bin files 132 | -H, --headers Extract call headers ONLY! 133 | -g, --graphics export graphics 134 | -x, --xmloutput Exports the call data into XML format 135 | -z, --iseeeva Exports the dialogue in a json like Iseeeva's script 136 | ``` 137 | 138 | ## Demo.dat 139 | 140 | Example usage: 141 | 142 | 1. demoSplitter.py -- Splits all demo files to individual demos 143 | 2. demoTextExtractor.py -- Extracts texts from all demo files in the output folder 144 | 3. demoTextInjector.py -- Injects json text back into demo files, outputs the binaries as new files 145 | 4. demoRejoiner.py -- Joins all demo files into one large DAT file. 146 | 147 | ## Known issues: 148 | - RADIO.DAT: MGS Integral does not recompile correctly. I think there is extra null space between call data (after graphics data) that will need to be accounted for. The data is correct, but there's also too much graphics data. 149 | - RADIO.DAT: Recompiler works but will not correctly count/re-encode special characters. 150 | - RADIO.DAT: Still missing/incorrect kanji characters that need to be OCR'd from their graphics files. ~30 yet to identify, numerous others are wrong. Reach out to me if you would like to help translate them! 151 | - RADIO.DAT: Have not tested all the offset adjustments to STAGE.DIR yet. Could be faulty. Works so far as I've tested. 
152 | 153 | This tool is now functional with some limitations: 154 | 1. Save blocks need some manual tweaks in the code to be 100% accurate on recompile, but it can be done. 155 | 2. Length calculations should be correct. The script will warn you if a call exceeds the safe limit (length bytes are only 2, so max length in bytes of a call is 65535, if we exceed this the files may not work properly.) 156 | 157 | # Using this script to replace the dialogue. 158 | 159 | To use this to make changes, run it in more or less this way... Here's an example workflow: 160 | 161 | 1. Use RadioDatTools.py to export an `XML` and `json` file containing the full data. 162 | ```python RadioDatTools.py RADIO.DAT -zx``` 163 | 2. Edit the XML data. If using the json, use the jsonTools.py to inject call dialogue into XML data. Optionally use json tools to merge dialogue with offsets from different versions. 164 | ```python jsonTools.py subtitles.json offsets.json``` 165 | 3. Use xmlModifierTools.py to inject the json data to the XML. Different aspects can be commented out, but should match the original if untouched. 166 | ```python xmlModifierTools.py inject RADIO-output-Iseeva.json RADIO-output.xml``` 167 | 4. Once the XML is fully completed, it's time to recompile RADIO.DAT. Use RadioDatRecompiler.py to recompile any valid XML into a binary DAT file. Use the -S to modify STAGE.DIR offset numbers. There will be expected errors, but at this time it might work. If STAGE.DIR is specified (-s) we use that as a template to fix offsets and output a new file (use -S to set output name) 168 | ```python RadioDatRecompiler.py -p RADIO-output.xml new-RADIO.DAT -s STAGE.DIR -S new-STAGE.DIR``` 169 | 170 | There are nuances there but that's basically the gist. Either `RadioDatRecompiler.py -p` or `xmlModifierTools.py prepare` will calculate the length changes needed. For more info, use -h on any script. 
171 | 172 | [Note: Recompiling with the -x uses the original hex for dialogue and overrides any changes, but DOES NOT RECALCULATE LENGTHS! Use it to ensure recompilation is working, not for xml files where lengths were changed.] 173 | 174 | # Scripts Overview 175 | 176 | ## Main tools: 177 | 178 | ### RadioDatTools.py 179 | 180 | This extracts all call data, hopefully keeping other byte data intact in the file. The goal is to have all bytes there so it can be re-compiled into a new file. -h for help. This should be mostly complete now. Remaining work will be adjusting XML container data as needed for recompilation. 181 | 182 | Can also split calls out for further analysis. 183 | 184 | Usage: 185 | 186 | ``` 187 | $ RadioDatTools.py path/to/Radio.dat [outputfilename] [-h, -i, -d, ...] 188 | ``` 189 | 190 | ### RadioDatRecompiler.py 191 | Recompiles a given XML document (exported from RadioDatTools) into a binary file. 192 | 193 | Eventually, it will inject the json data and recompute the lengths for all containers. 194 | 195 | ### xmlModifierTools.py 196 | 197 | Scripts to modify the XML, including recalculating lengths once dialogues have been changed. 198 | 199 | NOTE! It will not correctly account for any two-byte characters that were decoded! 200 | 201 | ### jsonTools.py 202 | 203 | Use this to zip together offsets from one json and subtitles from another json (useful for injecting an English subtitle in with japanese offsets) 204 | 205 | ### StageDirTools 206 | 207 | ### callsInStageDirFinder.py 208 | 209 | Scripts for finding all call offsets in Stage.dir. Currently this is working. Can be run on its own for analysis tools. 
210 | 211 | Logic is shamelessly reverse engineered from iseeeva's radio extractor: 212 | https://github.com/iseeeva/metal/tree/main 213 | 214 | ## radioTools 215 | 216 | ### callExtractor.py 217 | 218 | Extracts a single call based on offsets (leaves in a bin format), to be merged into a better library 219 | 220 | ### callInsertor.py 221 | 222 | Inserts a call into an existing RADIO.DAT file. Useful if you want to modify only one call's worth of binary and inject it at the original offset. Good for testing recompiler logic. 223 | 224 | ### splitRadioFile.py -- DEPRECATED 225 | 226 | Previously split RADIO.DAT into individual calls. Use RadioDatTools with the -s option. 227 | 228 | ### characters.py 229 | 230 | Contains dicts in use by the radioDict library. SOME CHARACTERS HAVE YET TO BE IDENTIFIED! 231 | 232 | ### radioDict.py 233 | 234 | The heart of the translation of japanese/special character hex. This has libraries for decoding the odd hex codes into japanese characters, but can also assist in outputting graphics found in the data. 235 | 236 | NOTE: Does not yet decode / re-encode all characters. 237 | 238 | -------------------------------------------------------------------------------- /StageDirTools/Notes.txt: -------------------------------------------------------------------------------- 1 | 02348A 2 | 3 | 4 | 01 00 91 00 5 | A7 96 73 62 8A 24 02 00 # Length of file: 0x02238A # SB / bin 6 | 00 00 6E 64 38 E6 00 00 # nd # length E638? 
def extract_numeric_prefix(filename):
    """Return the integer before the first hyphen of the file's base name.

    Used as the sort key so "10-xxxx" orders after "2-xxxx".
    Raises ValueError if the prefix is not an integer.
    """
    # Extract the numeric prefix before the first hyphen
    base_name = os.path.basename(filename)
    prefix = base_name.split('-')[0]
    return int(prefix)

def getHashHex(filename: str) -> str:
    """Return the text between the first hyphen and the following dot.

    For "03-a1b2.pcx" this is "a1b2". Raises IndexError if the name has no hyphen.
    """
    return filename.split('-')[1].split('.')[0]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Creates a dar file from a directory with .pcx files. Ex: assembleDar.py path/to/pcxfiles/ [output.dar]')
    parser.add_argument('input', type=str, help="Folder containing .pcx files to assemble into a DAR.")
    parser.add_argument('filename', type=str, help="Output filename, ex: new-01.dar")

    args = parser.parse_args()

    inputDir: str = args.input
    darFileName: str = args.filename

    files = glob.glob(f'{inputDir}/*')

    # Sort the files using the custom key
    files.sort(key=extract_numeric_prefix)

    # Collect one chunk per entry and join once at the end, instead of
    # repeatedly re-copying a growing bytes object.
    darChunks = []

    for file in files:
        # Get header bytes: the hex name from the filename, byte-reversed
        fileHeader = getHashHex(os.path.basename(file))
        fileHeadBytes = bytes.fromhex(fileHeader)[::-1]
        print(fileHeadBytes.hex())
        with open(file, 'rb') as f:
            data = f.read()
        # Pack the length with an explicit byte order so the output does not
        # depend on the host CPU. NOTE(review): little-endian assumed, matching
        # the reversed name bytes above -- confirm against a real .dar.
        darChunks.append(fileHeadBytes + struct.pack("<I", len(data)) + data)

    with open(darFileName, 'wb') as f:
        f.write(b''.join(darChunks))
# For now this will just get all offsets of radio calls in the stage.dir and write a CSV file with the relevent offsets.
def getCallOffsets():
    """
    Scan the loaded stage data byte by byte for radio-call references.

    A candidate is a 0x01 byte whose fourth byte is 0x0a and whose two bytes
    in between match a known frequency (checkFreq). Every hit is handed to
    writeCall. Progress is reported through the module-level progress bar.
    Reads the globals stageData and filesize; returns nothing.
    """
    global filesize
    global bar

    bar.maxval = filesize
    bar.start()

    for offset in range(filesize):
        # writeCall reads stageData[offset + 4 : offset + 8], so a candidate
        # needs eight bytes. Checking this first also keeps the offset + 3
        # index inside the buffer near the end of the file.
        if offset + 8 <= filesize:
            # Compare the byte values directly: indexing bytes yields ints, and
            # int.to_bytes() without arguments only works on Python 3.11+.
            # After running without the 0x0a check, seems all call offsets DO have 0x0a in the 4th byte
            if stageData[offset] == 0x01 and stageData[offset + 3] == 0x0a:
                if checkFreq(offset): # We only write the call to the csv if the call matches a frequency, this check might not be needed....?
                    writeCall(offset)
        # Report the number of bytes consumed so far
        bar.update(offset + 1)
    bar.finish()
def init(filename: str):
    """
    Load a STAGE.DIR (or GCX) file and collect its radio-call offsets.

    Library entry point for importing modules: it reads the whole file into
    the module-level stageData, records its length in filesize, then runs
    getCallOffsets(), which fills offsetDict.

    filename: path of the file to scan.
    """
    global filesize
    global stageData

    # Read through a context manager so the handle is closed right away;
    # only the in-memory bytes are used afterwards.
    with open(filename, 'rb') as stageDir:
        stageData = stageDir.read()
    filesize = len(stageData)

    if debug:
        print(offsetDict)

    print('Getting STAGE.DIR call offsets... please be patient!')
    getCallOffsets()

    print('Finished checking for calls in STAGE.DIR! Ready to proceed.')
def getStagesInBin():
    """
    Parse the stage table at the start of the loaded stageData.

    The first four bytes give the byte size of the table. From offset 4 it
    is read as 12-byte entries: an 8-byte NUL-padded name followed by a
    4-byte start block. A stage's end block is the start block of the next
    entry; a value of 0 there means the stage runs to the end of the file.
    Blocks are 0x800 bytes.

    Returns a list of stageContents objects with name, startBlock, endBlock
    and binaryData filled in.
    """
    global stageData

    # Explicit little-endian so parsing does not depend on the host CPU.
    # NOTE(review): little-endian assumed for this file format -- confirm.
    size = struct.unpack_from("<I", stageData, 0)[0]
    stageList = []

    offset = 4
    while offset < size:
        newStage = stageContents()
        # Get the data
        stageName = stageData[offset:offset + 8].decode('utf8').rstrip('\x00')
        location = struct.unpack_from("<I", stageData, offset + 8)[0]
        # offset + 20 is the start-block field of the following entry
        locationEnd = struct.unpack_from("<I", stageData, offset + 20)[0]
        if locationEnd == 0:
            locationEnd = len(stageData) // 0x800

        # Add attributes to stage object:
        newStage.name = stageName
        newStage.startBlock = location
        newStage.endBlock = locationEnd
        newStage.binaryData = stageData[location * 0x800: locationEnd * 0x800]

        # Add to list
        stageList.append(newStage)
        offset += 12

    return stageList
def getStageFiles(fileListBin: bytes) -> list:
    """
    This gets the list of files in the stage. Alternatively, can be called
    from within the stage

    fileListBin is the stage's table of contents (the caller passes the first
    0x800 bytes of the stage). Starting at offset 4 it is read as 8-byte
    entries: 2-byte name checksum (stored reversed), family byte, type byte,
    and a 4-byte value.

    * Regular entries: the value is the file size. Each file starts on a new
      0x800 block, directly after the previous file's blocks.
    * Family 0x63 ("C") entries: the files are packed back to back in one
      run of blocks. Each value is the file's start offset inside that run,
      and a closing entry with type 0xFF holds the total length of the run.

    Returns a list of stageFile objects in table order, where start/end are
    byte offsets into the stage binary and size == end - start.
    Raises ValueError if a C group has no 0xFF closing entry.
    """
    offset = 4
    stageFiles = []
    blockOffset = 1 # Always 1, never seen a table of contents longer than 0x800 bytes.
    tocLength = len(fileListBin)

    # Only look at complete 8-byte entries
    while offset + 8 <= tocLength:
        tocEntry = fileListBin[offset : offset + 8]
        if tocEntry == bytes(8): # Reached end of the contents
            if debug:
                print(f'Reached end of list! Breaking...')
            break
        elif tocEntry[2] == 0x63: # Handling C files...
            cfileHeaders = []
            while offset + 8 <= tocLength and fileListBin[offset + 3] != 0xFF:
                cfileHeaders.append(fileListBin[offset: offset + 8])
                offset += 8
            if offset + 8 > tocLength:
                raise ValueError('C-file group has no 0xFF terminator entry in the table of contents')

            # Byte offset of the first block of the packed C-file run
            cBase = blockOffset * 0x800
            cTotal = struct.unpack("<I", fileListBin[offset + 4: offset + 8])[0]

            # Walk the group backwards: each file ends where the next one
            # starts, and the last one ends at the run's total length.
            # Everything is kept as an absolute offset into the stage binary
            # so that start, end and size agree with each other.
            cfileEnd = cBase + cTotal
            stageCFiles = []
            for header in reversed(cfileHeaders):
                currentCFile = stageFile()
                currentCFile.nameChecksum = header[0:2][::-1].hex()
                currentCFile.fileFamily = header[2]
                currentCFile.fileType = header[3]
                currentCFile.startBlock = blockOffset # Doesnt mean anything for the cfiles.
                # Start, end, size
                currentCFile.end = cfileEnd
                currentCFile.start = cBase + struct.unpack("<I", header[4:8])[0]
                currentCFile.size = currentCFile.end - currentCFile.start

                # We update this for the next loop
                cfileEnd = currentCFile.start
                stageCFiles.append(currentCFile)

            # Before exiting c family loop, add the total blocks
            blockOffset += cTotal // 0x800 + 1

            # Add the C files to stage files, back in table order
            stageFiles.extend(reversed(stageCFiles))
            # Offset stil at the cfile total, ok to add 8 bytes.
        else:
            currentFile = stageFile()
            currentFile.nameChecksum = tocEntry[0:2][::-1].hex()
            currentFile.fileFamily = tocEntry[2]
            currentFile.fileType = tocEntry[3]

            # Now the dicey bits...
            currentFile.size = struct.unpack("<I", tocEntry[4:8])[0]
            currentFile.startBlock = blockOffset
            currentFile.start = blockOffset * 0x800
            currentFile.end = currentFile.start + currentFile.size

            # Update the file blocks (blocks is how many blocks of 0x800 size it needs)
            fileBlocks = currentFile.size // 0x800 + 1
            blockOffset += fileBlocks

            # Add to the files list
            stageFiles.append(currentFile)
        # After each file, we increase offset by 8
        offset += 8

    return stageFiles
if __name__ == "__main__":
    # Interactive export: pick a stage, list its files, then write either one
    # named file or every file ("ALL") into <outputFolder><stage>/.

    stageSelect = input('Which stage do you want to list files from? \n')
    stage = getStage(stageSelect)
    files: list[stageFile] = getStageFiles(stage.binaryData[0:0x800])
    for file in files:
        print(file)
    fileToExport = input(f'Which file from stage {stageSelect} do you want to export? [ALL exports all files!]\n')

    # Ensure output directory exists
    os.makedirs(f"{outputFolder}{stageSelect}", exist_ok=True)

    # getFilename() also stores the name on the object; it is needed both
    # for matching the user's choice and for naming the exported files.
    for file in files:
        file.getFilename()

    if fileToExport == "ALL":
        for i, file in enumerate(files):
            with open(f'{outputFolder}{stageSelect}/{stageSelect}-{i:02}-{file.filename}', 'wb') as f:
                f.write(stage.binaryData[file.start: file.end])
    else:
        # The first file with a matching name wins
        selected = next((file for file in files if file.filename == fileToExport), None)
        if selected is None:
            print(f'Export failed! {fileToExport} was not found in stage {stageSelect}! Exiting...')
            exit(2)
        with open(f'{outputFolder}{stageSelect}/{fileToExport}', 'wb') as f:
            f.write(stage.binaryData[selected.start: selected.end])

    exit(0)
import json

import pygame

# Bring up pygame and create the window the subtitles are drawn into.
pygame.init()
screen_width, screen_height = 800, 600
screen = pygame.display.set_mode((screen_width, screen_height))
pygame.display.set_caption("Dialogue Display")

# Subtitle timing data: three nested levels of dicts, the innermost holding
# 'startFrame', 'displayFrames' and 'text'.
with open('workingfiles/vag-testing.json', 'r') as file:
    dialogue_data = json.load(file)

# Default pygame font, and the frame pacing used by the main loop.
font = pygame.font.Font(None, 36)
fps = 30
clock = pygame.time.Clock()


def draw_text(text, x, y):
    """Render ``text`` in white and blit it onto the screen at (x, y)."""
    screen.blit(font.render(text, True, (255, 255, 255)), (x, y))


running = True
current_frame = 0

while running:
    # Leave the loop when the window is closed.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    screen.fill((0, 0, 0))

    # Draw every dialogue whose display window contains the current frame.
    for group in dialogue_data.values():
        for entries in group.values():
            for dialogue in entries.values():
                first_frame = int(dialogue['startFrame'])
                end_frame = first_frame + int(dialogue['displayFrames'])
                if first_frame <= current_frame < end_frame:
                    draw_text(dialogue['text'], 100, 100)

    pygame.display.flip()

    # Advance one frame and hold the loop to the target frame rate.
    current_frame += 1
    clock.tick(fps)

pygame.quit()
filename = ""
tempDir = "/tmp"


def splitVagFile(filename, leftChanFilename, rightChanFilename):
    """
    Split an interleaved VAG file into two single-channel files.

    The first 0x40 bytes are treated as the header. After it, the data is read
    in 0x2000-byte strides: the first 0x1000 bytes of each stride go to the left
    output, the next 0x1000 bytes to the right output. Each output receives the
    original header with the big-endian 32-bit value at bytes 12..15 halved.

    NOTE(review): no magic check is performed here; callers are expected to
    have verified the file type (playVagFile does).
    """
    with open(filename, 'rb') as f:
        data = f.read()

    header = data[:0x40]
    oldSize = int.from_bytes(header[12:16], 'big')
    newSize = (oldSize // 2).to_bytes(4, 'big')

    # Collect the pieces and join once instead of growing bytes objects in a loop.
    leftParts = [header[16:]]
    rightParts = [header[16:]]
    for i in range(0x40, len(data), 0x2000):
        leftParts.append(data[i:i + 0x1000])
        rightParts.append(data[i + 0x1000:i + 0x2000])

    for outName, parts in ((leftChanFilename, leftParts), (rightChanFilename, rightParts)):
        with open(outName, 'wb') as f:
            f.write(header[0:12])
            f.write(newSize)
            f.write(b"".join(parts))


def convert_vag_to_wav(input_path, output_path):
    """Convert a VAG file to ``output_path`` with ffmpeg, overwriting any existing file."""
    (
        ffmpeg
        .input(input_path, f='vag')
        .output(output_path)
        .overwrite_output()
        .run()
    )


def convert_stereo_vag_to_wav(left_vag, right_vag, output_wav):
    """
    Join two single-channel VAG files into one stereo 16-bit PCM wav.

    ffmpeg errors are printed rather than raised.
    """
    try:
        # Each channel is its own ffmpeg input; the 'join' filter merges them.
        left = ffmpeg.input(left_vag, f='vag')
        right = ffmpeg.input(right_vag, f='vag')
        ffmpeg.filter([left, right], 'join', inputs=2, channel_layout='stereo').output(output_wav, acodec='pcm_s16le').overwrite_output().run()
    except ffmpeg.Error as e:
        print('FFmpeg error:', e.stderr.decode())


def play_with_ffplay(wav_file):
    """Play ``wav_file`` with ffplay (no window, exit when done) and print the result."""
    try:
        print(subprocess.run(['ffplay', wav_file, "-nodisp", "-autoexit"]))
    except (subprocess.SubprocessError, OSError) as e:
        # OSError covers the common case of ffplay not being installed.
        print(e)


def playVagFile(filename: str) -> int:
    """
    Play a VAG file, whether mono ('VAGp') or interleaved stereo ('VAGi').

    The file is converted to a temporary wav in ``tempDir``, played with
    ffplay, and the temporary files are removed afterwards.

    Returns 0 after attempting playback, or -1 if the magic is not a known
    VAG magic.
    """
    tempWav = f"{tempDir}/temp.wav"
    tempChannels = (f"{tempDir}/temp-L.vag", f"{tempDir}/temp-R.vag")

    with open(filename, 'rb') as f:
        magic = f.read(4)

    if magic == b'VAGp':
        print(f'File {filename} is MONO! Converting directly.')
        convert_vag_to_wav(filename, tempWav)
    elif magic == b'VAGi':
        # Interleaved file! Split the channels, then join them as stereo.
        splitVagFile(filename, *tempChannels)
        convert_stereo_vag_to_wav(*tempChannels, tempWav)
        # Cleanup of the intermediate single-channel files.
        for channelFile in tempChannels:
            if os.path.exists(channelFile):
                os.remove(channelFile)
    else:
        print(f'ERROR! File was not valid VAG file. Magic: 0x{magic.hex()} // {magic}')
        return -1

    # File is ready, play it!!!
    play_with_ffplay(tempWav)
    # The wav may be missing if the conversion failed; only remove what exists.
    if os.path.exists(tempWav):
        os.remove(tempWav)
    return 0


def main():
    # TESTING AREA
    convert_stereo_vag_to_wav("workingFiles/vag-examples/testLeft.vag", "workingFiles/vag-examples/testRight.vag", "workingFiles/vag-examples/newFile.wav")
    play_with_ffplay("workingFiles/vag-examples/newFile.wav")


if __name__ == "__main__":
    main()
class subtitle:
    """
    One timed dialogue line (text, start frame, duration in frames).

    Can be built either from a raw binary record or from explicit values:

        subtitle(rawBytes)               # parse a binary record
        subtitle(text, start, duration)  # build from values
    """
    text: str
    startFrame: int
    duration: int

    def __init__(self, dialogue_or_bytes, b = None, c = None) -> None:
        """
        dialogue_or_bytes: a raw record (bytes) or the dialogue text (str).
        b: start frame, used only with text input.
        c: duration, used only with text input.

        For any other input type no attributes are set (unchanged behaviour).
        """
        if isinstance(dialogue_or_bytes, bytes):
            # Record layout as read here: three native-order uint32 values
            # (length, start, duration), then text from byte 16 onward,
            # NUL-padded. The text is kept as raw bytes, not decoded.
            _length, start, duration = struct.unpack("III", dialogue_or_bytes[0:12])
            self.text = dialogue_or_bytes[16:].strip(bytes.fromhex("00"))
            self.startFrame = int(start)
            self.duration = int(duration)
        elif isinstance(dialogue_or_bytes, str):
            self.text = dialogue_or_bytes
            self.startFrame = int(b)
            self.duration = int(c)

        return

    def __str__(self) -> str:
        a = f'Subtitle contents: Start: {self.startFrame} Duration: {self.duration} Text: {self.text}'
        return a

    def __bytes__(self) -> bytes:
        """
        Encode the subtitle as bytes: start frame, duration and a zero word,
        followed by the encoded text and 1-4 NUL bytes so the total length is
        divisible by 4.

        NOTE(review): this layout (text at byte 12, no length field) does not
        mirror what __init__ parses (text at byte 16) -- confirm which one the
        consumers expect.
        """
        subtitleBytes: bytes = struct.pack("III", self.startFrame, self.duration, 0)
        subtitleBytes += RD.encodeJapaneseHex(self.text)[0]
        # Always pads, even when already aligned (adds 4 in that case).
        bufferNeeded = 4 - (len(subtitleBytes) % 4)
        subtitleBytes += bytes(bufferNeeded)

        return subtitleBytes
#!/bin/bash

# Re-encode every exported credits image and compare the generated blocks
# against the verification set.

# Pick the encoder path for the current OS. The test must compare the OUTPUT
# of uname; `if uname == "Darwin"` would run uname with two extra arguments.
if [ "$(uname)" = "Darwin" ]; then
    SCRIPT="python3 /Users/solidmixer/projects/mgs1-undub/myScripts/creditsHacking/imageEncoder.py"
else
    SCRIPT="python3 /home/solidmixer/projects/mgs1-undub/myScripts/creditsHacking/imageEncoder.py"
fi

echo "" > creditsHacking/output/recreatedPalletes.txt

# Run the script on all the images
for file in creditsHacking/output/images/*.tga; do
    # An unmatched glob stays literal; skip it instead of passing it on.
    [ -e "$file" ] || continue
    echo "Running $file through script..."
    # $SCRIPT is intentionally unquoted so it splits into command + argument.
    $SCRIPT "$file" >> creditsHacking/output/recreatedPalletes.txt
done

# Compare the blocks generated
for file in creditsHacking/output/blocks/*.txt; do
    [ -e "$file" ] || continue
    BASENAME=$(basename "$file")
    if diff "$file" "creditsHacking/output/verification/$BASENAME"; then
        echo "Block $BASENAME is the same"
    else
        echo "Block $BASENAME is different"
    fi
done
def lzss_compress(data, window_size=128, lookahead_buffer_size=128):
    """
    Compress a bytes object with a simple sliding-window matcher.

    Args:
        data (bytes): The data to compress.
        window_size (int): How far back a match may start.
        lookahead_buffer_size (int): Upper bound on a single match length.

    Returns:
        list[tuple]: (offset, length, next_byte) tuples. Literals are emitted
        as (0, 0, byte); a match that reaches the end of the input carries
        None as its next_byte.
    """
    total = len(data)

    def run_length(src, dst):
        # Number of equal bytes when comparing data[src:] with data[dst:].
        n = 0
        while (n < lookahead_buffer_size and
               dst + n < total and
               data[src + n] == data[dst + n]):
            n += 1
        return n

    tokens = []
    pos = 0

    while pos < total:
        best_len, best_dist = 0, 0

        # Strict '>' keeps the earliest (farthest back) of equally long matches.
        for candidate in range(max(0, pos - window_size), pos):
            found = run_length(candidate, pos)
            if found > best_len:
                best_len, best_dist = found, pos - candidate

        if best_len <= 1:
            # Too short to be worth a reference: emit the byte as a literal.
            tokens.append((0, 0, data[pos]))
            pos += 1
            continue

        follow = pos + best_len
        tokens.append((best_dist, best_len, data[follow] if follow < total else None))
        pos = follow + 1

    return tokens
debug = True


def compressLine(data: bytes) -> bytes:
    """
    Compress one line of image data into the control-byte stream format.

    Stream format produced here:
      * control byte n < 0x80: the next n bytes are literals;
      * control byte 0x80 + n followed by a distance byte d: copy n bytes
        starting d bytes back in the output;
      * a single 0x00 terminates the line.

    Lengths are limited to 0x7F so they always fit the control byte.

    NOTE(review): a match distance above 255 still cannot be encoded in one
    byte and raises ValueError; the lines this was written for are shorter.
    """
    MAX_LEN = 0x7F  # largest count a control byte can carry

    def findNextPatternOrRepeat(data: bytes, index: int) -> int:
        """
        Count the bytes from index up to the first position where the next 3
        bytes are all the same value or already occur earlier in the data.
        """
        count = 0
        checkLength = 3
        while True:
            patternCheck = data[index + count: index + count + checkLength]
            if len(set(patternCheck)) == 1 or data[:index + count].find(patternCheck) != -1:
                break
            count += 1
        return count

    def getLongestRepeat(data: bytes, index: int) -> int:
        """Length (at most 128) of the run of one repeated byte starting at index."""
        after = data[index:]
        count = 0
        while count < min(128, len(after)):
            if len(set(after[:count + 1])) == 1:
                count += 1
            else:
                break
        return count

    def getLongestPattern(data: bytes, index: int) -> tuple [int, int]:
        """
        Return (distance, length) of the longest prefix of data[index:] that
        also occurs somewhere in data[:index]; distance is measured back from
        index to the last such occurrence.
        """
        before = data[:index]
        after = data[index:]
        count = 0
        while count < len(after):
            if before.find(after[:count + 1]) != -1:
                count += 1
            else:
                break
        distance = abs(len(before) - before.rfind(after[:count]))
        return distance, count

    compressedData = b''
    i = 0

    while i < len(data):
        newBytes = b''
        repeatCount = getLongestRepeat(data, i)
        distance, patternLen = getLongestPattern(data, i)
        prevSame = i != 0 and data[i - 1] == data[i]

        if patternLen >= repeatCount and patternLen > 1:
            if prevSame:
                # Continue the run of the previous byte: copy from 1 back.
                runLen = min(repeatCount, MAX_LEN)
                newBytes += bytes([runLen + 0x80, 1])
                i += runLen
            else:
                # Copy an earlier occurrence. A truncated match is still a
                # valid prefix at the same distance.
                copyLen = min(patternLen, MAX_LEN)
                newBytes += bytes([copyLen + 0x80, distance])
                i += copyLen
        elif repeatCount > 3:
            if prevSame:
                runLen = min(repeatCount, MAX_LEN)
                newBytes += bytes([runLen + 0x80, 1])
                i += runLen
            else:
                # New run: one literal, then copy the rest from 1 back.
                newBytes += bytes([1, data[i]])
                newBytes += bytes([repeatCount - 1 + 0x80, 1])
                i += repeatCount
        else:
            # Plain literals up to the next run or pattern (at least one).
            newString = max(1, min(findNextPatternOrRepeat(data, i), MAX_LEN))
            newBytes += bytes([newString]) + data[i : i + newString]
            i += newString

        if debug:
            # Hex is built outside the f-string so this parses before 3.12.
            hexOut = newBytes.hex(sep=' ')
            print(f'Compressed data: {hexOut}')

        compressedData += newBytes

    compressedData += bytes.fromhex('00')

    return compressedData
121 | """ 122 | # Original 123 | # 01 FF B1 01 13 6F 40 F2 0B 04 FB 04 AA BA FF BF 60 FF EF 41 0D F5 6F B0 84 17 11 04 FD EE FF 9F 50 AB EB FF 14 3E F3 CF 60 EF 04 FC 83 13 11 83 FD BF 50 FF DF 50 5F C0 FF FF 09 F9 05 FC EF 31 B2 6E 00 124 | # 01 ff b1 01 13 6f 40 f2 0b 04 fb 04 aa ba ff bf 60 ff ef 41 0d f5 6f b0 84 17 81 11 10 fd ee ff 9f 50 ab eb ff 14 3e f3 cf 60 ef 04 fc 83 13 11 83 fd bf 50 ff df 50 5f c0 ff ff 09 f9 05 fc ef 31 b2 6e 00 125 | # mine -------------------------------------------------------------------------------- /creditsHacking/scra.py: -------------------------------------------------------------------------------- 1 | # MINE 2 | # 01 ff c7 01 0a ee ee ee fe ff ef ee ff ff ef 01 ee 86 01 82 0e c5 01 00 3 | # 01 FF C7 01 01 EE 07 EE EE FE FF EF EE FF 81 01 82 04 86 01 82 0E C5 01 00 4 | # 01 FF C6 01 13 EF 2B 00 40 EC FF 0C 60 FE FF 6D 02 72 ED 04 00 00 40 FE C6 59 00 5 | # 01 FF C6 01 13 BE 01 74 05 E2 FF 08 10 FE EF 06 76 04 E5 46 04 40 64 FE C6 59 00 6 | # 01 FF C6 01 13 4E 70 EE 8E E8 EF 03 06 E9 EF 30 EE 6E EB EE 0E E0 EE FE C6 59 00 7 | # 01 FF C6 01 02 0D D0 83 05 0C BF 50 08 E6 EF 02 D8 EE FE FF 0E E0 01 FF C7 01 00 8 | # 01 FF C6 01 02 0A E4 83 05 0C 6E 80 0D E1 EF 09 00 A5 EE FF 0E E0 01 FF C7 01 00 9 | # 01 FF C6 01 02 0A E4 83 05 0C 2E C0 5E 90 FF CE 26 00 E6 FF 0E E0 01 FF C7 01 00 10 | # 01 FF C6 01 02 0C E1 83 05 0C 0B 00 00 60 FE ED EE 08 C0 FF 0E E0 01 FF C7 01 00 11 | # 01 FF C6 01 11 2E 80 EE AE EB 06 43 44 11 BE A3 EE 4E A0 FF 0E E0 01 FF C7 01 00 12 | # 01 FF C6 01 11 9E 10 A7 18 D2 02 EC EE 06 D9 10 A8 08 D1 FF 0E E0 01 FF C7 01 00 13 | # 01 FF C6 01 11 EF 08 00 10 B9 50 FE EF 09 E6 18 00 00 E8 FF 0E E0 01 FF C7 01 00 14 | # 01 FF C7 01 01 EE 84 01 87 07 01 FE 83 07 C8 58 00 15 | 16 | # Original 17 | # 01 FF C7 01 08 EE EE EE FE FF EF EE FF 83 04 86 01 82 0E C5 01 00 18 | # 01 FF C6 01 13 EF 2B 00 40 EC FF 0C 60 FE FF 6D 02 72 ED 04 00 00 40 FE C6 59 00 19 | # 01 FF C6 01 13 BE 01 74 05 E2 FF 08 10 FE EF 06 76 04 E5 46 
04 40 64 FE C6 59 00 20 | # 01 FF C6 01 13 4E 70 EE 8E E8 EF 03 06 E9 EF 30 EE 6E EB EE 0E E0 EE FE C6 59 00 21 | # 01 FF C6 01 02 0D D0 83 05 0C BF 50 08 E6 EF 02 D8 EE FE FF 0E E0 C7 58 01 FF 00 22 | # 01 FF C6 01 02 0A E4 83 05 0C 6E 80 0D E1 EF 09 00 A5 EE FF 0E E0 C7 58 01 FF 00 23 | # 01 FF C6 01 02 0A E4 83 05 0C 2E C0 5E 90 FF CE 26 00 E6 FF 0E E0 C7 58 01 FF 00 24 | # 01 FF C6 01 02 0C E1 83 05 0C 0B 00 00 60 FE ED EE 08 C0 FF 0E E0 C7 58 01 FF 00 25 | # 01 FF C6 01 11 2E 80 EE AE EB 06 43 44 11 BE A3 EE 4E A0 FF 0E E0 C7 58 01 FF 00 26 | # 01 FF C6 01 11 9E 10 A7 18 D2 02 EC EE 06 D9 10 A8 08 D1 FF 0E E0 C7 58 01 FF 00 27 | # 01 FF C6 01 11 EF 08 00 10 B9 50 FE EF 09 E6 18 00 00 E8 FF 0E E0 C7 58 01 FF 00 28 | # 01 FF C7 01 01 EE 84 01 87 07 01 FE 83 07 C8 58 00 29 | 30 | EE EE EE FE FF EF EE FF EF EE FF EF 31 | EE EE EE FE FF EF EE FF 32 | 83 04 33 | EF EE FF 34 | 86 01 35 | EF 36 | 37 | 38 | ee ee ee fe ff ef ee ff ff ef ee ee ee ee ee ee ee fe ff -------------------------------------------------------------------------------- /demoManager.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | from xml.dom.minidom import parseString 3 | import os, sys, json 4 | 5 | # Add subfolder(s) relative to the script location 6 | # script_dir = os.path.dirname(os.path.abspath(__file__)) 7 | # sys.path.insert(0, os.path.join(script_dir, 'radioTools')) 8 | # sys.path.insert(0, os.path.join(script_dir, 'demoTools')) 9 | # Assuming your submodule is in 'my-renamed-lib' 10 | # submodule_path = os.path.join(os.path.dirname(__file__), "tools", "myscripts", "radioTools") # Adjust path if needed 11 | # submodule_path = os.path.join(os.path.dirname(__file__), "tools", "myscripts") # Adjust path if needed 12 | # sys.path.insert(0, submodule_path) # Insert at the beginning to prioritize 13 | 14 | import demoClasses as demoCtrl 15 | # import tools.myscripts.translation.radioDict as RD 16 | # import 
DEMO_HEADER: bytes = b'\x10\x08\x00\x00'
DEMO_CHUNKSIZE: int = 0x800


def findDemoOffsets(demoFileData: bytes, header: bytes, chunkSize: int):
    """
    Return every chunk-aligned offset whose first 4 bytes equal ``header``.

    Modified from the original splitter. This accepts chunk size and header so
    it can serve Demo, Vox, and Zmovie (Zmovie has a different chunk size).
    Only chunk boundaries are probed, which is much faster than a byte scan.
    Credit to Green Goblin.
    """
    return [
        offset
        for offset in range(0, len(demoFileData), chunkSize)
        if demoFileData[offset:offset + 4] == header
    ]


def sliceDemoData(demoDatData: bytes, demoOffsets: list) -> list:
    """
    Pair each start offset with the bytes running up to the next offset; the
    last entry runs to the end of the data. Returns [] for no offsets.
    """
    ends = list(demoOffsets[1:]) + [len(demoDatData)]
    return [(start, demoDatData[start:end]) for start, end in zip(demoOffsets, ends)]


def parseDemoFile(demoDatData: bytes) -> "dict[str, demoCtrl.demo]":
    """
    Split a DEMO.DAT image into demo objects keyed by their offset (as str).

    Every demo, including the final one, is built from its own slice of the
    data. An image without any demo header yields an empty dict.
    """
    demoOffsets = findDemoOffsets(demoDatData, DEMO_HEADER, DEMO_CHUNKSIZE)
    demos = {}
    for start, demoData in sliceDemoData(demoDatData, demoOffsets):
        demos[str(start)] = demoCtrl.demo(start, demoData)

    return demos
'workingFiles/vag-examples/') 71 | print(f'Wrote file: {fileWritten}') 72 | 73 | jsonList = {} 74 | offset, subdata = vox.toJson() 75 | jsonList[offset] = subdata 76 | print(jsonList) 77 | 78 | VAG.playVagFile(fileWritten) 79 | 80 | # # JSON output 81 | # jsonList = {} 82 | # for demo in demos: 83 | # # Get demo json data here. 84 | # offset, subdata = demo.toJson() 85 | # jsonList[offset] = subdata 86 | 87 | with open("workingfiles/vag-testing.json", "w") as f: 88 | json.dump(jsonList, f, ensure_ascii=False, indent=2) 89 | 90 | 91 | """# XML Output 92 | allDemos = ET.Element("DemoDat") 93 | # allDemos.append(demos[0].structure) 94 | for demo in demos: 95 | allDemos.append(demo.structure) 96 | 97 | # TESTING BRANCH 98 | # testDemoExport = demos[1].structure 99 | xmlstr = parseString(ET.tostring(allDemos)).toprettyxml(indent=" ") 100 | xmlFile = open(outputFilename, 'w', encoding='utf8') 101 | xmlFile.write(xmlstr) 102 | xmlFile.close()""" 103 | 104 | """ 105 | stringOut = ET.tostring(testDemoExport, encoding='utf-8') 106 | parseString(stringOut) 107 | xmlstr = stringOut.toprettyxml(indent=" ") 108 | # xmlstr = parseString(ET.tostring(allDemos)).toprettyxml(indent=" " 109 | xmlFile = open(f'{outputFilename}.xml', 'wb') 110 | xmlFile.write(stringOut) 111 | xmlFile.close() 112 | """ -------------------------------------------------------------------------------- /graphicsExport/KanjiStillMissing.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/graphicsExport/KanjiStillMissing.txt -------------------------------------------------------------------------------- /graphicsExport/contextList.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/graphicsExport/contextList.txt 
def getOffsets(data: bytes, limit: int = 0x2500) -> list [tuple [int, int, bytes]]:
    """
    Locate the item-description strings inside ``data``.

    The buffer is scanned in 2-byte steps for the marker bytes B0 14. For each
    marker found before ``limit`` (and before the end of the data), the string
    is taken to run up to the next NUL byte, or to the end of the data if there
    is none. Its length is then padded up to the next multiple of 4; an
    already aligned length gains a full 4 bytes.

    Returns a list of (offset, paddedLength, rawBytes) tuples.

    NOTE(review): this now scans the ``data`` argument instead of the
    module-level ``execData``; an entry that straddles the end of ``data`` is
    returned truncated.
    """
    marker = bytes.fromhex("B014")
    scanEnd = min(len(data), limit)
    offsets = []
    offset = 0

    while offset < scanEnd:
        if data[offset: offset + 2] != marker:
            offset += 2
            continue

        terminator = data.find(b'\x00', offset)
        if terminator == -1:
            terminator = len(data)
        length = terminator - offset
        length += 4 - (length % 4)

        print(f'{struct.pack(">I", offset).hex()}: {length}')
        offsets.append((offset, length, data[offset: offset + length]))
        offset += length

    return offsets
90 | exit(0) 91 | 92 | 93 | # Extractor logic 94 | 95 | """ 96 | if __name__ == "__main__": 97 | offset = 0 98 | offsets = getOffsets(execData[0:0x2500]) 99 | 100 | descriptions = {} 101 | for item in offsets: 102 | descriptions.update({item[0]: [item[1], RD.translateJapaneseHex(item[2]).strip('\x00')]}) 103 | 104 | with open(outputJsonFilename, 'w') as f: 105 | json.dump(descriptions, f, ensure_ascii=False)""" 106 | 107 | -------------------------------------------------------------------------------- /jsonTools.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a collection of methods to modify json files. 3 | 4 | Some english calls match the japanese ones enough to zip the english lines in with the japanese offsets. 5 | This will also help do other json modifications as needed. 6 | 7 | """ 8 | 9 | import os, sys, struct 10 | import argparse 11 | import json 12 | import xml.etree.ElementTree as ET 13 | from xml.dom.minidom import parseString 14 | 15 | # import xmlModifierTools as xmlin 16 | 17 | # flags 18 | debug = True 19 | 20 | # This should be the format moving forward 21 | newjson = { 22 | "calls": {}, 23 | "saves": {}, 24 | "freqAdd": {}, 25 | "prompts": {} 26 | } 27 | 28 | codecNames = { 29 | "メリル" : "MERYL", 30 | "キャンベル" : "CAMPBELL", 31 | "メイ・リン" : "MEI LING", 32 | "オタコン" : "OTACON", 33 | "マスター" : "MASTER", 34 | "ナスターシャ" : "NASTASHA", 35 | "ディープスロート" : "DEEPTHROAT", 36 | "STAFF" : "STAFF", 37 | } 38 | 39 | matchingCalls = { 40 | "0": "0", 41 | "505": "910", # Meryl call 42 | "26537": "42370", 43 | "38411": "59333", 44 | "41131": "63533", 45 | "94514": "105879", 46 | "179885": "177459", 47 | "293536": "283744", # 140.85 48 | } 49 | 50 | def replaceJsonText(callOffsetA: str, callOffsetB: str): 51 | """ 52 | Replaces the subtitles in jsonB with the subtitles from jsonA while keeping the offsets the same. 53 | Each Call Offset is the (original) call offset as seen in the key of the json format. 
def writeJsonToFile(outputFilename: str):
    """
    Writes the module-level ``newjson`` dictionary to ``outputFilename`` as JSON.

    The file is opened inside a ``with`` block so it is always flushed and
    closed, even if serialisation or the write fails. (The previous version
    referenced ``newCall.close`` without calling it, so the handle was never
    explicitly closed.)

    NOTE: ``json.dumps`` escapes non-ASCII characters as ``\\uXXXX`` by default.
    That output is kept as-is here; pass ``ensure_ascii=False`` and open the
    file with ``encoding='utf-8'`` if raw characters are wanted in the file.
    """
    with open(outputFilename, 'w') as newCall:
        newCall.write(json.dumps(newjson))
| jsonB = open("recompiledCallBins/RADIO-jpn-d1-Iseeva.json", 'r') 111 | 112 | outputFilename = 'recompiledCallBins/modifiedCalls.json' 113 | 114 | inputJson = json.load(jsonA) 115 | modJson = json.load(jsonB) 116 | 117 | # Def put calls together 118 | for call in inputJson['calls'].keys(): 119 | newSubs: dict = inputJson['calls'][call] 120 | destCall = matchingCalls.get(call) 121 | if destCall is None: 122 | continue 123 | newOffsets: dict = modJson['calls'][destCall] 124 | newCall = dict(zip(newOffsets.keys(), newSubs.values())) 125 | newjson['calls'][destCall] = newCall 126 | 127 | # Save file names (Dock, heliport, etc) 128 | # is coming out as unicode for some reason... 129 | newSaves: dict = next(iter(inputJson['saves'].values())) 130 | for save in modJson['saves'].keys(): 131 | newjson['saves'][save] = newSaves 132 | 133 | # Save options (SAVE / DO NOT SAVE) 134 | options: dict = next(iter(inputJson['prompts'].values())) 135 | for opt in modJson['prompts'].keys(): 136 | newjson['prompts'][opt] = options 137 | 138 | # Codec frequency names 139 | for name in modJson['freqAdd'].keys(): 140 | newName = codecNames.get(modJson['freqAdd'].get(name)) 141 | newjson['freqAdd'].update({name: newName}) 142 | 143 | """matches = zip(inputJson['calls'].keys(), modJson['calls'].keys()) 144 | for item in matches: 145 | print(item) 146 | 147 | zippedOffsets = { 148 | {'0': '0'}, 149 | {'505': '910'}, 150 | {'671': '1143'}, 151 | {'3326': '5833'}, 152 | {'11590': '20491'}, 153 | {'26537': '42370'}, 154 | {'26940': '43010'}, 155 | {'35756': '58410'}, 156 | {'38411': '59333'}, 157 | {'41131': '63533'}, 158 | {'43872': '67711'}, 159 | {'69134': '100554'}, 160 | {'69320': '101971'}, 161 | {'94514': '105879'}, 162 | {'97082': '109999'}, 163 | {'122241': '113848'}, 164 | {'126772': '118092'}, 165 | {'129537': '122322'}, 166 | {'179885': '177459'}, 167 | {'182648': '181774'}, 168 | {'229267': '261556'}, 169 | {'271289': '272965'}, 170 | {'282526': '278558'}, 171 | {'287169': 
'283744'}, 172 | {'293536': '285449'}, 173 | {'294379': '288512'}, 174 | {'295704': '291664'}, 175 | {'298506': '295376'}, 176 | {'301847': '297431'}, 177 | {'302703': '298504'}, 178 | {'303179': '300837'}, 179 | {'304549': '303506'}, 180 | {'305678': '308961'}, 181 | {'308325': '309595'}, 182 | {'308699': '311129'}, 183 | {'309558': '315529'}, 184 | {'311736': '333438'}, 185 | {'321920': '345760'}, 186 | {'328919': '346636'}, 187 | {'329312': '353110'}, 188 | {'332769': '354836'}, 189 | {'333817': '357134'}, 190 | {'335020': '360648'}, 191 | {'337120': '364457'}, 192 | {'338913': '370275'}, 193 | {'342170': '373157'}, 194 | {'343739': '373922'}, 195 | {'344231': '382452'}, 196 | {'348314': '384737'}, 197 | {'349360': '387533'}, 198 | {'350796': '392774'}, 199 | {'353441': '404827'}, 200 | {'360036': '411172'}, 201 | {'363076': '420535'}, 202 | {'367351': '424648'}, 203 | {'369290': '435981'}, 204 | {'376677': '437859'}, 205 | {'377402': '438879'}, 206 | {'378647': '440301'}, 207 | {'379873': '442271'}, 208 | {'380406': '442769'}, 209 | {'381165': '443783'}, 210 | {'382244': '444479'}, 211 | {'382543': '444665'}, 212 | {'383629': '500848'}, 213 | {'384599': '501542'}, 214 | {'384716': '502253'}, 215 | {'432135': '503414'}, 216 | {'432611': '507374'}, 217 | {'433194': '511743'}, 218 | {'433843': '516278'}, 219 | {'436675': '521476'}, 220 | {'439333': '522080'}, 221 | {'442577': '523100'}, 222 | {'446412': '523558'}, 223 | {'446777': '528381'}, 224 | {'447310': '537194'}, 225 | {'448486': '541583'}, 226 | {'451001': '542961'}, 227 | {'456523': '553357'}, 228 | {'458986': '578094'}, 229 | {'459677': '613767'}, 230 | {'466198': '655215'}, 231 | {'489546': '705182'}, 232 | {'516272': '746540'}, 233 | {'549024': '759232'}, 234 | {'591025': '764099'}, 235 | {'623761': '835965'}, 236 | {'631792': '838702'}, 237 | {'634999': '842905'}, 238 | {'699738': '846865'}, 239 | {'701246': '850748'}, 240 | {'703536': '853313'}, 241 | {'705785': '857004'}, 242 | {'707757': '862252'}, 
243 | {'709148': '864340'}, 244 | {'711264': '864934'}, 245 | {'713923': '865585'}, 246 | {'714032': '871462'}, 247 | {'714385': '871740'}, 248 | {'714712': '872031'}, 249 | {'717673': '872900'}, 250 | {'717843': '873258'}, 251 | {'718013': '873491'}, 252 | {'718622': '874414'}, 253 | {'718803': '874749'}, 254 | {'718943': '875056'}, 255 | {'719453': '876158'}, 256 | {'719679': '876792'}, 257 | {'719832': '878450'}, 258 | {'720492': '878723'}, 259 | {'720677': '878944'}, 260 | {'721483': '880788'}, 261 | {'721707': '881135'}, 262 | {'721832': '882585'}, 263 | {'722648': '885749'}, 264 | {'722890': '886218'}, 265 | {'723789': '886375'}, 266 | {'725700': '886608'}, 267 | {'726078': '886801'}, 268 | {'726179': '887492'}, 269 | {'726304': '973398'}, 270 | {'726461': '975232'}, 271 | {'726815': '976020'}, 272 | {'770767': '978487'}, 273 | {'807721': '979250'}, 274 | {'808970': '980047'}, 275 | {'809469': '980510'}, 276 | {'811155': '1002600'}, 277 | {'811467': '1004940'}, 278 | {'811916': '1005403'}, 279 | {'812065': '1010981'}, 280 | {'831617': '1012874'}, 281 | {'832981': '1013171'}, 282 | {'833159': '1049352'}, 283 | {'836088': '1085533'}, 284 | {'836918': '1121714'}, 285 | {'837068': '1157895'}, 286 | {'867121': '1194076'}, 287 | {'897174': '1230257'}, 288 | {'927227': '1266438'}, 289 | {'957280': '1302619'}, 290 | {'987333': '1303528'}, 291 | {'1017386': '1306954'}, 292 | {'1047439': '1307107'}, 293 | {'1077492': '1308686'}, 294 | {'1077803': '1309090'}, 295 | {'1079275': '1310166'}, 296 | {'1079502': '1310397'}, 297 | {'1080525': '1310526'}, 298 | {'1080808': '1311009'}, 299 | {'1081462': '1311570'}, 300 | {'1081608': '1312516'}, 301 | {'1081700': '1312937'}, 302 | {'1081866': '1313084'}, 303 | {'1082197': '1315869'}, 304 | {'1082768': '1316466'}, 305 | {'1083011': '1322167'}, 306 | {'1083177': '1322481'}, 307 | {'1084424': '1325742'}, 308 | {'1084770': '1328330'}, 309 | {'1087728': '1329057'}, 310 | {'1087971': '1356954'}, 311 | {'1089994': '1357344'}, 312 | 
{'1091343': '1367614'}, 313 | {'1091760': '1371192'}, 314 | {'1111668': '1373669'}, 315 | {'1111875': '1374442'}, 316 | {'1119995': '1375262'}, 317 | {'1121944': '1375619'}, 318 | {'1123640': '1382995'}, 319 | {'1123962': '1383704'}, 320 | {'1124391': '1384423'}, 321 | {'1124548': '1389728'}, 322 | {'1128535': '1392298'}, 323 | {'1128893': '1392479'}, 324 | {'1129204': '1393925'}, 325 | {'1129641': '1423837'}, 326 | {'1130932': '1470339'}, 327 | {'1131055': '1519040'}, 328 | {'1131722': '1572335'}, 329 | {'1152131': '1629598'}, 330 | {'1191276': '1692839'}, 331 | {'1234214': '1758627'}, 332 | {'1283463': '1821547'}, 333 | {'1323905': '1879728'}, 334 | {'1371648': '1911349'}, 335 | {'1416778': '1942970'}, 336 | {'1467474': '1974591'}, 337 | {'1510221': '2006212'}, 338 | {'1532572': '2037833'}, 339 | {'1554923': '2069454'}, 340 | {'1577274': '2101075'}, 341 | {'1599625': '2132696'}, 342 | {'1621976': '2136524'}, 343 | {'1644327': '2141301'}, 344 | {'1666678': '2145475'}, 345 | {'1689029': '2150117'}, 346 | {'1690966': '2179852'}, 347 | {'1693308': '2191515'}, 348 | {'1695364': '2195443'}, 349 | {'1697474': '2200316'}, 350 | {'1717658': '2204609'}, 351 | {'1725045': '2209223'}, 352 | {'1727078': '2213285'}, 353 | {'1729279': '2216663'}, 354 | {'1731618': '2221409'}, 355 | {'1734005': '2225332'}, 356 | {'1735898': '2276631'}, 357 | {'1737574': '2277523'}, 358 | {'1739868': '2278723'}, 359 | {'1741893': '2280299'}, 360 | {'1773680': '2280829'}, 361 | } 362 | """ 363 | 364 | writeJsonToFile(outputFilename) -------------------------------------------------------------------------------- /quickTranslate.py: -------------------------------------------------------------------------------- 1 | import translation.radioDict as RD 2 | 3 | text = "" 4 | textToPrint = RD.translateJapaneseHex(bytes.fromhex(text), callDict = {} ) 5 | print(textToPrint) 6 | -------------------------------------------------------------------------------- /radioModule.py: 
class radioDataEditor():
    """
    Holds a parsed radio XML tree and tracks a "working" call and VOX element.

    Typical use: ``loadRadioXmlFile`` -> ``setWorkingCall`` -> ``setWorkingVox``
    -> ``getSubs``. Offsets are compared against the ``offset`` attribute of the
    XML elements, which ElementTree exposes as strings.
    """
    radioXMLData: ET.Element  # root element of the loaded XML, or None
    calls: list[ET.Element]  # direct "Call" children of the root
    workingCall: ET.Element  # call selected by setWorkingCall, or None
    workingVox: ET.Element  # VOX_CUES element selected by setWorkingVox, or None

    def __init__(self) -> None:
        """
        Initialize the class. Load the radio data as persistent for reading/editing.
        """
        self.radioXMLData = None
        self.calls = []
        self.workingCall = None
        self.workingVox = None

    def loadRadioXmlFile(self, filename: str) -> None:
        """
        Parses ``filename`` and caches its root element and "Call" children.

        Any previously loaded calls and working selections are cleared first, so
        a failed load cannot leave stale elements from an earlier file behind.
        On failure an error is printed and ``radioXMLData`` stays ``None``.
        """
        self.radioXMLData = None
        self.calls = []
        self.workingCall = None
        self.workingVox = None
        try:
            self.radioXMLData = ET.parse(filename).getroot()
        except FileNotFoundError:
            print(f"Error: File not found: {filename}")
            return
        except ET.ParseError:
            print(f"Error: Could not parse XML File {filename}. Ensure we've loaded an XML file created from RadioDatTools.")
            return
        self.calls = self.radioXMLData.findall("Call")

    def setWorkingCall(self, offset: str):
        """
        Selects the first loaded call whose ``offset`` attribute equals ``offset``.

        The current selection is left untouched (and a notice is printed) when
        no call matches.
        """
        for call in self.calls:
            if call.get("offset") == offset:
                self.workingCall = call
                print(f'RDE: Working call was set to offset {offset}')
                break
        else:
            print(f'RDE: No call found at offset {offset}')

    def setWorkingVox(self, offset: str):
        """
        Selects the first VOX_CUES element under the working call whose
        ``offset`` attribute equals ``offset``.

        Does nothing (apart from printing a notice) when no working call has
        been selected yet.
        """
        if self.workingCall is None:
            print('Error: No working call selected; call setWorkingCall first.')
            return
        # self.workingVox = self.workingCall.find(f".//VOX_CUES[@offset='{offset}']")
        for vox in self.workingCall.findall(".//VOX_CUES"):
            if vox.get("offset") == offset:
                self.workingVox = vox
                print(f'VOX {offset} identified and selected')
                break

    def getCallOffsets(self) -> list[str]:
        """
        Returns call offsets found
        """
        return [callElem.get("offset") for callElem in self.calls]

    def getCall(self, offset: int) -> "ET.Element | None":
        """
        Returns the call element for a given offset.

        Returns ``None`` when no call has that offset or when no XML has been
        loaded. (Previously the "nothing loaded" case printed an error and then
        crashed with ``UnboundLocalError`` on the return statement.)
        """
        if self.radioXMLData is None:
            print('Error: No radio XML data loaded.')
            return None
        return self.radioXMLData.find(f".//Call[@offset='{offset}']")

    def getVoxOffsets(self) -> list[str]:
        """
        Returns the ``offset`` attribute of every VOX_CUES element in the working call.

        Returns an empty list (after printing an error) when no working call is selected.
        """
        if self.workingCall is None:
            print('Error: No working call selected.')
            return []
        return [vox.get("offset") for vox in self.workingCall.findall(".//VOX_CUES")]

    def getSubs(self) -> list[str]:
        """
        Returns the ``text`` attribute of every SUBTITLE child of the working VOX element.

        Returns an empty list (after printing an error) when no working VOX is selected.
        """
        if self.workingVox is None:
            print('Error: No working VOX selected.')
            return []
        return [sub.get("text") for sub in self.workingVox.findall("SUBTITLE")]

    def replaceVox(self, newVoxElem: ET.Element) -> None:
        """
        Replaces the modified element into the element tree (Radio Data)

        Not implemented yet. ``self`` was missing from the signature, which made
        the method impossible to call on an instance.
        """
        pass
def main(filename, offset, length):
    """
    Copies ``length`` bytes starting at ``offset`` from ``filename`` into
    ``<offset>.bin`` in the current working directory.

    Both files are handled with ``with`` blocks so they are closed even when a
    read or write fails (the input file was previously never closed).

    filename: path of the radio file to read from.
    offset:   start of the call, in decimal (not hex).
    length:   number of bytes to copy.
    """
    print("Please provide offsets for the call in decimal forrmat (not hex)!")

    outputFile = str(offset) + '.bin'

    # Read first: a missing input file must fail before any output is created.
    with open(filename, 'rb') as radioFile:
        radioFile.seek(offset)
        callData = radioFile.read(length)

    with open(outputFile, 'wb') as output:
        output.write(callData)

    return
# Overwrites one call inside a radio file with the contents of a replacement
# call file and writes the patched copy next to the original.

filename = "14085-testing/RADIO-jpn-d1.DAT"
updatedCall = "283744-new-mod.bin"
offset = 283744

# Read both inputs fully; `with` closes the handles as soon as we have the data.
with open(filename, 'rb') as radioFile:
    radioData = radioFile.read()

with open(updatedCall, 'rb') as newCallFile:
    newCallData = newCallFile.read()

# Original bytes before the call + the replacement call + whatever original
# bytes follow the position the replacement ends at.
newFileData = radioData[0: offset]
newFileData += newCallData
newFileData += radioData[len(newFileData): len(radioData)]

# The lengths differ when the replacement runs past the end of the original
# data (or the offset lies beyond it); nothing is written in that case.
if len(newFileData) == len(radioData):
    print(f'Success! Files are the same length')
    # Previously `newFile.close` was referenced but never called.
    with open('14085-testing/RADIO.DAT-modified', 'wb') as newFile:
        newFile.write(newFileData)
else:
    print(f'File lengths differ! New: {len(newFileData)}, old: {len(radioData)}')
#!/bin/bash

# Compares every file in $input_dir with the same-named .bin file in
# $output_dir and reports how many are identical and how many differ.

input_dir='demoWorkingDir/usa/bins'
output_dir='demoWorkingDir/usa/newBins'

same_count=0
different_count=0

for original in "$input_dir"/*; do
    # An unmatched glob is left as the literal pattern; skip anything that is
    # not a real file instead of handing it to diff.
    [ -f "$original" ] || continue

    base_filename=$(basename "$original" .bin)
    rebuilt="$output_dir/$base_filename.bin"

    if [ ! -f "$rebuilt" ]; then
        # Report a missing counterpart explicitly rather than letting diff
        # print an error that looks like a content difference.
        echo "Files are different: $original (missing $rebuilt)"
        different_count=$((different_count + 1))
    elif diff "$original" "$rebuilt" >/dev/null; then
        # echo "Files are the same: $original"
        same_count=$((same_count + 1))
    else
        echo "Files are different: $original"
        different_count=$((different_count + 1))
    fi
done

echo "Total files that are the same: $same_count"
echo "Total files that are different: $different_count"
#!/bin/bash

# Diffs each file in $oldBinDir against the same-named file in $newBinDir.

oldBinDir="demoWorkingDir/usa/bins/"
newBinDir="demoWorkingDir/usa/newBins/"

# Both directory variables already end in '/', so the glob is appended directly.
for file in "$oldBinDir"*; do
    # Skip the literal pattern that is left behind when the directory is empty.
    [ -e "$file" ] || continue

    # Quoted so file names containing spaces are not split into several words.
    BASENAME=$(basename "$file")
    diff "$oldBinDir$BASENAME" "$newBinDir$BASENAME"
done
#!/bin/bash

# Runs RadioDatTools.py repeatedly: first to export all calls from a radio
# file, then once per exported call .bin.
#
# Usage: extractAllCalls.sh <file in radioDatFiles/> <folder in extractedCallBins/>

# Both arguments are required. Without the second one the cleanup below would
# target extractedCallBins/* itself.
if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
    echo "Usage: $0 <file in radioDatFiles/> <folder in extractedCallBins/>" >&2
    exit 1
fi

SPLITSCRIPT="myScripts/RadioDatTools.py"
RECOMPILESCRIPT="myScripts/RadioDatRecompiler.py"
RADIODAT="radioDatFiles/$1"
input_dir="extractedCallBins/$2"
# output_dir="recompiledCallBins/$2"

# Clear results of a previous run; -f keeps this quiet when the folder is empty.
rm -f -- "$input_dir"/*
# rm $output_dir/*

python3 "$SPLITSCRIPT" "$RADIODAT" Headers -s

processed_count=0

for input in "$input_dir"/*.bin; do
    # Skip the literal pattern left behind when no .bin files were produced.
    [ -f "$input" ] || continue

    base_filename=$(basename "$input" .bin)
    # echo $base_filename
    output="$input_dir/$base_filename"
    python3 "$SPLITSCRIPT" "$input" "$output" -xz
    processed_count=$((processed_count + 1))
done

# The old same/different totals were never updated by this script (nothing is
# compared here), so the number of processed calls is reported instead.
echo "Total calls processed: $processed_count"

# rm $output_dir/*.log
"""
Compares two binary files and reports back the offset where the file breaks
"""

import os, struct
# import translation.radioDict as radioDict
import argparse

# Two-byte pair that is reported as a "Character mismatch" but not treated as a
# break: 0x9016 in the original file against 0xd016 in the new file.
KNOWN_ORIGINAL_PAIR = bytes.fromhex("9016")
KNOWN_NEW_PAIR = bytes.fromhex("d016")

# Number of bytes printed on each side of a break.
CONTEXT_BYTES = 10


def reportBreak(originalData: bytes, compareData: bytes, offset: int) -> None:
    """Prints the break offset (decimal and hex) with surrounding bytes of both files."""
    # Clamp at 0: a negative slice start would wrap around to the end of the data.
    start = max(0, offset - CONTEXT_BYTES)
    print(f"Files break at offset {offset}")
    offsetHex = struct.pack('>L', offset)
    print(f'Offset in hex: 0x{offsetHex.hex()}')
    print(f'Original: \n{originalData[start : offset + CONTEXT_BYTES].hex()}')
    print(f'New Data: \n{compareData[start : offset + CONTEXT_BYTES].hex()}')


def findDifferences(originalData: bytes, compareData: bytes, allDiffs: bool = False) -> list:
    """
    Compares the two buffers byte by byte and returns the offsets where they break.

    Every byte of the common prefix is checked (the old loop advanced four
    bytes after each match and therefore skipped three of every four bytes).
    If the buffers have different lengths, the end of the shorter one is
    reported as a break instead of raising IndexError.

    allDiffs: when False, stop at the first break; when True, report them all.
    """
    breaks = []
    commonSize = min(len(originalData), len(compareData))
    offset = 0

    while offset < commonSize:
        if originalData[offset] == compareData[offset]:
            offset += 1
            continue

        if (originalData[offset : offset + 2] == KNOWN_ORIGINAL_PAIR
                and compareData[offset : offset + 2] == KNOWN_NEW_PAIR):
            print(f'Character mismatch! {originalData[offset : offset + 2]} {compareData[offset : offset + 2]} ')
            offset += 2
            continue

        breaks.append(offset)
        reportBreak(originalData, compareData, offset)
        offset += 1
        if not allDiffs:
            return breaks

    if len(originalData) != len(compareData):
        # One file simply ends here while the other continues.
        breaks.append(commonSize)
        reportBreak(originalData, compareData, commonSize)

    return breaks


def main() -> None:
    """Parses the command line, loads both files and prints where they differ."""
    # The text used to be passed positionally, which argparse takes as `prog`.
    parser = argparse.ArgumentParser(description="Compare two binary files and figure out where they differ")

    parser.add_argument('input', type=str, help="Input Filename from script (.bin).")
    # Optional so that the default-path fallback below is actually reachable.
    parser.add_argument('output', type=str, nargs='?', help="Output Filename from script (.bin).")
    parser.add_argument('-a','--allDiffs', action='store_true', help="Prints all errors (as opposed to breaking at the first one)")

    args = parser.parse_args()

    if args.output:
        originalPath = args.input
        comparePath = args.output
    else:
        # With a single argument it is used as a name inside the default folders.
        originalPath = f'extractedCallBins/{args.input}.bin'
        comparePath = f'recompiledCallBins/{args.input}-mod.bin'

    with open(originalPath, 'rb') as originalFile:
        originalData = originalFile.read()
    with open(comparePath, 'rb') as compareFile:
        compareData = compareFile.read()

    print(f'Original file: {len(originalData)} bytes. New file: {len(compareData)} bytes')

    if len(originalData) > len(compareData):
        print("Original Data is larger!")
    elif len(compareData) > len(originalData):
        print("New Data is larger!")
    else:
        print("The files are equal size!")

    breaks = findDifferences(originalData, compareData, args.allDiffs)
    if not breaks:
        print("No differences found.")


if __name__ == '__main__':
    main()
def getLengthManually(offset: int) -> int:
    """
    Scans forward from ``offset`` in the module-level ``radioData`` and returns
    the distance to the first 0xFF byte that has a 0x80 byte three positions
    before it.

    Bytes are compared as integers. The previous ``int.to_bytes()`` call without
    arguments only works on Python 3.11+ and allocated a bytes object per test.

    Raises IndexError if the end of ``radioData`` is reached without a match.
    """
    length = 0
    while True:
        length += 1
        if radioData[offset + length] == 0xFF and radioData[offset + length - 3] == 0x80:
            return length
#!/bin/bash

# Rebuild japanese iso and launch in duckstation
# Argument parser by chatGPT

set -e # Exit if we hit a script error.

# Parse arguments
SKIP_EXTRACTION=false
SKIP_GRAPHICS=false
SKIP_VOX=false
SKIP_DEMO=false
SKIP_RADIO=false


while [[ "$#" -gt 0 ]]; do
    case "$1" in
        --skip-extraction)
            SKIP_EXTRACTION=true
            shift
            ;;
        --skip-graphics)
            SKIP_GRAPHICS=true
            shift
            ;;
        --skip-vox)
            SKIP_VOX=true
            shift
            ;;
        --skip-demo)
            SKIP_DEMO=true
            shift
            ;;
        --skip-radio)
            SKIP_RADIO=true
            shift
            ;;
        --help)
            # --skip-radio is accepted above but was missing from this usage text.
            echo "Usage: $0 [--skip-extraction] [--skip-graphics] [--skip-vox] [--skip-demo] [--skip-radio]"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

# Graphics Injection Step
if [ "$SKIP_GRAPHICS" = false ]; then
    echo "Injecting graphics data..."
    # Inject graphics data (STAGE.DIR) for disk 1 ONLY for now
    echo "Inject D1 with ninja..."
    wine goblin-tools/ninja.exe -i /home/solidmixer/projects/mgs1-undub/workingFiles/jpn-d1/stage/ -pack -o /home/solidmixer/projects/mgs1-undub/workingFiles/jpn-d1/stage/STAGE-j1.DIR -img 1>/dev/null
    # Disk 2 temp disable
    # echo "Inject D2 with ninja..."
    # wine goblin-tools/ninja.exe -i /home/solidmixer/projects/mgs1-undub/workingFiles/jpn-d2/stage/ -pack -o stageGraphicsWorking/out/STAGE-j2.DIR -img >/dev/null
    echo "New Stage.dir files created."
fi
sleep 2

# VOX Editing Step
if [ "$SKIP_VOX" = false ]; then
    echo "Processing VOX data..."
    python3 myScripts/voxTools/voxTextInjector.py
    python3 myScripts/voxTools/voxRejoiner.py
fi
sleep 2

# Demo Compilation Step
if [ "$SKIP_DEMO" = false ]; then
    echo "Compiling new DEMO.DAT..."
    python3 myScripts/DemoTools/demoTextInjector.py
    python3 myScripts/DemoTools/demoRejoiner.py
fi
sleep 2

# # Extracting and automating translation (disk 1)
# /bin/python3 /home/solidmixer/projects/mgs1-undub/myScripts/RadioDatTools.py -jzx build-src/jpn-d1/MGS/RADIO.DAT radioWorkingDir/jpn-d1/RADIO

if [ "$SKIP_RADIO" = false ]; then
    # This area re-compiles a RADIO file for jpn
    # use Programatic replacement
    python3 build-proprietary/radio/dialogueSwap.py
    python3 myScripts/xmlModifierTools.py inject workingFiles/jpn-d1/radio/injected-Iseeva.json workingFiles/jpn-d1/radio/RADIO.xml
    # python3 myScripts/RadioDatRecompiler.py -p radioWorkingDir/jpn-d1/RADIO-merged.xml radioWorkingDir/jpn-d1/new-RADIO.DAT -s build-src/jpn-d1/MGS/STAGE.DIR -S radioWorkingDir/jpn-d1/new-STAGE.DIR
    python3 myScripts/RadioDatRecompiler.py -p workingFiles/jpn-d1/radio/RADIO-merged.xml workingFiles/jpn-d1/radio/new-RADIO.DAT -s workingFiles/jpn-d1/stage/STAGE-j1.DIR -S workingFiles/jpn-d1/stage/new-STAGE.DIR
fi
sleep 2

echo "Moving files into position"
# Move all files into the build folder.
# rm build/jpn-d1/MGS/RADIO.DAT
cp -v workingFiles/jpn-d1/radio/new-RADIO.DAT build/jpn-d1/MGS/RADIO.DAT
# rm build/jpn-d1/MGS/STAGE.DIR
cp -v workingFiles/jpn-d1/stage/new-STAGE.DIR build/jpn-d1/MGS/STAGE.DIR
# rm build/jpn-d1/MGS/DEMO.DAT
cp -v workingFiles/jpn-d1/demo/new-DEMO.DAT build/jpn-d1/MGS/DEMO.DAT
# rm build/jpn-d1/MGS/VOX.DAT
cp -v workingFiles/jpn-d1/vox/new-VOX.DAT build/jpn-d1/MGS/VOX.DAT
#

echo "READY TO BUILD ISO!"
sleep 2

mkpsxiso build/jpn-d1/rebuild.xml -o mgsJpnMod-d1.bin -c mgsJpnMod-d1.cue -y
# mkpsxiso build/jpn-d2/rebuild.xml -o mgsJpnMod-d2.bin -c mgsJpnMod-d2.cue -y

# Quoted so an empty or multi-word `uname` result cannot break the test expression.
if [ "$(uname)" = "Linux" ]; then
    flatpak run org.duckstation.DuckStation mgsJpnMod-d1.cue >/dev/null 2>&1 ;
elif [ "$(uname)" = "Darwin" ]; then
    /Applications/DuckStation.app/Contents/MacOS/DuckStation mgsJpnMod-d1.cue >/dev/null 2>&1 ;
fi
#!/bin/bash

# Round-trip check for whole RADIO .DAT files.
#
# 1. Run RadioDatTools.py (-xz) on every .DAT in $input_dir, writing into $output_dir.
# 2. Run RadioDatRecompiler.py on every .xml found in $output_dir.
# 3. diff each recompiled file against the original .DAT and tally the results.

SPLITSCRIPT="./myScripts/RadioDatTools.py"
RECOMPILESCRIPT="./myScripts/RadioDatRecompiler.py"
input_dir='radioDatFiles'
output_dir='recompiledCallBins'

same_count=0
different_count=0

for input in "$input_dir"/*.DAT; do
    # An unmatched glob stays literal; skip it instead of feeding it to python.
    [ -e "$input" ] || continue
    base_filename=$(basename "$input" .DAT)
    echo "$base_filename"
    python3 "$SPLITSCRIPT" "$input" "$output_dir/$base_filename" -xz
done

for original in "$output_dir"/*.xml; do
    [ -e "$original" ] || continue
    base_filename=$(basename "$original" .xml)
    recompiled="$output_dir/$base_filename-mod.DAT"
    python3 "$RECOMPILESCRIPT" -D "$output_dir/$base_filename.xml" "$recompiled" -x
    if diff "$input_dir/$base_filename.DAT" "$recompiled" >/dev/null; then
        echo "Files are the same: $original"
        same_count=$((same_count + 1))
    else
        echo "Files are different: $original"
        different_count=$((different_count + 1))
    fi
done

echo "Total files that are the same: $same_count"
echo "Total files that are different: $different_count"

# Clean up the logs left in the output directory (-f: no error when there are none).
rm -f "$output_dir"/*.log
#!/bin/bash

# Round-trip check for individual radio calls.
#
# 1. Empty both working directories.
# 2. Run RadioDatTools.py with -s on $RADIODAT (expected to populate $input_dir with .bin files).
# 3. Run RadioDatTools.py (-xz) on every .bin, writing into $output_dir.
# 4. Recompile each resulting .xml and diff it against the original .bin.

SPLITSCRIPT="myScripts/RadioDatTools.py"
RECOMPILESCRIPT="myScripts/RadioDatRecompiler.py"
RADIODAT="radioDatFiles/RADIO-usa-d1.DAT"
input_dir='extractedCallBins'
output_dir='recompiledCallBins'

# ${var:?} aborts if the variable is ever empty, so this can never become `rm /*`.
rm -f "${input_dir:?}"/*
rm -f "${output_dir:?}"/*

same_count=0
different_count=0

python3 "$SPLITSCRIPT" "$RADIODAT" Headers -s

for input in "$input_dir"/*.bin; do
    # An unmatched glob stays literal; skip it instead of feeding it to python.
    [ -e "$input" ] || continue
    base_filename=$(basename "$input" .bin)
    # echo $base_filename
    output="$output_dir/$base_filename"
    python3 "$SPLITSCRIPT" "$input" "$output" -xz
done

for original in "$input_dir"/*.bin; do
    [ -e "$original" ] || continue
    base_filename=$(basename "$original" .bin)
    recompiled="$output_dir/$base_filename-mod.bin"
    python3 "$RECOMPILESCRIPT" "$output_dir/$base_filename.xml" "$recompiled"
    if diff "$original" "$recompiled" >/dev/null; then
        # echo "Files are the same: $original"
        same_count=$((same_count + 1))
    else
        echo "Files are different: $original"
        different_count=$((different_count + 1))
    fi
done

echo "Total files that are the same: $same_count"
echo "Total files that are different: $different_count"

rm -f "${output_dir:?}"/*.log
def display_graphic(hex_string):
    """Render a glyph given as a hex string and show it in a matplotlib window.

    The bytes are read as a stream of 2-bit pixels, filled row by row into a
    fixed 12x12 grid. Each 2-bit value maps to one of four grey levels.
    Pixels not covered by the input stay 0.
    """
    raw = bytes.fromhex(hex_string)

    # One long string of '0'/'1' characters, eight per input byte.
    bits = ''.join(f'{value:08b}' for value in raw)

    # Fixed 12x12 grid
    width, height = 12, 12

    # 2-bit value -> grey level (matches the original TGA colour mapping).
    shades = {
        "00": 0,    # Black
        "01": 85,   # Dark gray
        "10": 170,  # Light gray
        "11": 255,  # White
    }

    pixel_grid = np.zeros((height, width), dtype=np.uint8)
    for pixel in range(len(bits) // 2):
        row, col = divmod(pixel, width)
        pixel_grid[row, col] = shades[bits[pixel * 2 : pixel * 2 + 2]]

    # Display image with proper scaling (400% zoom)
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(pixel_grid, cmap="gray", interpolation="nearest")
    ax.axis("off")
    plt.show()
') 38 | 39 | while True: 40 | hexCharacters = input(f'\nPlease paste hex character string: ') 41 | if len(hexCharacters) == 72: 42 | display_graphic(hexCharacters) -------------------------------------------------------------------------------- /translation/kanji.txt: -------------------------------------------------------------------------------- 1 | 気 2 | 仕 3 | 掛 4 | 肉 5 | 壁 6 | 何 7 | 本 8 | 出 9 | 触 10 | 屋 11 | 闘 12 | 吹 13 | 命 14 | 進 15 | 戦 16 | 車 17 | 思 18 | 倒 19 | 事 20 | 君 21 | 罰 22 | 練 23 | 単 24 | 独 25 | 立 26 | 勿 27 | 論 28 | 潜 29 | 特 30 | 殊 31 | 部 32 | 隊 33 | 員 34 | 協 35 | 現 36 | 実 37 | 予 38 | 想 39 | 起 40 | 場 41 | 私 42 | 頭 43 | 棟 44 | 博 45 | 士 46 | 先 47 | 早 48 | 来 49 | 開 50 | 助 51 | 戻 52 | 今 53 | 奴 54 | 言 55 | 借 56 | 下 57 | 一 58 | 階 59 | 北 60 | 束 61 | 研 62 | 究 63 | 室 64 | 捕 65 | 込 66 | 切 67 | 抜 68 | 所 69 | 二 70 | 格 71 | ? 72 | 羞 73 | 飛 74 | 細 75 | 逃 76 | ? 77 | ? 78 | 房 79 | 佐 80 | 姪 81 | 誰 82 | 伯 83 | 父 84 | 僻 85 | 哀 86 | 男 87 | 英 88 | ? 89 | 武 90 | 器 91 | 時 92 | 礼 93 | 説 94 | 教 95 | 議 96 | 合 97 | ? 98 | 縁 99 | 名 100 | 呼 101 | 伝 102 | 目 103 | 新 104 | 兵 105 | 慈 106 | ロ 107 | 逢 108 | 直 109 | 面 110 | 幻 111 | 滅 112 | 滝 113 | 頭 114 | ? 115 | 兄 116 | 弟 117 | 家 118 | 族 119 | 情 120 | 報 121 | 欲 122 | 最 123 | 初 124 | ? 125 | 加 126 | 当 127 | 日 128 | 流 129 | 轟 130 | 廃 131 | ? 132 | 中 133 | 表 134 | 社 135 | 会 136 | 民 137 | 間 138 | 模 139 | 擬 140 | 次 141 | 世 142 | 代 143 | 召 144 | 集 145 | 極 146 | 毯 147 | ? 148 | 密 149 | 験 150 | 限 151 | 正 152 | 式 153 | 採 154 | 決 155 | 終 156 | 的 157 | 径 158 | 蜂 159 | 後 160 | 緒 161 | 牢 162 | 獄 163 | 解 164 | 隙 165 | 鍵 166 | 預 167 | 等 168 | 女 169 | ? 170 | 引 171 | 幾 172 | 護 173 | 彼 174 | 死 175 | 心 176 | 騰 177 | 局 178 | 同 179 | 病 180 | 偶 181 | 然 182 | 者 183 | 強 184 | 生 185 | 糞 186 | 壌 187 | 法 188 | 監 189 | 禁 190 | 建 191 | 横 192 | ? 193 | ? 194 | 米 195 | ? 
196 | 幸 197 | 金 198 | 相 199 | 怖 200 | 訳 201 | 軍 202 | 夢 203 | 毎 204 | 殺 205 | 受 206 | 異 207 | 常 208 | 罪 209 | 悪 210 | 感 211 | 戮 212 | 昔 213 | 封 214 | 印 215 | 残 216 | 虐 217 | 闘 218 | 争 219 | 能 220 | ? 221 | 緩 222 | 和 223 | 高 224 | 掲 225 | 反 226 | ? 227 | 分 228 | 泌 229 | ? 230 | 始 231 | 結 232 | 急 233 | ? 234 | ? 235 | 考 236 | 帰 237 | 魔 238 | 通 239 | 閏 240 | 詳 241 | 絡 242 | 焦 243 | 少 244 | 待 245 | 耐 246 | 箱 247 | 話 248 | 差 249 | 控 250 | 位 251 | 示 252 | 追 253 | 道 254 | 音 255 | 痴 256 | 辿 257 | 足 258 | 選 259 | 茶 260 | 理 261 | ? 262 | 上 263 | 攻 264 | ? 265 | 火 266 | ? 267 | 危 268 | 忍 269 | 山 270 | ? 271 | 床 272 | 歩 273 | 鳴 274 | 平 275 | 医 276 | 療 277 | 南 278 | 洞 279 | ? 280 | 路 281 | 屋 282 | ? 283 | 廊 284 | 溶 285 | 鉱 286 | 炉 287 | 貸 288 | 昇 289 | 隙 290 | 倉 291 | 司 292 | 令 293 | 脱 294 | ? 295 | 品 296 | ? 297 | 負 298 | 勝 299 | 記 300 | 録 301 | 狭 302 | ? 303 | 共 304 | 激 305 | 波 306 | 輯 307 | ? 308 | 折 309 | 広 310 | 我 311 | 慢 312 | 国 313 | 匹 314 | 勇 315 | 求 316 | 愚 317 | 惧 318 | 育 319 | 籍 320 | 省 321 | 母 322 | 文 323 | ? 324 | 勉 325 | 煙 326 | 草 327 | 吸 328 | 唯 329 | 普 330 | 尽 331 | 注 332 | 派 333 | 烏 334 | 美 335 | 食 336 | 深 337 | 泉 338 | 魚 339 | 芳 340 | 餌 341 | 底 342 | 住 343 | 釣 344 | 潔 345 | 失 346 | 敗 347 | 例 348 | 多 349 | 則 350 | 傷 351 | 取 352 | 張 353 | 禍 354 | 買 355 | 恵 356 | 点 357 | ? 358 | 良 359 | 把 360 | 握 361 | 便 362 | 利 363 | 全 364 | 嫁 365 | 旦 366 | 那 367 | 浮 368 | 間 369 | 溺 370 | 尋 371 | 浅 372 | 瀬 373 | 自 374 | 過 375 | 他 376 | ? 377 | 狼 378 | 衆 379 | 数 380 | 絶 381 | 送 382 | 盗 383 | 色 384 | 端 385 | ? 386 | 小 387 | 忘 388 | 交 389 | 刻 390 | 打 391 | 六 392 | 治 393 | 治 394 | 院 395 | 百 396 | 其 397 | 至 398 | 乃 399 | 為 400 | 菜 401 | 乗 402 | 映 403 | 画 404 | 効 405 | 果 406 | 判 407 | 告 408 | 標 409 | 認 410 | ? 411 | 専 412 | 門 413 | 野 414 | ? 415 | 複 416 | ? 417 | 倍 418 | 役 419 | 頼 420 | 断 421 | 真 422 | ? 423 | 難 424 | 好 425 | 傍 426 | 聴 427 | 楽 428 | 戻 429 | 可 430 | 退 431 | 樹 432 | 泳 433 | 油 434 | 快 435 | 活 436 | 千 437 | 年 438 | 愉 439 | ? 
440 | 遊 441 | 杯 442 | 遠 443 | 偏 444 | 般 445 | 辺 446 | ? 447 | 飢 448 | 鼠 449 | 啄 450 | 渇 451 | 腹 452 | 水 453 | ? 454 | 因 455 | 転 456 | 臨 457 | 任 458 | 務 459 | 志 460 | 換 461 | 消 462 | 去 463 | 句 464 | ? 465 | 技 466 | 術 467 | 天 468 | 順 469 | 逆 470 | 亡 471 | 従 472 | 在 473 | 惑 474 | 戸 475 | 陣 476 | 詐 477 | 欺 478 | 厭 479 | 榮 480 | ? 481 | 成 482 | 途 483 | 善 484 | 秋 485 | 義 486 | 此 487 | 之 488 | 程 489 | 価 490 | 値 491 | 得 492 | 易 493 | 簡 494 | 雲 495 | 海 496 | 苦 497 | 詞 498 | 捉 499 | 緑 500 | 源 501 | ? 502 | 乱 503 | 放 504 | 吐 505 | 許 506 | 謝 507 | ? 508 | 突 509 | 嫌 510 | 居 511 | 割 512 | 周 513 | 弱 514 | 践 515 | 工 516 | 盛 517 | ? 518 | 創 519 | 具 520 | 造 521 | 揮 522 | 嬉 523 | 将 524 | 喜 525 | 泣 526 | 声 527 | 恕 528 | ? 529 | 瀕 530 | 誠 531 | ? 532 | 如 533 | 約 534 | 束 535 | 元 536 | 様 537 | 拘 538 | 援 539 | 逮 540 | 険 541 | 虜 542 | 況 543 | 耳 544 | 骨 545 | 埋 546 | 軽 547 | 蔑 548 | ? 549 | 甘 550 | ? 551 | 央 552 | 振 553 | 嵐 554 | 紹 555 | 介 556 | 処 557 | 期 558 | 辞 559 | 屈 560 | 互 561 | 職 562 | ? 563 | 既 564 | 寂 565 | 談 566 | 担 567 | 覚 568 | 以 569 | 丸 570 | 裸 571 | 携 572 | 帯 573 | ? 574 | 胃 575 | 液 576 | 抑 577 | ? 578 | 資 579 | 権 580 | 官 581 | 防 582 | 与 583 | 塩 584 | 検 585 | 冷 586 | 静 587 | 服 588 | 配 589 | 番 590 | 馬 591 | 鹿 592 | ? 593 | 故 594 | 陥 595 | 蘇 596 | 玩 597 | 弄 598 | ? 599 | 黙 600 | 項 601 | 望 602 | 洗 603 | 脳 604 | 鬼 605 | 致 606 | 避 607 | 組 608 | 久 609 | 週 610 | 観 611 | ? 612 | 距 613 | 傾 614 | 含 615 | 救 616 | 列 617 | 蛮 618 | 由 619 | 政 620 | 府 621 | 稼 622 | 試 623 | 型 624 | 統 625 | 領 626 | 昨 627 | 計 628 | 未 629 | 騷 630 | 繊 631 | 厄 632 | 第 633 | 三 634 | 削 635 | 条 636 | 公 637 | 批 638 | 准 639 | 承 640 | 域 641 | 衛 642 | 題 643 | 燕 644 | 返 645 | 威 646 | 墜 647 | 部 648 | 内 649 | 容 650 | ? 651 | 迫 652 | 呑 653 | 倫 654 | 厳 655 | ? 656 | 経 657 | 血 658 | 喋 659 | 緊 660 | 託 661 | 苦 662 | 痛 663 | 親 664 | 歳 665 | 随 666 | 恋 667 | 態 668 | 友 669 | 称 670 | 号 671 | 皆 672 | 係 673 | 殴 674 | ? 675 | 健 676 | 暴 677 | 両 678 | 詰 679 | 白 680 | ? 681 | 科 682 | 腕 683 | ? 684 | ? 685 | 眠 686 | 余 687 | ? 
688 | 毛 689 | 筋 690 | 維 691 | 刺 692 | 類 693 | 売 694 | 悼 695 | 似 696 | 醒 697 | 狩 698 | 併 699 | 宣 700 | 倍 701 | 侵 702 | 済 703 | 再 704 | ? 705 | 材 706 | ? 707 | 総 708 | 主 709 | 件 710 | 荒 711 | 清 712 | 染 713 | 買 714 | 固 715 | 執 716 | ? 717 | 種 718 | 拾 719 | 孤 720 | 児 721 | 頃 722 | 肌 723 | 川 724 | 餌 725 | 寸 726 | 幼 727 | 個 728 | 証 729 | 讐 730 | 誓 731 | 憎 732 | 誤 733 | 仇 734 | 蔽 735 | ? 736 | ? 737 | ? 738 | 胞 739 | 完 740 | 了 741 | 恩 742 | 粗 743 | 末 744 | 読 745 | 嚇 746 | 八 747 | ? 748 | 叶 749 | 賭 750 | 才 751 | ? 752 | 札 753 | 拷 754 | ? 755 | ? 756 | 宅 757 | 僕 758 | 隣 759 | 駐 760 | 西 761 | ? 762 | 赦 763 | 悔 764 | 充 765 | 仲 766 | ? 767 | 環 768 | 境 769 | 植 770 | 犬 771 | 街 772 | 靱 773 | 及 774 | 更 775 | 亜 776 | 詞 777 | 詳 778 | 挙 779 | 詩 780 | 吉 781 | 祖 782 | 捕 783 | 囮 784 | 捜 785 | 校 786 | 洋 787 | 腑 788 | 繋 789 | 農 790 | 永 791 | 土 792 | 零 793 | 寒 794 | 暖 795 | ? 796 | 雑 797 | 凝 798 | ? 799 | 側 800 | 純 801 | 鍛 802 | 仮 803 | 午 804 | 鈍 805 | 睡 806 | 排 807 | 尿 808 | 満 809 | 休 810 | 息 811 | 語 812 | 栄 813 | 養 814 | 給 815 | 併 816 | 候 817 | 測 818 | 挑 819 | 功 820 | 銃 821 | 嗅 822 | 左 823 | 右 824 | 算 825 | 低 826 | 修 827 | 羅 828 | 散 829 | 古 830 | 典 831 | ? 832 | 巡 833 | 率 834 | ? 835 | 慌 836 | 璧 837 | 姿 838 | 遂 839 | 駄 840 | 曳 841 | 航 842 | 紛 843 | 狂 844 | 懇 845 | ? 846 | 摂 847 | 氏 848 | 氷 849 | 賞 850 | 緯 851 | 超 852 | 綻 853 | 帽 854 | 被 855 | 汗 856 | ? 857 | 図 858 | 肺 859 | 炎 860 | 症 861 | 袋 862 | 湯 863 | 耗 864 | 徐 865 | 淀 866 | 恐 867 | 幅 868 | ? 869 | 膜 870 | 焼 871 | 跡 872 | 悟 873 | 偽 874 | 踏 875 | 爪 876 | ? 877 | 靴 878 | ? 879 | 熟 880 | 欠 881 | 隙 882 | 抗 883 | 級 884 | 月 885 | 怒 886 | 歌 887 | ? 888 | 制 889 | 御 890 | 依 891 | 旋 892 | 骸 893 | 沙 894 | 汰 895 | 席 896 | 吊 897 | 戒 898 | 怠 899 | 渉 900 | 阻 901 | 歪 902 | 拠 903 | ? 904 | 首 905 | 伺 906 | 香 907 | 炭 908 | 囲 909 | ? 910 | 念 911 | 択 912 | 万 913 | 音 914 | 絞 915 | 陰 916 | 角 917 | 背 918 | 覗 919 | 叩 920 | 甲 921 | 斐 922 | 寄 923 | ? 924 | 輪 925 | 営 926 | 汚 927 | 鮮 928 | 膨 929 | 費 930 | 陽 931 | ? 932 | 布 933 | 系 934 | 皮 935 | ? 
import os, sys
sys.path.append(os.path.abspath('./myScripts'))
import re
import glob
import struct
import progressbar
import translation.radioDict as RD
import json

import voxTools.voxTextExtractor as DTE

# Rebuilds VOX.DAT by concatenating the split bin files in order, taking the
# re-injected copy from newBins whenever one exists.

# Target build. The second assignment wins; the first is kept as a quick toggle.
version = "usa"
version = "jpn"
disc = 1

# Toggles
debug = True

# Directory configs
originalVox = f'build-src/{version}-d{disc}/MGS/VOX.DAT'
inputDir = f'workingFiles/{version}-d{disc}/vox/bins'
outputDir = f'workingFiles/{version}-d{disc}/vox/newBins'
outputvoxFile = f'workingFiles/{version}-d{disc}/vox/new-VOX.DAT'
os.makedirs(outputDir, exist_ok=True)


def _binIndex(path: str) -> int:
    """Return the number after the last '-' in a bin path (".../vox-0029.bin" -> 29)."""
    return int(path.split('-')[-1].split('.')[0])


origBinFiles = sorted(glob.glob(os.path.join(inputDir, '*.bin')), key=_binIndex)
# The original sorted origBinFiles a second time here and left this list unsorted.
newBinFiles = sorted(glob.glob(os.path.join(outputDir, '*.bin')), key=_binIndex)
# Set for O(1) "is there a replacement for this bin?" checks.
newBinLookup = set(newBinFiles)

print(f'Building New VOX File...')
# bytearray: in-place appends instead of re-copying an immutable bytes each time.
newvoxBytes = bytearray()

count = 0
for file in origBinFiles:
    if count == len(newBinFiles):
        # Every replacement has been consumed. The rest of the output is the
        # original archive from the current offset onward. This relies on the
        # data assembled so far having the same length as the original up to here.
        print(f'\nAll new files injected. Using the remainder of original file...')
        with open(originalVox, 'rb') as originalVoxFile:
            originalVoxFile.seek(len(newvoxBytes))
            newvoxBytes += originalVoxFile.read()
        break

    # Same file name, but in the newBins directory.
    replacement = os.path.join(outputDir, os.path.basename(file))
    if replacement in newBinLookup:
        file = replacement
        basename = os.path.basename(file).split(".")[0]
        print(f'{basename}: Using new {basename}...')
        count += 1
    else:
        basename = os.path.basename(file).split(".")[0]
        print(f'{basename}: Using old version...\r', end="")

    with open(file, 'rb') as voxBytes:
        newvoxBytes += voxBytes.read()

# Write only after everything was read, so a failed read cannot leave a
# truncated new-VOX.DAT behind.
with open(outputvoxFile, 'wb') as f:
    f.write(newvoxBytes)

print(f'{outputvoxFile} was written!')
filename = f'build-src/{version}-d{disc}/MGS/VOX.DAT'
outputDir = f'workingFiles/{version}-d{disc}/vox/bins'

# Read the whole archive up front; the context manager closes the handle,
# which the original never did.
with open(filename, 'rb') as demoFile:
    demoData = demoFile.read()

debug = True

offsets = []
os.makedirs(outputDir, exist_ok=True)
opening = b'\x10\x08\x00\x00' # Adjusted opening pattern
BLOCK_SIZE = 2048 # 0x800; entries are only looked for on these boundaries


def findDemoOffsets():
    """Append to the global `offsets` every block-aligned position where `opening` occurs.

    Only positions that are multiples of BLOCK_SIZE and lie below
    len(demoData) - 4 are checked.
    """
    for offset in range(0, len(demoData) - 4, BLOCK_SIZE):
        if demoData[offset:offset + 4] == opening:
            offsets.append(offset)


def splitDemoFiles():
    """Write the data between consecutive offsets to numbered files in outputDir.

    File numbers are 1-based and zero-padded to four digits. The last file
    runs to the end of the archive.
    """
    global debug

    for i, start in enumerate(offsets):
        if i < len(offsets) - 1:
            end = offsets[i + 1]
        else:
            end = len(demoData) # Include the last byte
        # NOTE(review): the other vox tools glob '*.bin' in this directory, but
        # the files are written with a .vox extension here; confirm which is intended.
        with open(f'{outputDir}/vox-{i + 1:04}.vox', 'wb') as f:
            f.write(demoData[start:end])
        if debug:
            # Report the same 1-based number that is used in the file name.
            print(f'Wrote VOX file {i + 1}')


if __name__ == '__main__':
    findDemoOffsets()
    splitDemoFiles()
12 | 13 | """ 14 | 15 | import os, sys 16 | sys.path.append(os.path.abspath('./myScripts')) 17 | import re 18 | import glob 19 | import struct 20 | import progressbar 21 | import translation.radioDict as RD 22 | import json 23 | 24 | voxScriptData: dict = {} 25 | 26 | bar = progressbar.ProgressBar() 27 | 28 | version = "usa" 29 | version = "jpn" 30 | disc = 1 31 | 32 | # Create a directory to store the extracted texts 33 | # Get the files from the folder directory 34 | inputDir = f'workingFiles/{version}-d{disc}/vox/bins' 35 | outputDir = f'workingFiles/{version}-d{disc}/vox/texts' 36 | os.makedirs(outputDir, exist_ok=True) 37 | outputJsonFile = f"workingFiles/{version}-d{disc}/vox/voxText-{version}.json" 38 | 39 | # Grab all files in the directory and sort into order. 40 | bin_files = glob.glob(os.path.join(inputDir, '*.bin')) 41 | bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0])) 42 | 43 | # flags 44 | debug = True 45 | 46 | # List of files to skip (Ex: 005.bin does not contain texts) 47 | skipFilesListD1 = [ 48 | 49 | ] 50 | 51 | # Set up progress bar 52 | # bar.maxval = len(bin_files) 53 | # barCount = 0 54 | # bar.start() 55 | 56 | # DEBUG 57 | # if debug: 58 | # print(f'Only doing vox-1.bin!') 59 | # bin_files = [f'voxWorkingDir/{version}/bins/vox-25.bin'] 60 | 61 | def getTextHexes(textToAnalyze: bytes) -> tuple[list, bytes, list]: 62 | """ 63 | This just grabs all the text from each sector of the text area. 64 | We just grab the hex and return it. We also return the custom 65 | character bytes at the end, which should always make a dictionary. 66 | """ 67 | global debug 68 | 69 | #startingPoint = struct.unpack(" list: 116 | """ 117 | This is awful, but it should to a certain degree find vox offset spots. 118 | If there's a better way to do this lmk, but it's not too inefficient. 119 | """ 120 | patternA = b"\x03..." + b"...." + b"...." + b"...." + bytes(4) + bytes.fromhex("FF FF FF 7F 10 00") # Figured out the universal pattern. 
def findOffsets(byteData: bytes, pattern: bytes) -> list:
    """
    Find every occurrence of a byte pattern.

    Returns the start offsets of all matches in ascending order, including
    overlapping ones. Returns an empty list when there is no match or when
    the pattern is empty.

    The previous version never advanced the search position, so it looped
    forever on the first match, and it collected the pattern itself rather
    than the position.
    """
    foundPatterns = []
    if not pattern:
        # bytes.find(b"") matches at every position; treat it as "nothing to find".
        return foundPatterns

    offset = byteData.find(pattern)
    while offset != -1:
        foundPatterns.append(offset)
        # Resume one byte past this hit so the same match is not found again.
        offset = byteData.find(pattern, offset + 1)
    return foundPatterns
def assembleTitles(texts: dict, timings: dict) -> list [subtitle]:
    """
    Pair each text with its timing and build one subtitle object per entry.

    Both dicts are read with two-digit, 1-based string keys ("01", "02", ...),
    one for every entry in `texts`. Each timing value is a comma-separated
    string whose first two fields are passed to `subtitle` as start and
    duration, still as strings.
    """
    assembled = []
    for position in range(1, len(texts) + 1):
        key = f"{position:02}"
        fields = timings.get(key).split(",")
        assembled.append(subtitle(texts.get(key), fields[0], fields[1]))

    return assembled
if __name__ == "__main__":
    # For every split VOX bin that has an entry in the injection JSON, rebuild
    # each dialogue section with the new subtitles, force the result back to
    # the original file length, and write it to outputDir.
    for file in bin_files:
        filename = os.path.basename(file)
        basename = filename.split(".")[0]
        print(f"{filename}: ", end="")

        if basename not in injectTexts:
            print(f'{basename} was not in the json. Skipping...\r', end="")
            continue

        # Initialize the vox data and the dictionary we're using to replace it.
        with open(file, 'rb') as origFile:
            origvoxData = origFile.read()
        origBlocks = len(origvoxData) // 0x800 # Use this later to check we hit the same length!
        voxDict: dict = injectTexts[basename][0]
        voxTimings: dict = injectTexts[basename][1]

        subtitles = assembleTitles(voxDict, voxTimings)

        offsets = DTE.getTextAreaOffsets(origvoxData)
        if not offsets:
            # Nothing to replace; offsets[0] below would raise IndexError.
            print(f'{basename} has no text areas. Skipping...')
            continue

        newvoxData = origvoxData[0 : offsets[0]] # UNTIL the header

        for Num, areaStart in enumerate(offsets):
            oldHeader = getvoxDiagHeader(origvoxData[areaStart:])
            oldLength = struct.unpack("H", oldHeader[1:3])[0]
            frameStart = struct.unpack("I", oldHeader[4:8])[0]
            frameLimit = struct.unpack("I", oldHeader[8:12])[0]

            # Get only subtitles in this section.
            # NOTE(review): genSubBlock indexes subs[-1], so a section with no
            # matching subtitle raises IndexError there.
            subsForSection = [
                sub for sub in subtitles
                if frameStart <= sub.startFrame < frameLimit
            ]
            newSubBlock = genSubBlock(subsForSection)
            newLength = len(oldHeader) + len(newSubBlock)

            # Same layout as the old header, with both length fields recomputed.
            newHeader = (
                bytes.fromhex("03")
                + struct.pack("H", newLength)
                + bytes(1)
                + struct.pack("II", frameStart, frameLimit)
                + oldHeader[12:16]
                + struct.pack("I", newLength - 4)
                + oldHeader[20:]
            )
            newvoxData += newHeader + newSubBlock

            # Add the rest of the data from this to the next offset OR until end of original vox.
            if Num < len(offsets) - 1: # if it is NOT the last...
                newvoxData += origvoxData[areaStart + oldLength: offsets[Num + 1]]
            else:
                newvoxData += origvoxData[areaStart + oldLength: ]

        # Adjust length to match original file.
        if len(newvoxData) == len(origvoxData):
            print("Alignment correct!")
        elif len(newvoxData) < len(origvoxData): # new vox shorter
            newvoxData += bytes(len(origvoxData) - len(newvoxData))
            if len(newvoxData) % 0x800 == 0:
                print("Alignment correct!")
        else:
            # New vox is longer: it may only be cut back if every byte past the
            # original length is zero padding. The original sliced from
            # (new - old), i.e. almost the whole buffer, so this check never passed.
            checkBytes = newvoxData[len(origvoxData):]
            if checkBytes == bytes(len(checkBytes)):
                newvoxData = newvoxData[:len(origvoxData)]
            else:
                print(f'CRITICAL ERROR! New vox cannot be truncated to original length!')
                sys.exit(2)

        newBlocks = len(newvoxData) // 0x800
        if newBlocks != origBlocks:
            print(f"{len(newvoxData)} / {len(origvoxData)}")
            print(f'BLOCK MISMATCH!\nNew data is {newBlocks} blocks, old was {origBlocks} blocks.\nTHERE COULD BE PROBLEMS IN RECOMPILE!!')

        # Finished work! Write the new file.
        with open(f'{outputDir}/{basename}.bin', 'wb') as newFile:
            newFile.write(newvoxData)
    print(f'VOX Data successfully Output to new files!')
245 | for key in injectTexts: 246 | print(key) 247 | voxDict: dict = injectTexts[key] 248 | 249 | """ -------------------------------------------------------------------------------- /zmovieTools/movieSplitter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from Green Goblins scripts. Very similar to demo 3 | only alignments are 0x920 4 | """ 5 | 6 | import os, struct, re, sys, glob, json 7 | sys.path.append(os.path.abspath('./myScripts')) 8 | sys.path.append(os.path.abspath('.')) 9 | import DemoTools.demoTextExtractor as DTE 10 | 11 | version = "usa" 12 | filename = f"build-src/{version}-d1/MGS/ZMOVIE.STR" 13 | outputDir = f"zMovieWorkingDir/{version}/bins" 14 | 15 | zMovieScript = {} 16 | 17 | zmFile = open(filename, 'rb') 18 | zmData = zmFile.read() 19 | 20 | 21 | offsets = [] 22 | os.makedirs(outputDir, exist_ok=True) 23 | 24 | def getOffsets(toc: bytes) -> list: 25 | demoNum = 4 # If we figure out where this is we can implement it. 26 | offsets = [] 27 | counter = 16 28 | for i in range(demoNum): 29 | offset = struct.unpack("