├── .DS_Store
├── .gitignore
├── DemoTools
    ├── __init__.py
    ├── demoRejoiner.py
    ├── demoSplitter.py
    ├── demoTextExtractor.py
    ├── demoTextInjector.py
    └── demoTextTesting.py
├── Old vers
    ├── ExtractRadioDatV0.4.1.py
    ├── ExtractRadioDatV0.4.py
    ├── ExtractTextsV0.1.py
    ├── ExtractTextsV0.2.py
    ├── ExtractTextsV0.3..py
    ├── ExtractTextsV0.3.5 copy.py
    ├── ExtractTextsV0.3.5.py
    ├── ExtractTextsV0.3.6.py
    ├── ExtractTextsV0.3.8 Line by line.py
    ├── ExtractTextsV0.3.9.py
    ├── REMOVEDCHARS.PY
    ├── RadioDatTools--preXML.py
    ├── RadioDatToolsv0.4.5 backupcopy.py
    ├── characters_old.py
    ├── demoTextInjector_old.py
    ├── main.py
    ├── stageCalls-before-0A.ods
    ├── vagToWav.py
    ├── vagToWav2.py
    └── xmltest.py
├── README.md
├── RadioDatRecompiler.py
├── RadioDatTools.py
├── StageDirTools
    ├── Notes.txt
    ├── analyzeStageDirFiles.sh
    ├── assmembleDar.py
    ├── callsInStageDirFinder.py
    ├── extractDar.py
    └── stageDirFileExtractor.py
├── audioTools
    ├── sub-test-2.py
    ├── subtitle display test.py
    └── vagAudioTools.py
├── common
    └── structs.py
├── creditsHacking
    ├── creditsHacking.py
    ├── decryptionDiagram.md
    ├── imageComparison.sh
    ├── imageEncoder.py
    ├── imhex patterns 00eae8rar.txt
    ├── lz77-test.py
    ├── lzss-test.py
    ├── newCompressionTest.py
    ├── scra.py
    └── scratchpad.py
├── demoClasses.py
├── demoManager.py
├── graphicsExport
    ├── KanjiStillMissing.txt
    └── contextList.txt
├── insertVox.py
├── itemDescriptionFinder.py
├── jsonTools.py
├── quickTranslate.py
├── radioModule.py
├── radioTools
    ├── __init__.py
    ├── callExtactor.py
    ├── callInsertor.py
    └── jsonToCSV.py
├── requirements.txt
├── testing
    ├── compareDemos.sh
    ├── convertImage.sh
    ├── demoBinChecker.sh
    ├── exportAndAnalyze.sh
    ├── extractALLmaterials.sh
    ├── extractAllCalls.sh
    ├── findEndings.py
    ├── goblin.bat
    ├── incorrectRecompileCheck.py
    ├── patternChecker.py
    ├── radioDatUSAChecker.sh
    ├── runJpnBuildTest.sh
    ├── runusaBuildTest.sh
    ├── testAllRadioFiles.sh
    └── testRecompileAll.sh
├── translation
    ├── Output.txt
    ├── characters.py
    ├── combine.py
    ├── graphicShower.py
    ├── kanji.txt
    ├── radioDict.py
    └── unique graphics
├── voxTools
    ├── vagOutput.py
    ├── voxRejoiner.py
    ├── voxSplit.py
    ├── voxTextExtractor.py
    └── voxTextInjector.py
├── xmlModifierTools.py
└── zmovieTools
    └── movieSplitter.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ./RADIO*.DAT
2 | .DS*
3 | RADIO-jpn.DAT
4 | RADIO-usa.DAT
5 | Scratchpad.py
6 | mgs1-undub-docs/.obsidian
7 | mgs1-undub-docs/.trash
8 | __pycache__/
9 | 


--------------------------------------------------------------------------------
/DemoTools/__init__.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | 
3 | # Necessary to find all modules in this package... doesn't really work as expected.
4 | sys.path.append(os.path.abspath('./myScripts'))


--------------------------------------------------------------------------------
/DemoTools/demoRejoiner.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | sys.path.append(os.path.abspath('./myScripts'))
 3 | import re
 4 | import glob
 5 | import struct
 6 | import progressbar
 7 | import translation.radioDict as RD
 8 | import json
 9 | 
10 | import DemoTools.demoTextExtractor as DTE
11 | 
# Region / disc whose split demo files are stitched back into one DEMO.DAT.
# The second assignment wins; the first is kept as a quick manual toggle.
version = "usa"
version = "jpn"
disc = 1

# Toggles
debug = True


# Directory configs
inputDir = f'workingFiles/{version}-d{disc}/demo/bins'
outputDir = f'workingFiles/{version}-d{disc}/demo/newBins'
outputDemoFile = f'workingFiles/{version}-d{disc}/demo/new-DEMO.DAT'
os.makedirs(outputDir, exist_ok=True)


def _demoNumber(path: str) -> int:
    """Return the numeric index encoded in a ``demo-NN.dmo`` file name."""
    return int(path.split('-')[-1].split('.')[0])


# Original split files, in demo order.
origBinFiles = sorted(glob.glob(os.path.join(inputDir, '*.dmo')), key=_demoNumber)

# Re-injected files. (The original code sorted origBinFiles a second time
# here and left this list unsorted.)
newBinFiles = sorted(glob.glob(os.path.join(outputDir, '*.dmo')), key=_demoNumber)
_newBinLookup = set(newBinFiles)  # O(1) membership test inside the loop

# Collect every demo first so the output file is only created once all the
# inputs have been read successfully.
_chunks = []
for file in origBinFiles:
    basename = os.path.basename(file).split(".")[0]
    replacement = os.path.join(outputDir, os.path.basename(file))
    if replacement in _newBinLookup:
        # An injected version exists: prefer it over the original.
        file = replacement
        print(f'{basename}: Using new version of the demo...')
    else:
        print(f'{basename}: Using old file...\r', end="")
    with open(file, 'rb') as demoBytes:
        _chunks.append(demoBytes.read())

newDemoBytes = b''.join(_chunks)

with open(outputDemoFile, 'wb') as f:
    f.write(newDemoBytes)

print(f'{outputDemoFile} was written!')
50 | 
51 |             
52 | 


--------------------------------------------------------------------------------
/DemoTools/demoSplitter.py:
--------------------------------------------------------------------------------
 1 | import os, struct
 2 | # import progressbar, time
 3 | 
 4 | version = "usa"
 5 | disc = 1
 6 | filename = f"build-src/{version}-d{disc}/MGS/DEMO.DAT"
 7 | outputDir = f"workingFiles/{version}-d{disc}/demo/bins"
 8 | 
 9 | demoFile = open(filename, 'rb')
10 | demoData = demoFile.read()
11 | 
12 | offsets = []
13 | os.makedirs(outputDir, exist_ok=True)
14 | opening = b'\x10\x08\x00\x00'
15 | # opening = b'\x10\x08\x00\x00\x05\x00\x00\x00'
16 | 
def findDemoOffsets():
    """
    Scan the module-level ``demoData`` for the ``opening`` marker.

    Only positions that are multiples of 2048 are checked, and the scan stops
    8 bytes short of the end of the data. Every hit is appended to the
    module-level ``offsets`` list and reported on stdout.
    """
    # All demo files are aligned to 0x800, SIGNIFICANTLY faster to do this than +8! Credit to Green Goblin
    for position in range(0, len(demoData) - 8, 2048):
        if demoData[position:position + 4] == opening:
            print(f'Offset found at offset {position}!')
            offsets.append(position)

    print(f'Ending! {len(offsets)} offsets found:')
    for found in offsets:
        print(found.to_bytes(4, 'big').hex())
32 | 
def splitDemoFiles():
    """
    Write one ``demo-NN.dmo`` file per entry in the module-level ``offsets``.

    Each demo runs from its own offset up to the next offset, or to the end
    of ``demoData`` for the last one. A ``demoOffsets.txt`` index listing the
    start, end and length of every demo is written alongside.
    """
    lastIndex = len(offsets) - 1
    # Both files are managed by ``with`` so they are flushed and closed even
    # if a write fails (the index file used to be left open).
    with open(f'{outputDir}/demoOffsets.txt', 'w') as offsetFile:
        for i, start in enumerate(offsets):
            end = offsets[i + 1] if i < lastIndex else len(demoData)
            offsetFile.write(f'{i + 1:02}: {start:08x} - {end:08x}, length: {end - start}\n')
            with open(f'{outputDir}/demo-{i + 1:02}.dmo', 'wb') as f:
                f.write(demoData[start:end])
            print(f'Demo {i + 1} written!')

    print(f'{len(offsets)} demo files written!')
49 | 
50 | 
# Script entry point: first locate every demo start, then write one file per demo.
if __name__ == '__main__':
    findDemoOffsets()
    splitDemoFiles()


--------------------------------------------------------------------------------
/DemoTools/demoTextExtractor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adapted from Green Goblins scripts. 
  3 | This is really heavily based on his awesome work. 
  4 | 
  5 | # Script for working with Metal Gear Solid data
  6 | #
  7 | # Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/)
  8 | #
  9 | # Permission to use, copy, modify, and/or distribute this software for any
 10 | # purpose with or without fee is hereby granted, provided that the above
 11 | # copyright notice and this permission notice appear in all copies.
 12 | 
 13 | """
 14 | 
 15 | import os, sys
 16 | sys.path.append(os.path.abspath('./myScripts'))
 17 | import re
 18 | import glob
 19 | import struct
 20 | import progressbar
 21 | import translation.radioDict as RD
 22 | import json
 23 | 
# Accumulates the extraction result; filled in by the __main__ block and dumped to outputJsonFile.
demoScriptData: dict = {}

# NOTE(review): created and started at import time, so importing this module
# from another script also starts the bar. No update call is visible in this file.
bar = progressbar.ProgressBar()

# The second assignment wins; the first is kept as a quick manual toggle.
version = "usa"
version = "jpn"
disc = 1

# Create a directory to store the extracted texts
# Get the files from the folder directory
inputDir = f'workingFiles/{version}-d{disc}/demo/bins'
outputDir = f'workingFiles/{version}-d{disc}/demo/texts'
os.makedirs(outputDir, exist_ok=True)
outputJsonFile = f"workingFiles/{version}-d{disc}/demo/demoText-{version}.json"

# Grab all files in the directory and sort into order.
# The sort key is the integer between the last '-' and the following '.' in the path.
bin_files = glob.glob(os.path.join(inputDir, '*.dmo'))
bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0]))

# flags
debug = True

# List of files to skip (Ex: 005.bin does not contain texts)
# Entries are base names without the '.dmo' extension.
skipFilesListD1 = [
    'demo-05',
    'demo-06',
    'demo-31',
    'demo-33',
    'demo-35',
    'demo-63',
    'demo-67',
    'demo-71',
    'demo-72',
]

# Set up progress bar
bar.maxval = len(bin_files)
barCount = 0
bar.start()

# DEBUG
# if debug:
#     print(f'Only doing demo-1.bin!')
    # bin_files = [f'demoWorkingDir/{version}/bins/demo-25.bin']
 68 | 
 69 | def getTextHexes(textToAnalyze: bytes) -> tuple[list, bytes, list]: 
 70 |     """
 71 |     This just grabs all the text from each sector of the text area.
 72 |     We just grab the hex and return it. We also return the custom 
 73 |     character bytes at the end, which should always make a dictionary.
 74 |     """
 75 |     global debug
 76 |     
 77 |     #startingPoint = struct.unpack("<H", textToAnalyze[18:20])[0]
 78 |     
 79 |     segments = []
 80 |     # Coords = dict of Starting time, length to display
 81 |     coords = []
 82 |     # graphics are only for japanese vers. generally. init here so that we can pass back something even if no graphics found. 
 83 |     graphics = b'' 
 84 |     offset = 0
 85 | 
 86 |     # Search for the second pattern while looking for size pointers
 87 |     while offset < len(textToAnalyze):
 88 |         if debug:
 89 |             print(f'Offset: {offset}')
 90 |         # If loop to determine if we hit the last one. 
 91 |         if textToAnalyze[offset] == 0x00: # This is the last segment, always the same length? # TODO CLEAN THIS UP
 92 |             # All this nonsense finds the last segment since the length bytes are null.
 93 |             lastEnd = textToAnalyze.find(bytes.fromhex('00'), offset + 16)
 94 |             subset = textToAnalyze[offset: lastEnd]
 95 |             evenBytes = (4 - (len(subset) % 4))
 96 |             subset = textToAnalyze[offset: lastEnd + evenBytes]
 97 |             textSize = len(subset)
 98 |             # Get timings
 99 |             appearTime = struct.unpack("I", textToAnalyze[offset + 4: offset + 8])[0]
100 |             appearDuration = struct.unpack("I", textToAnalyze[offset + 8: offset + 12])[0]
101 |             coords.append(f'{appearTime},{appearDuration}')
102 | 
103 |             print(f'Final length = {textSize}') 
104 |             segments.append(textToAnalyze[offset + 16: offset + textSize])
105 |             graphics = textToAnalyze[offset + textSize: -4]
106 |             break
107 |         else:
108 |             # Extract the double byte value (little-endian) as a pointer to the size
109 |             textSize = struct.unpack('<H', textToAnalyze[offset:offset + 2])[0]
110 |             appearTime = struct.unpack("I", textToAnalyze[offset + 4: offset + 8])[0]
111 |             appearDuration = struct.unpack("I", textToAnalyze[offset + 8: offset + 12])[0]
112 |             dialogueBytes = textToAnalyze[offset + 16: offset + textSize]
113 | 
114 |         # Append the size pointer and its offset to the list
115 |         segments.append(dialogueBytes)
116 |         coords.append(f'{appearTime},{appearDuration}')
117 | 
118 |         # Move to the next size pointer
119 |         offset += textSize
120 | 
121 |     return segments, graphics, coords
122 | 
def getTextAreaOffsets(demoData: bytes) -> list:
    """
    Return the offsets in ``demoData`` where a dialogue area header starts.

    Candidates are found with a byte pattern (0x03, six wildcard bytes, 0x00,
    four wildcard bytes, 0x10 0x00). A candidate is kept only when the four
    bytes located ``length`` bytes after it equal 01 04 20 00, where
    ``length`` is the 16-bit little-endian value at candidate + 1.
    """
    # 03 ?? ?? ?? ?? ?? ?? 00 ?? ?? ?? ?? 10 00 14 00 >> For IMHEX usage
    headerPattern = b"\x03..." + b"...\x00" + b"....\x10\x00"
    # FF FF FF 7F 10 00 was an earlier candidate pattern; it actually marks a
    # dialogue area that runs to the end of the demo (until frame 0x7FFFFF).
    tailMarker = bytes.fromhex("01 04 20 00")

    def endsWithMarker(start: int) -> bool:
        # Alignment check: the area must be followed by the constant tail.
        areaLength = struct.unpack('<H', demoData[start + 1: start + 3])[0]
        return demoData[start + areaLength: start + 4 + areaLength] == tailMarker

    candidates = [found.start() for found in re.finditer(headerPattern, demoData, re.DOTALL)]
    return [start for start in candidates if endsWithMarker(start)]
147 | 
def getTextAreaBytes(offset, demoData):
    """
    Slice the body of the dialogue area that starts at ``offset``.

    The area length is the 16-bit little-endian value at offset + 1 and the
    extra header size is the one at offset + 14. The returned slice starts
    after the 4-byte head plus that extra header and ends 4 bytes past the
    area length, so it includes the tail bytes 0x[01 04 20 00].
    """
    (areaLength,) = struct.unpack('<H', demoData[offset + 1: offset + 3])
    # Japanese has extra data here ?
    (headerExtra,) = struct.unpack('<H', demoData[offset + 14: offset + 16])

    bodyStart = offset + 4 + headerExtra
    bodyEnd = offset + 4 + areaLength
    return demoData[bodyStart:bodyEnd]
158 | 
def getDialogue(textHexes: list [bytes], graphicsData: bytes = None) -> list:
    """
    Decode each raw subtitle in ``textHexes`` to text via the radioDict helpers.

    When ``graphicsData`` is given (and the module-level ``filename`` is not
    None) a custom-character dictionary is built from it first; otherwise an
    empty dictionary is used. Null characters are stripped from every result.
    """
    global debug
    global filename
    global version

    if graphicsData is None or filename is None:
        glyphDict = {}
    else:
        glyphDict = RD.makeCallDictionary(filename, graphicsData)

    decodedLines = []
    for rawText in textHexes:
        decoded = RD.translateJapaneseHex(rawText, glyphDict)
        if debug:
            print(decoded)
        decodedLines.append(decoded.replace('\x00', ""))
    return decodedLines
180 | 
def textToDict(dialogue: list) -> dict:
    """
    Map each item of ``dialogue`` to its 1-based position.

    Keys are the position formatted with at least two digits ("01", "02", ...).
    """
    return {f'{position:02}': entry for position, entry in enumerate(dialogue, start=1)}
189 |             
def writeTextToFile(filename: str, dialogue: list) -> None:
    """
    Write every item of ``dialogue`` to ``filename``, one per line, as UTF-8.

    The file is created or overwritten.
    """
    global debug
    with open(filename, 'w', encoding='utf8') as outFile:
        outFile.writelines(f'{entry}\n' for entry in dialogue)
196 | 
def findOffsets(byteData: bytes, pattern: bytes) -> list:
    """
    Find every occurrence of ``pattern`` in ``byteData``.

    Returns the list of start offsets in ascending order; overlapping
    occurrences are included. An empty list means the pattern is absent.

    The previous implementation restarted the search at the offset it had
    just found, so it never terminated once there was a match, and it
    collected copies of the pattern instead of the offsets.
    """
    foundOffsets = []
    offset = byteData.find(pattern)
    while offset != -1:
        foundOffsets.append(offset)
        # Resume one byte further so the same match is not found again.
        offset = byteData.find(pattern, offset + 1)
    return foundOffsets
208 | 
if __name__ == "__main__":
    # Loop through each demo file in the folder
    for bin_file in bin_files:
        # ``filename`` stays a module-level name because getDialogue() reads it.
        filename = os.path.basename(bin_file)
        basename = filename.split('.')[0]

        # Manual override to skip certain demos. The skip list holds names
        # without extension, so compare the base name (comparing the full
        # file name, as before, never matched anything).
        if basename in skipFilesListD1:
            continue

        if debug:
            print(f"Processing file: {bin_file}")

        # Open the binary file for reading in binary mode
        with open(bin_file, 'rb') as binary_file:
            demoData = binary_file.read()

        textOffsets = getTextAreaOffsets(demoData)

        print(f'{os.path.basename(bin_file)}: {textOffsets}')

        texts = []
        timings = []  # list of timings (start time, duration)

        # Gather the dialogue and timings of every text area in this demo.
        for offset in textOffsets:
            subset = getTextAreaBytes(offset, demoData)
            textHexes, graphicsBytes, coords = getTextHexes(subset)
            texts.extend(getDialogue(textHexes, graphicsBytes))
            timings.extend(coords)

        demoScriptData[basename] = [textToDict(texts), textToDict(timings)]
        writeTextToFile(f'{outputDir}/{basename}.txt', texts)
        # writeTextToFile(f'{outputDir}/{basename}-timings.txt', timings)

    # ensure_ascii=False emits non-ASCII text verbatim, so the encoding is
    # pinned instead of depending on the platform default.
    with open(outputJsonFile, 'w', encoding='utf8') as f:
        f.write(json.dumps(demoScriptData, ensure_ascii=False))


--------------------------------------------------------------------------------
/DemoTools/demoTextInjector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adapted from Green Goblins scripts. 
  3 | This is really heavily based on his awesome work. 
  4 | 
  5 | Script for working with Metal Gear Solid data
  6 | 
  7 | Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/)
  8 | 
  9 | Permission to use, copy, modify, and/or distribute this software for any
 10 | purpose with or without fee is hereby granted, provided that the above
 11 | copyright notice and this permission notice appear in all copies.
 12 | 
 13 | """
 14 | 
 15 | import os, sys
 16 | sys.path.append(os.path.abspath('./myScripts'))
 17 | import re
 18 | import glob
 19 | import struct
 20 | import progressbar
 21 | import translation.radioDict as RD
 22 | import json
 23 | 
 24 | import DemoTools.demoTextExtractor as DTE
 25 | from common.structs import subtitle
 26 | 
 27 | version = "usa"
 28 | version = "jpn"
 29 | disc = 1
 30 | 
 31 | # Toggles
 32 | debug = True
 33 | 
 34 | # Directory configs
 35 | inputDir = f'workingFiles/{version}-d{disc}/demo/bins'
 36 | outputDir = f'workingFiles/{version}-d{disc}/demo/newBins'
 37 | injectJson = f'build-proprietary/demo/demoText-{version}-undub.json'
 38 | os.makedirs(outputDir, exist_ok=True)
 39 | 
 40 | # Collect files to use
 41 | bin_files = glob.glob(os.path.join(inputDir, '*.dmo'))
 42 | bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0]))
 43 | 
 44 | # Collect source json to inject
 45 | injectTexts = json.load(open(injectJson, 'r'))
 46 | 
 47 | """class subtitle:
 48 |     text: str
 49 |     startFrame: int
 50 |     duration: int
 51 | 
 52 |     def __init__(self, dialogue, b, c) -> None:
 53 |         self.text = dialogue
 54 |         self.startFrame = int(b)
 55 |         self.duration = int(c)
 56 | 
 57 |         return
 58 |     
 59 |     def __str__(self) -> str:
 60 |         a = f'Subtitle contents: Start: {self.startFrame} Duration: {self.duration} Text: {self.text}'
 61 |         return a
 62 |     
 63 |     def __bytes__(self) -> bytes:
 64 |         
 65 |         # Simple. Encodes the dialogue as bytes. 
 66 |         # Adds the buffer we need to be divisible by 4...
 67 |         # Return the new bytes.
 68 |         
 69 |         subtitleBytes: bytes = struct.pack("III", self.startFrame, self.duration, 0)
 70 |         subtitleBytes += RD.encodeJapaneseHex(self.text)[0]
 71 |         bufferNeeded = 4 - (len(subtitleBytes) % 4)
 72 |         subtitleBytes += bytes(bufferNeeded)
 73 |         
 74 |         return subtitleBytes"""
 75 | 
 76 | def assembleTitles(texts: dict, timings: dict) -> list [subtitle]:
 77 |     subsList = []
 78 |     for i in range(len(texts)):
 79 |         index = "{:02}".format(i + 1)
 80 |         start = timings.get(index).split(",")[0]
 81 |         duration = timings.get(index).split(",")[1]
 82 |         a = subtitle(texts.get(index), start, duration)
 83 |         subsList.append(a)
 84 |     
 85 |     return subsList
 86 | """
 87 | # TODO:
 88 | - change key to int
 89 | - make sure range hits all texts
 90 | """
 91 | skipFilesListD1 = [
 92 |     'demo-05',
 93 |     'demo-06',
 94 |     'demo-31',
 95 |     'demo-33',
 96 |     'demo-35',
 97 |     'demo-63',
 98 |     'demo-67',
 99 |     'demo-71',
100 |     'demo-72',
101 | ]
102 | 
def genSubBlock(subs: list [subtitle] ) -> bytes:
    """
    Serialise a list of subtitles into one contiguous block of bytes.

    Every subtitle except the last is emitted as a 4-byte length (its encoded
    size plus the 4 length bytes themselves) followed by ``bytes(sub)``.
    The last subtitle is preceded by four null bytes instead of a length.

    Raises IndexError when ``subs`` is empty.
    """
    pieces = []
    for entry in subs[:-1]:
        encoded = bytes(entry)
        pieces.append(struct.pack("I", len(encoded) + 4) + encoded)

    # Add the last one
    pieces.append(bytes(4) + bytes(subs[-1]))

    return b''.join(pieces)
118 | 
def injectSubtitles(originalBinary: bytes, newTexts: dict, frameLimit: int = 1, timings: dict = None) -> bytes:
    """
    Earlier injection routine: rebuilds a dialogue block by walking the
    original entries and swapping in the texts from ``newTexts`` with the
    start/duration values from ``timings``. Returns the rebuilt bytes.

    NOTE(review): this function is not called by the ``__main__`` block of
    this file and cannot run as written:
      * ``i`` is only ever incremented (its initial assignment is commented
        out), so the ``while`` condition raises UnboundLocalError;
      * ``encodeNewText`` takes two arguments but is called with one;
      * ``int(timing.split(','))`` passes a list to ``int``;
      * ``newBytes += ...`` inside ``encodeNewText`` makes ``newBytes`` local
        to the helper, where it is never initialised.
    ``frameLimit`` is accepted but not used in the body.
    """ 

    def encodeNewText(text: str, timing: str):
        """
        Simple. Encodes the dialogue as bytes. 
        Adds the buffer we need to be divisible by 4...
        Return the new bytes.
        """
        # NOTE(review): int() of a list raises TypeError; presumably each part was meant to be converted.
        timings = int(timing.split(','))
        start = timings[0]
        duration = timings[1]

        subtitleBytes: bytes = struct.pack("III", start, duration, 0)
        subtitleBytes += RD.encodeJapaneseHex(text)[0]
        bufferNeeded = 4 - (len(subtitleBytes) % 4)
        # NOTE(review): pads ``newBytes`` (unbound in this scope) rather than ``subtitleBytes``.
        for j in range(bufferNeeded):
            newBytes += b'\x00'
            j += 1
        
        return subtitleBytes
    

    
    newBytes = b""
    # 16-bit little-endian value at bytes 18-19 decides where the dialogue begins.
    firstLengthBytes = originalBinary[18:20]
    firstLength = struct.unpack('<H', firstLengthBytes)[0]
    offset = 8 + firstLength # This is our starting point for the dialogue.

    # Everything before the dialogue is copied through unchanged.
    newBytes += originalBinary[0: offset]

    # i = startingNum
    while i <= len(newTexts):
        start, duration = timings.get(f"{i}").split(",")
        start = int(start)
        duration = int(duration)
        if originalBinary[offset] == 0x00:
            # A null first byte marks the last entry; the loop ends after it.
            # Find the length here (This is stupid!)
            # NOTE(review): find() returns an absolute index, which is then added to offset again — confirm intent.
            origTextData = originalBinary[offset: offset + originalBinary.find(b'\x00', offset + 16)] # We can add the buffer later
            bufferNeeded = 4 - (len(origTextData) % 4)
            origTextLength = len(origTextData) + bufferNeeded
            origTextData = originalBinary[offset: offset + origTextLength]

            # Now create the new one.
            newText = encodeNewText(newTexts[str(i)])
            newBytes = newBytes + origTextData[0:4] + struct.pack("<I", start) + struct.pack("<I", duration) + origTextData[12:16] + newText
            i += 1
            offset += origTextLength
            break
        else:
            # Regular entry: the first byte is used as the entry length.
            origLength = originalBinary[offset]
            origTextData = originalBinary[offset: offset + origLength]
            origTextLength = len(origTextData)
            # New Text
            newText = encodeNewText(newTexts[str(i)])
            newLength = len(newText) + 16
            # NOTE(review): to_bytes() without arguments needs Python 3.11+ and only fits values up to 255.
            newBytes += newLength.to_bytes() + origTextData[1:4] + struct.pack("<I", start) + struct.pack("<I", duration) + origTextData[12:16] + newText
        
            i += 1
            offset += origTextLength

    return newBytes
186 | 
def getDemoDiagHeader(data: bytes) -> bytes:
    """
    Returns the header portion only for a given dialogue section.

    ``data`` must start at the section. The header length is the 16-bit
    value stored at bytes 14-15 plus 4. The value is read explicitly as
    little-endian, matching how DemoTools.demoTextExtractor reads the same
    field, instead of relying on the byte order of the host machine.

    Raises struct.error when ``data`` is shorter than 16 bytes.
    """
    headerLength = struct.unpack("<H", data[14:16])[0] + 4
    return data[:headerLength]
193 | 
194 | # if debug:
195 | #     print(f'Only injecting Demo 25!')
196 | #     bin_files = ['demoWorkingDir/usa/bins/demo-25.dmo']
197 | 
if __name__ == "__main__":
    # Main logic: for every demo that has an entry in the injection json,
    # rebuild each dialogue area with the new subtitles and write the result
    # to outputDir, keeping the file the same size as the original.
    for file in bin_files:
        print(os.path.basename(f"{file}: "), end="")
        filename = os.path.basename(file)
        basename = filename.split(".")[0]

        if basename in skipFilesListD1:
            if debug:
                print(f'{basename} in skip list. Continuing...         ')
            continue

        if basename not in injectTexts:
            print(f'{basename} was not in the json. Skipping...\r', end="")
            continue

        # Initialize the demo data and the dictionary we're using to replace it.
        with open(file, 'rb') as origFile:
            origDemoData = origFile.read()
        origBlocks = len(origDemoData) // 0x800  # Used below to check we hit the same length!
        demoDict: dict = injectTexts[basename][0]
        demoTimings: dict = injectTexts[basename][1]

        subtitles = assembleTitles(demoDict, demoTimings)

        offsets = DTE.getTextAreaOffsets(origDemoData)
        if not offsets:
            # Nothing to rebuild; indexing offsets[0] below would raise IndexError.
            print(f'{basename}: no dialogue areas found. Skipping...')
            continue

        newDemoData = origDemoData[0 : offsets[0]]  # UNTIL the header

        for Num in range(len(offsets)):
            oldHeader = getDemoDiagHeader(origDemoData[offsets[Num]:])
            # Header fields are read and written explicitly little-endian,
            # matching the '<H' reads in DemoTools.demoTextExtractor.
            oldLength = struct.unpack("<H", oldHeader[1:3])[0]
            frameStart = struct.unpack("<I", oldHeader[4:8])[0]
            frameLimit = struct.unpack("<I", oldHeader[8:12])[0]
            # Get only subtitles in this section.
            subsForSection = [sub for sub in subtitles if frameStart <= sub.startFrame < frameLimit]
            newSubBlock = genSubBlock(subsForSection)
            newLength = len(oldHeader) + len(newSubBlock)

            # Same layout as the old header, with the two length fields recomputed.
            newHeader = (
                bytes.fromhex("03")
                + struct.pack("<H", newLength)
                + bytes(1)
                + struct.pack("<II", frameStart, frameLimit)
                + oldHeader[12:16]
                + struct.pack("<I", newLength - 4)
                + oldHeader[20:]
            )
            newDemoData += newHeader + newSubBlock
            # Add the rest of the data from this to the next offset OR until end of original demo.
            if Num < len(offsets) - 1:  # if it is NOT the last...
                newDemoData += origDemoData[offsets[Num] + oldLength: offsets[Num + 1]]
            else:
                newDemoData += origDemoData[offsets[Num] + oldLength: ]

        # Adjust length to match original file.
        if len(newDemoData) == len(origDemoData):
            print("Alignment correct!")
        elif len(newDemoData) < len(origDemoData):  # new demo shorter
            newDemoData += bytes(len(origDemoData) - len(newDemoData))
            if len(newDemoData) % 0x800 == 0:
                print("Alignment correct!")
        else:
            # New demo is longer: it may only be cut back when everything past
            # the original length is zero padding. (The earlier slice started
            # at the size difference instead of the original length, so it
            # inspected the wrong bytes and this check could never pass.)
            surplus = newDemoData[len(origDemoData):]
            if surplus == bytes(len(surplus)):
                newDemoData = newDemoData[:len(origDemoData)]
            else:
                print(f'CRITICAL ERROR! New demo cannot be truncated to original length!')
                sys.exit(1)

        newBlocks = len(newDemoData) // 0x800
        if newBlocks != origBlocks:
            print(f"{len(newDemoData)} / {len(origDemoData)}")
            print(f'BLOCK MISMATCH!\nNew data is {newBlocks} blocks, old was {origBlocks} blocks.\nTHERE COULD BE PROBLEMS IN RECOMPILE!!')

        # Finished work! Write the new file.
        with open(f'{outputDir}/{basename}.dmo', 'wb') as newFile:
            newFile.write(newDemoData)
    print(f'New Demo Files have been injected!')
    sys.exit(0)


--------------------------------------------------------------------------------
/DemoTools/demoTextTesting.py:
--------------------------------------------------------------------------------
1 | import DemoTools.demoTextExtractor as DTE
2 | import re, struct
3 | 
# Scratch input for manual experiments with the extractor helpers.
inputFile = 'demoWorkingDir/jpn/bins/demo-6.dmo'
# Read the whole demo into memory; the handle is closed once the data is loaded.
with open(inputFile, 'rb') as demoFile:
    demoData = demoFile.read()
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.1.py:
--------------------------------------------------------------------------------
 1 | ##!/bin/python3
 2 | 
 3 | # Assumes RADIO.DAT for filename
 4 | 
 5 | import os
 6 | import struct
 7 | 
# Input file; it is opened at module level and read by the script body below.
filename = "RADIO.DAT"
offset = 0  # running count of bytes consumed from radioFile

radioFile = open(filename, 'rb')
12 | 
13 | """
14 | freq = struct.unpack('>h', radioFile.read(2))
15 | 
16 | print(type(freq))
17 | if 14000 < freq[0] < 14300:    print(str(freq[0]) + f' is the first call')
18 | """
19 | 
def checkIsFreq(checkByte):
    """
    Tell whether two bytes encode a frequency strictly between 140 and 143.

    ``checkByte`` is unpacked as a big-endian signed 16-bit integer and
    divided by 100. The resulting value is printed before the check.
    """
    global radioFile
    (rawValue,) = struct.unpack('>h', checkByte)
    freq = rawValue / 100
    print(freq)
    return 140 < freq < 143
29 | 
# Read the first two bytes and, if they look like a frequency, dump the
# fields of the call header that follows.
checkByte = radioFile.read(2)
offset += 2
if checkIsFreq(checkByte):
    callHeader = radioFile.read(10)
    offset += 10
    # Perform 3 additional operations
    unk0 = callHeader[0:2]
    unk1 = callHeader[2:4]
    unk2 = callHeader[4:6]
    print(callHeader)
    print(unk0)
    print(unk1)
    print(unk2)
    # Optional check that unk2 is always 0x00 0x00 ?
    buffer = callHeader[7:9]
    # '>h' needs exactly two bytes. The previous read(offset) asked for 12
    # bytes (the running offset), which made struct.unpack raise struct.error.
    # NOTE(review): the length may actually be the two bytes already held in
    # ``buffer`` — confirm against the RADIO.DAT layout.
    lengthBytes = radioFile.read(2)
    offset += 2
    length = struct.unpack('>h', lengthBytes)
    print(length[0]) # this is the length we need to pull next
47 |     
48 | 
49 |     
50 | 
51 | 


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.2.py:
--------------------------------------------------------------------------------
  1 | ##!/bin/python3
  2 | 
  3 | # Assumes RADIO.DAT for filename
  4 | """
  5 | At this point we're just ensuring that each call has a correct length variable at the 9th byte
  6 | 
  7 | """
  8 | 
  9 | import os
 10 | import struct
 11 | 
 12 | filename = "RADIO-usa.DAT"
 13 | #filename = "RADIO-jpn.DAT"
 14 | 
 15 | offset = 0
 16 | # offset = 293536 # Freq 140.85
 17 | 
 18 | radioFile = open(filename, 'rb')
 19 | 
 20 | radioData = radioFile.read()
 21 | offset = 0
 22 | fileSize = radioData.__len__()
 23 | 
 24 | # print(fileSize) 1776859! 
 25 | 
 26 | def getFreq(offsetCheck):
 27 |     global radioData
 28 |     global radioFile
 29 |     radioFile.seek(offsetCheck)
 30 |     bytes = radioFile.read(2)
 31 |     freq = struct.unpack('>h', bytes)
 32 |     return freq[0] / 100
 33 | 
 34 | def getCallLength(offset):
 35 |     global radioFile
 36 |     radioFile.seek(offset + 9)
 37 |     lengthBytes = radioFile.read(2)
 38 |     lengthT = struct.unpack('>h', lengthBytes)
 39 |     return lengthT[0]
 40 | 
 41 | 
 42 | # Right now this iterates how many calls match a pattern before this breaks
 43 | while offset < fileSize:
 44 |     if offset == fileSize:
 45 |         print("Offset and fileSize match!!!")
 46 |         break
 47 |     
 48 |     i = getFreq(offset)
 49 |     length = getCallLength(offset)
 50 | 
 51 |     if 140 < i < 143:
 52 |         print(f'Call from {i} found! Offset is {hex(offset)}')
 53 |         offset += length + 9
 54 |     else:
 55 |         print(f"Something went wrong at offset {hex(offset)}!\nWe did not find a call!")
 56 |         byteTup = struct.unpack('s', radioFile.read(1))
 57 |         command = byteTup[0]
 58 |         print(command)
 59 |         offset += length + 9 + 36
 60 |     
 61 |     
 62 |     print(hex(offset)) 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | """
 68 | Going specifically by call won't work... let's try going by command one at a time. 
 69 | """
 70 | 
 71 | 
 72 | 
 73 | """
 74 | freq = struct.unpack('>h', radioFile.read(2))
 75 | 
 76 | print(type(freq))
 77 | if 14000 < freq[0] < 14300:    print(str(freq[0]) + f' is the first call')
 78 | 
 79 | 
 80 | def checkIsFreq(checkByte):
 81 |     global radioFile
 82 |     bytes = struct.unpack('>h', checkByte)
 83 |     freq = bytes[0] / 100
 84 |     print(freq)
 85 |     if 140 < freq < 143:
 86 |         return True
 87 |     else:
 88 |         return False
 89 | 
 90 | checkByte = radioFile.read(2)
 91 | offset += 2
 92 | if checkIsFreq(checkByte):
 93 |     callHeader = radioFile.read(10)
 94 |     offset += 10
 95 |     # Perform 3 additional operations
 96 |     unk0 = callHeader[0:2]
 97 |     unk1 = callHeader[2:4]
 98 |     unk2 = callHeader[4:6]
 99 |     print(callHeader)
100 |     print(unk0)
101 |     print(unk1)
102 |     print(unk2)
103 |     # Optional check that unk2 is always 0x00 0x00 ?
104 |     buffer = callHeader[7:9]
105 |     length = struct.unpack('>h', buffer)
106 |     print(length[0]) # this is the length we need to pull next 
107 |     
108 | 
109 | """
110 | 
111 | 


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.3..py:
--------------------------------------------------------------------------------
  1 | ##!/bin/python3
  2 | 
  3 | # Assumes RADIO.DAT for filename
  4 | """
  5 | We can't get all the way through, so let's try parsing some calls.
  6 | """
  7 | 
  8 | import os
  9 | import struct
 10 | 
 11 | filename = "RADIO-usa.DAT"
 12 | #filename = "RADIO-jpn.DAT"
 13 | 
 14 | offset = 0
 15 | # offset = 293536 # Freq 140.85
 16 | 
 17 | radioFile = open(filename, 'rb')
 18 | output = open("output.txt", '+a')
 19 | 
 20 | offset = 0
 21 | fileSize = radioData.__len__()
 22 | 
 23 | # print(fileSize) # Result is 1776859! 
 24 | 
 25 | def checkFreq(offsetCheck):
 26 |     global radioFile
 27 |     radioFile.seek(offsetCheck)
 28 |     bytes = radioFile.read(2)
 29 |     freq = struct.unpack('>h', bytes)
 30 |     if 14000 < freq[0] < 14300:
 31 |         return True
 32 |     else: 
 33 |         return False
 34 | 
 35 | def getFreq(offsetCheck):
 36 |     global radioFile
 37 |     radioFile.seek(offsetCheck)
 38 |     bytes = radioFile.read(2)
 39 |     freq = struct.unpack('>h', bytes)
 40 |     return freq[0] / 100
 41 | 
 42 | def getCallLength(offset):
 43 |     global radioFile
 44 |     radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script?
 45 | 
 46 |     lengthBytes = radioFile.read(2)
 47 |     lengthT = struct.unpack('>h', lengthBytes)
 48 |     return lengthT[0]
 49 | 
 50 | 
 51 | # Right now this iterates how many calls match a pattern before this breaks
 52 | 
 53 | """
 54 | def checkCalls():
 55 |     global offset
 56 |     global fileSize
 57 |     
 58 |     while offset < fileSize:
 59 |         
 60 |         
 61 |         i = getFreq(offset)
 62 |         length = getCallLength(offset)
 63 | 
 64 |         if 140 < i < 143:
 65 |             print(f'Call from {i} found! Offset is {hex(offset)}')
 66 |             offset += length + 9
 67 |         else:
 68 |             print(f"Something went wrong at offset {hex(offset)}!\nWe did not find a call!")
 69 |             byteTup = struct.unpack('s', radioFile.read(1))
 70 |             command = byteTup[0]
 71 |             print(hex(command))
 72 |         
 73 |         
 74 |         print(hex(offset)) 
 75 |     return
 76 | """
 77 | 
 78 | def getBytesAtOffset(offset):
 79 |     global radioFile
 80 |     radioFile.seek(offset)
 81 |     byte = radioFile.read(1)
 82 |     return byte
 83 | 
 84 | def handleCall(offsetCheck): # Assume call is just an 8 byte header for now
 85 |     global radioFile
 86 |     global output
 87 |     radioFile.seek(offset)
 88 |     header = radioFile.read(8)
 89 | 
 90 |     # Separate the header
 91 |     Freq = header[0:2]
 92 |     unk0 = header[2:4]
 93 |     unk1 = header[4:6]
 94 |     unk2 = header[6:8]
 95 |     output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2} ')
 96 |                                  
 97 |     return
 98 | 
 99 | def handleCommand(offsetCheck):
100 |     global radioFile
101 |     global output
102 | 
103 |     output.write(f'Handling the command...\n')
104 |     radioFile.seek(offsetCheck)
105 |     commandByte = radioFile.read(1)
106 |     command = commandByte.hex()
107 |     output.write(f'command is {command}\n')
108 | 
109 | 
110 |     match command: 
111 |         case b'\x31':
112 |             return "Switch Op?\n"
113 |         case _:
114 |             return "UNKNOWN!\n"
115 | 
116 | while offset < fileSize:
117 |     if offset == fileSize:
118 |             print("Offset and fileSize match!!!\n END PROGRAM")
119 |             break
120 |     if checkFreq(offset):
121 |         freq = getFreq(offset)
122 |         print(f"Call found! Frequency is {freq}\n")
123 |         output.write(f'Call {freq}')
124 |         handleCall(offset)
125 |         offset += 8
126 |     else:
127 |         byte = getBytesAtOffset(offset)
128 |         thisCommand = commandToEnglish(byte)
129 |         print(thisCommand + " is the command to handle with value: " + str(byte))
130 |         output.write(f'Command is {handleCommand}')
131 |         commandToEnglish(byte)
132 |         handleCommand(offset)
133 |         offset += 1
134 |         break
135 | 
136 | 
137 | output.close()


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.3.5 copy.py:
--------------------------------------------------------------------------------
 1 | #!/bin/python
 2 | 
 3 | # Assumes RADIO.DAT for filename
 4 | """
 5 | We can't get all the way through, so let's try parsing some calls.
 6 | 
 7 | Switching commands as I go to use the radioData as that would be in memory...
 8 | """
 9 | 
10 | import os
11 | import struct
12 | 
13 | filename = "RADIO-usa.DAT"
14 | #filename = "RADIO-jpn.DAT"
15 | 
16 | offset = 0
17 | # offset = 293536 # Freq 140.85
18 | 
19 | radioFile = open(filename, 'rb')
20 | output = open("output.txt", '+a')
21 | 
22 | offset = 0
23 | radioData = radioFile.read()
24 | fileSize = radioData.__len__()
25 | 
26 | Header = radioData[ offset : offset + 8]
27 | print(type(Header))
28 | print(Header)
29 | 
30 | freq = struct.unpack('>h', Header[0:2])[0]
31 | print(freq)
32 | print(hex(freq))
33 | 
34 | command = b'\x80'
35 | hex = command.decode('utf-8','')
36 | print(f'Command: {command}, hex = {hex}')


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.3.5.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python
  2 | 
  3 | # Assumes RADIO.DAT for filename
  4 | """
  5 | We can't get all the way through, so let's try parsing some calls.
  6 | """
  7 | 
  8 | import os
  9 | import struct
 10 | 
 11 | filename = "RADIO-usa.DAT"
 12 | #filename = "RADIO-jpn.DAT"
 13 | 
 14 | offset = 0
 15 | # offset = 293536 # Freq 140.85
 16 | 
 17 | radioFile = open(filename, 'rb')
 18 | output = open("output.txt", 'w')
 19 | 
 20 | offset = 0
 21 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 
 22 | fileSize = radioData.__len__()
 23 | 
 24 | 
 25 | 
 26 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ',
 27 |                 b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME',
 28 |                 b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH',
 29 |                 b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'ANIMATION', b'\x00':'NULL' 
 30 | }
 31 | 
 32 | def commandToEnglish(hex):
 33 |     try: 
 34 |         commandNamesEng[hex]
 35 |         return commandNamesEng[hex]
 36 |     except:
 37 |         return "BYTE WAS NOT DEFINED!!!!" 
 38 | 
 39 | 
 40 | # print(fileSize) # Result is 1776859! 
 41 | 
 42 | def checkFreq(offsetCheck):
 43 |     # Checks if the next two bytes are a codec number or not.
 44 |     global radioData
 45 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes
 46 | 
 47 |     if 14000 < freq < 14300:
 48 |         return True
 49 |     else: 
 50 |         return False
 51 | 
 52 | def getFreq(offsetCheck):
 53 |     global radioFile
 54 | 
 55 |     radioFile.seek(offsetCheck)
 56 |     bytes = radioFile.read(2)
 57 | 
 58 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0]
 59 |     return freq / 100
 60 | 
 61 | def getCallLength(offset):
 62 |     global radioFile
 63 |     radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script?
 64 | 
 65 |     lengthBytes = radioFile.read(2)
 66 |     lengthT = struct.unpack('>h', lengthBytes)
 67 |     return lengthT[0]
 68 | 
 69 | 
 70 | def getBytesAtOffset(offset):
 71 |     global radioFile
 72 |     radioFile.seek(offset)
 73 |     byte = radioFile.read(1)
 74 |     return byte
 75 | 
 76 | def handleCall(offsetCheck): # Assume call is just an 8 byte header for now
 77 |     global radioFile
 78 |     global output
 79 |     radioFile.seek(offset)
 80 |     header = radioFile.read(8)
 81 | 
 82 |     # Separate the header
 83 |     Freq = header[0:2]
 84 |     unk0 = header[2:4]
 85 |     unk1 = header[4:6]
 86 |     unk2 = header[6:8]
 87 |     output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2} \n')
 88 |     return
 89 | 
 90 | def handleCommand(offsetCheck):
 91 |     global radioFile
 92 |     global output
 93 |     # Log the command byte at offsetCheck and return how many bytes the main loop should advance.
 94 |     output.write(f'Handling the command...\n')
 95 |     commandByte = radioData[offsetCheck].to_bytes()  # NOTE(review): argument-less int.to_bytes() needs Python 3.11+ - confirm target version
 96 |     output.write(f'command is {commandByte}\n')
 97 |     # Only 0x80 is decoded; any other command byte is stepped over as 8 bytes.
 98 |     match commandByte:
 99 |         case b'\x80':
100 |             offsetCheck += 1  # move past the command byte to the 2-byte length
101 |             length = struct.unpack('>h', radioData[ offsetCheck : offsetCheck + 2])[0]  # big-endian signed short
102 |             output.write(f'Length of command is {length}\n')
103 |             # NOTE(review): unclear whether length + 1 lands on the next command - verify against the data
104 |             return length + 1
105 |         case _:
106 |             return 8
107 | 
108 | 
109 | while offset < fileSize:
110 |     offsetHex = hex(offset)
111 |     output.write(f'Loop start! Offset is currently {offset} or {offsetHex}\n')
112 |     if offset == fileSize:
113 |             print("Offset and fileSize match!!!\n END PROGRAM")
114 |             break
115 |     if checkFreq(offset):
116 |         freq = getFreq(offset)
117 |         print(f"Call found! Frequency is {freq}\n")
118 |         handleCall(offset)
119 |         offset += 8
120 |     else:
121 |         byte = getBytesAtOffset(offset)
122 |         thisCommand = commandToEnglish(byte)
123 |         print(thisCommand + " is the command to handle with value: " + str(byte))
124 |         byteInt = byte[0]
125 | 
126 | 
127 |         commandToEnglish(byte)
128 |         length = handleCommand(offset)
129 |         offset += length 
130 | 
131 | 
132 | output.close()


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.3.6.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python
  2 | 
  3 | # Assumes RADIO.DAT for filename
  4 | """
  5 | We can't get all the way through, so let's try parsing some calls.
  6 | 
  7 | v0.3.6: Adding a "Chunk pull" and "chunk analyzer"
  8 | """
  9 | 
 10 | 
 11 | import os
 12 | import struct
 13 | 
 14 | filename = "/Users/solidmixer/projects/mgs1-undub/RADIO-usa.DAT"
 15 | #filename = "RADIO-jpn.DAT"
 16 | 
 17 | offset = 0
 18 | # offset = 293536 # Freq 140.85
 19 | 
 20 | radioFile = open(filename, 'rb')
 21 | output = open("output.txt", 'w')
 22 | 
 23 | offset = 0
 24 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 
 25 | fileSize = radioData.__len__()
 26 | 
 27 | 
 28 | 
 29 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ',
 30 |                 b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME',
 31 |                 b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH',
 32 |                 b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'ANIMATION', b'\x00':'NULL' 
 33 | }
 34 | 
 35 | def commandToEnglish(hex):
 36 |     try: 
 37 |         commandNamesEng[hex]
 38 |         return commandNamesEng[hex]
 39 |     except:
 40 |         return "BYTE WAS NOT DEFINED!!!!" 
 41 | 
 42 | 
 43 | # print(fileSize) # Result is 1776859! 
 44 | 
 45 | def checkFreq(offsetCheck): # Checks if the next two bytes are a codec number or not. Returns True or False.
 46 |     global radioData
 47 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes
 48 | 
 49 |     if 14000 < freq < 14300:
 50 |         return True
 51 |     else: 
 52 |         return False
 53 | 
 54 | def getFreq(offsetCheck): # If freq is at offset, return frequency as 140.15
 55 |     global radioFile
 56 | 
 57 |     radioFile.seek(offsetCheck)
 58 |     bytes = radioFile.read(2)
 59 | 
 60 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0]
 61 |     return freq / 100
 62 | 
 63 | def getCallLength(offset): # Returns the length of the call, offset must be at the freq bytes
 64 |     global radioFile
 65 |     radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script in 2x bytes, then FF
 66 | 
 67 |     lengthBytes = radioFile.read(2)
 68 |     lengthT = struct.unpack('>h', lengthBytes)
 69 |     return lengthT[0]
 70 | 
 71 | def getLength(offsetCheck): # Returns the length of the command, offset must be at the freq bytes
 72 |     global radioData
 73 |     
 74 |     lengthBytes = radioData[offsetCheck + 1: offsetCheck + 3]
 75 |     lengthT = struct.unpack('>H', lengthBytes)[0]
 76 |     return lengthT
 77 | 
 78 | def getByteAtOffset(offsetCheck): # Returns a single byte, probably redundant
 79 |     global radioData
 80 |     return radioData[offsetCheck]
 81 | 
 82 | def handleCallHeader(offsetCheck): # Assume call is just an 8 byte header for now
 83 |     global radioFile
 84 |     global output
 85 |     radioFile.seek(offset)
 86 |     header = radioFile.read(12)
 87 | 
 88 |     # Separate the header
 89 |     Freq = header[0:2]
 90 |     unk0 = header[2:4]
 91 |     unk1 = header[4:6]
 92 |     unk2 = header[6:8]
 93 | 
 94 |     if header[8:9] == b'\x80':
 95 |         callLength = header[9:11]
 96 |         numBytes = struct.unpack('>h', callLength)
 97 |     else:
 98 |         output.write(f'ERROR AT HEX {callLength}! ')
 99 | 
100 |     # Quick check we ended with an FF
101 |     if header[11] == b'\xFF':
102 |         output.write('Call intro nded with FF successfully\n')
103 |     else:
104 |         output.write(f'Call header DID NOT end in FF! Check hex at {callLength}')
105 | 
106 |     output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2}, Call is {numBytes[0]} bytes long, hex {callLength}:\n')
107 |     return
108 | 
109 | def handleCommand(offsetCheck): # We get through the file! But needs refinement... We're not ending evenly and lengths are too long. 
110 |     # global radioFile
111 |     global radioData
112 |     global output
113 | 
114 |     output.write(f'Handling the command... ')
115 |     commandByte = radioData[offsetCheck] #.to_bytes()?
116 |     output.write(f'Command is {commandByte}\n')
117 | 
118 |     if commandByte == b'\x00':
119 |         return 1
120 | 
121 |     length = getLength(offsetCheck)
122 |     output.write(f'Length of command is {length}\n')
123 |     commandBytes = radioData[offset : offset + length + 2]
124 |     print(commandByte, ": Offset: ", offsetCheck, " // Content: ", commandBytes, end="\n\n")
125 |     return length + 2
126 |     """
127 |     match commandByte:
128 |         case b'\x80':
129 |             offsetCheck += 1
130 |             length = getLength(offsetCheck)
131 |             output.write(f'Length of command is {length}\n')
132 |             commandBytes = radioData[offset:offset + length + 1]
133 |             print(commandBytes, end="\n")
134 |             return length + 1
135 |         case _:
136 |             return 8 #  We'll hope whatever we run into is just 8 bytes long. """
137 | 
138 | def getChunk(offsetCheck): # THIS IS NOT RETURNING A SUBSET OF THE BYTES! WTF!
139 |     global radioFile
140 |     global fileSize
141 | 
142 |     start = offsetCheck
143 |     radioFile.seek(offsetCheck)
144 |     for byte in radioFile.read():
145 |         if byte == '\xFF':
146 |             end = offsetCheck
147 |             return radioData[start : end +1]
148 |         else:
149 |             offsetCheck += 1
150 |     return b'\x00'
151 | 
152 | 
153 | while offset < fileSize:
154 |     offsetHex = hex(offset)
155 |     output.write(f'Loop start! Offset is currently {offset} or {offsetHex}\n')
156 |     if offset == fileSize:
157 |         print("Offset and fileSize match!!!\n END PROGRAM")
158 |         break
159 |     if checkFreq(offset):
160 |         freq = getFreq(offset)
161 |         output.write(f"Call found! Frequency is {freq}\n")
162 |         callLength = getCallLength(offset)
163 |         output.write(f'Call is {callLength} bytes long')
164 |         handleCallHeader(offset)
165 |         offset += 12
166 |     else:
167 |         # phrase = getChunk(offset)
168 |         offset += handleCommand(offset)
169 | 
170 | 
171 | # Close output file
172 | output.close()


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.3.8 Line by line.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python
  2 | 
  3 | # Assumes RADIO.DAT for filename
  4 | """
  5 | We can't get all the way through, so let's try parsing some calls.
  6 | 
  7 | v0.3.6: Adding a "Chunk pull" and "chunk analyzer"
  8 | """
  9 | 
 10 | 
 11 | import os, struct, re
 12 | 
 13 | filename = "/home/solidmixer/projects/mgs1-undub/RADIO-usa.DAT"
 14 | #filename = "RADIO-jpn.DAT"
 15 | 
 16 | offset = 0
 17 | # offset = 293536 # Freq 140.85
 18 | 
 19 | radioFile = open(filename, 'rb')
 20 | output = open("output.txt", 'w')
 21 | 
 22 | offset = 0
 23 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 
 24 | fileSize = radioData.__len__()
 25 | 
 26 | 
 27 | 
 28 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ',
 29 |                 b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME',
 30 |                 b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH',
 31 |                 b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'END_LINE', b'\x00':'NULL' 
 32 | }
 33 | 
 34 | def commandToEnglish(hex):
 35 |     try: 
 36 |         commandNamesEng[hex]
 37 |         return commandNamesEng[hex]
 38 |     except:
 39 |         return "BYTE WAS NOT DEFINED!!!!" 
 40 | 
 41 | 
 42 | # print(fileSize) # Result is 1776859! 
 43 | 
 44 | def checkFreq(offsetCheck): # Checks if the next two bytes are a codec number or not. Returns True or False.
 45 |     global radioData
 46 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes
 47 | 
 48 |     if 14000 < freq < 14200:
 49 |         return True
 50 |     else: 
 51 |         return False
 52 | 
 53 | def getFreq(offsetCheck): # If freq is at offset, return frequency as 140.15
 54 |     global radioFile
 55 | 
 56 |     radioFile.seek(offsetCheck)
 57 |     bytes = radioFile.read(2)
 58 | 
 59 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0]
 60 |     return freq / 100
 61 | 
 62 | def getCallLength(offset): # Returns the length of the call, offset must be at the freq bytes
 63 |     global radioFile
 64 |     radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script in 2x bytes, then FF
 65 | 
 66 |     lengthBytes = radioFile.read(2)
 67 |     lengthT = struct.unpack('>h', lengthBytes)[0]
 68 |     return lengthT
 69 | 
 70 | def getLength(offsetCheck): # Returns the length of the command, offset must be at the freq bytes
 71 |     global radioData
 72 |     
 73 |     lengthBytes = radioData[offsetCheck + 1: offsetCheck + 3]
 74 |     lengthT = struct.unpack('>H', lengthBytes)[0]
 75 |     return lengthT
 76 | 
 77 | def getByteAtOffset(offsetCheck): # Returns a single byte, probably redundant
 78 |     global radioData
 79 |     return radioData[offsetCheck]
 80 | 
 81 | def handleCallHeader(offsetCheck): # Assume call is just an 8 byte header for now
 82 |     global radioData
 83 |     global output
 84 |     header = radioData[offset: offset + 12 ]
 85 | 
 86 |     # Separate the header
 87 |     Freq = header[0:2]
 88 |     unk0 = header[2:4]
 89 |     unk1 = header[4:6]
 90 |     unk2 = header[6:8]
 91 |     callLength = header[9:11]
 92 |     numBytes = 0
 93 | 
 94 |     if header[8].to_bytes() == b'\x80':
 95 |         numBytes = struct.unpack('>h', callLength)[0]
 96 |     else:
 97 |         output.write(f'ERROR AT byte {offset}! Call length is reading as {numBytes} \n')
 98 | 
 99 |     # Quick check we ended with an FF
100 |     if header[11].to_bytes() == b'\xff': 
101 |         output.write('Call intro ended with FF successfully\n')
102 |     else:
103 |         output.write(f'Call header DID NOT end in FF! Check hex at {offset + 11}')
104 | 
105 |     output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2}, Call is {numBytes} bytes long, hex {callLength}:\n')
106 |     return
107 | 
108 | def handleCommand(offsetCheck): # We get through the file! But needs refinement... We're not ending evenly and lengths are too long. 
109 |     # global radioFile
110 |     global radioData
111 |     global output
112 |     commandByte = radioData[offsetCheck].to_bytes()
113 |     
114 |     match commandByte:
115 |         case b'\x00': # AKA A null
116 |             output.write('NULL!\n')
117 |             return offsetCheck + 1
118 |         case b'\x01':
119 |             output.write('Dialogue! -- ')
120 |             length = getLength(offsetCheck)
121 |             while radioData[offsetCheck + length + 1].to_bytes() != b'\xff':
122 |                 output.write('We have a long one! Length is not FF, adding 1...\n')
123 |                 length += 1
124 |             line = radioData[offsetCheck: offsetCheck + length + 3]
125 |             unk1 = line[3:5]
126 |             unk2 = line[5:7]
127 |             unk3 = line[7:9]
128 |             dialogue = line[9: length + 1]
129 |             # output.write(f'Last byte in line is {line[length + 1].to_bytes()}\n') ## Should always end in FF!
130 |             
131 |             if b'\x80\x23\x80\x4e' in dialogue:
132 |                 dialogue = dialogue.replace(b'\x80\x23\x80\x4e', b'\x5c\x72\x5c\x6e')
133 |                 output.write('Dialogue new line replaced! \n')
134 | 
135 |             writeToFile = f'Length (int) = {length}, UNK1 = {unk1.hex()}, UNK2 = {unk2.hex()}, UNK3 = {unk3.hex()}, Text: {str(dialogue)}\n'
136 |             output.write(writeToFile)
137 |             return offsetCheck + length + 2
138 |         case _:
139 |             output.write('Command is not cased! -- ')
140 |             start = offset 
141 |             while radioData[offsetCheck].to_bytes() != b'\xFF':
142 |                 offsetCheck += 1
143 |             line = radioData[start : offsetCheck + 1]
144 |             writeToFile = str(commandByte) + ": Offset: " + str(offsetCheck) + " // Content: " + str(line.hex()) + "\n\n"
145 |             output.write(writeToFile)
146 |             return offsetCheck + 1 
147 |         
148 |     """
149 |     match commandByte:
150 |         case b'\x80':
151 |             offsetCheck += 1
152 |             length = getLength(offsetCheck)
153 |             output.write(f'Length of command is {length}\n')
154 |             commandBytes = radioData[offset:offset + length + 1]
155 |             print(commandBytes, end="\n")
156 |             return length + 1global output
157 |         case _:
158 |             return 8 #  We'll hope whatever we run into is just 8 bytes long. """
159 | 
160 | """
161 | def getChunk(offsetCheck): # THIS IS NOT RETURNING A SUBSET OF THE BYTES! WTF!
162 |     global radioFile
163 |     global fileSize
164 | 
165 |     start = offsetCheck
166 |     radioFile.seek(offsetCheck)
167 |     for byte in radioFile.read():
168 |         if byte == '\xFF':
169 |             end = offsetCheck
170 |             return radioData[start : end +1]
171 |         else:
172 |             offsetCheck += 1
173 |     return b'\x00'
174 | """
175 | 
176 | while offset < fileSize:  # walk radioData one call header or command at a time
177 |     offsetHex = hex(offset)  # not referenced again in this loop
178 |     perc = offset / fileSize * 100
179 |     print(f'We are at {perc}% through the file')
180 |     if offset >= fileSize - 1:  # stop once only the final byte (or nothing) remains
181 |         print("Reached end of file!!!\n END PROGRAM")
182 |         break
183 |     if checkFreq(offset):
184 |         freq = getFreq(offset)
185 |         output.write(f"Call found! Frequency is {freq}\n")
186 |         callLength = getCallLength(offset)
187 |         output.write(f'Call is {callLength} bytes long')
188 |         handleCallHeader(offset)
189 |         offset += 12  # skip the 12-byte call header that was just logged
190 |         start = offset  # only read by the disabled code in the string literal below
191 |     else:
192 |         offset = handleCommand(offset)  # handleCommand returns the offset of the next command
193 |         """
194 |     else:
195 |         if radioData[offset].to_bytes() == b'\x80': # Expressing FF as a byte string wasnt working :|
196 |             output.write("We matched an FF\n")
197 |             line = radioData[start : offset + 1]
198 |             output.write(line.hex())
199 |             output.write('\n')
200 |             print('Wrote line to file!\n')
201 |             offset += 1
202 |             start = offset
203 |         else:
204 |             offset += 1
205 | """
206 | # Close output file
207 | output.close()


--------------------------------------------------------------------------------
/Old vers/ExtractTextsV0.3.9.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python
  2 | 
  3 | # Assumes RADIO.DAT for filename
  4 | 
  5 | """
  6 | We can't get all the way through, so let's try parsing some calls.
  7 | 
  8 | v0.3.6: Adding a "Chunk pull" and "chunk analyzer"
  9 | v0.3.9: Removed Chunk pull
 10 | """
 11 | 
 12 | 
 13 | import os, struct, re
 14 | import radioDict
 15 | 
 16 | #filename = "/home/solidmixer/projects/mgs1-undub/RADIO-usa.DAT"
 17 | filename = "RADIO-usa.DAT"
 18 | #filename = "RADIO-jpn.DAT"
 19 | 
 20 | # We'll do a better check for this later. 
 21 | if filename.__contains__('jpn'):
 22 |     jpn = True
 23 | else:
 24 |     jpn = False
 25 | 
 26 | offset = 0
 27 | # offset = 293536 # Freq 140.85 Hex 0x47AA0
 28 | # Offset = 1773852 # Deepthroat 140.48 Hex 0x1B111C
 29 | 
 30 | radioFile = open(filename, 'rb')
 31 | output = open("output.txt", 'w')
 32 | 
 33 | offset = 0
 34 | radioData = radioFile.read() # The byte stream is better to use than the file on disk if you can. 
 35 | fileSize = radioData.__len__()
 36 | 
 37 | # print(fileSize) # Result is 1776859! 
 38 | 
 39 | # A lot of this is work in progress or guessing
 40 | commandNamesEng = {b'\x01':'SUBTITLE', b'\x02':'VOX_CUES', b'\x03':'ANI_FACE', b'\x04':'ADD_FREQ',
 41 |                 b'\x05':'MEM_SAVE', b'\x06':'AUD_CUES', b'\x07':'ASK_USER', b'\x08':'SAVEGAME',
 42 |                 b'\x10':'IF_CHECK', b'\x11':'ELSE', b'\x12':'ELSE_IFS', b'\x30':'SWITCH',
 43 |                 b'\x31':'SWITCHOP', b'\x80':'GCL_SCPT', b'\xFF':'CMD_HEDR', b'\x00':'NULL' 
 44 | }
 45 | 
 46 | def commandToEnglish(hex):
 47 |     try: 
 48 |         commandNamesEng[hex]
 49 |         return commandNamesEng[hex]
 50 |     except:
 51 |         return "BYTE WAS NOT DEFINED!!!!" 
 52 | 
 53 | def checkFreq(offsetCheck): # Checks if the next two bytes are a codec number or not. Returns True or False.
 54 |     global radioData
 55 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0] # INT from two bytes
 56 | 
 57 |     if 14000 < freq < 14200:
 58 |         return True
 59 |     else: 
 60 |         return False
 61 | 
 62 | def getFreq(offsetCheck): # If freq is at offset, return frequency as 140.15
 63 |     global radioFile
 64 | 
 65 |     radioFile.seek(offsetCheck)
 66 |     bytes = radioFile.read(2)
 67 | 
 68 |     freq = struct.unpack('>h', radioData[ offset : offset + 2])[0]
 69 |     return freq / 100
 70 | 
 71 | def getCallLength(offset): # Returns the length of the call, offset must be at the freq bytes
 72 |     global radioFile
 73 |     radioFile.seek(offset + 9) # Call length is after 8 bytes, then 0x80, then the length of the script in 2x bytes, then FF
 74 | 
 75 |     lengthBytes = radioFile.read(2)
 76 |     lengthT = struct.unpack('>h', lengthBytes)[0]
 77 |     return lengthT
 78 | 
 79 | def getLength(offsetCheck): # Returns the length of the command, offset must be at the freq bytes
 80 |     global radioData
 81 |     
 82 |     lengthBytes = radioData[offsetCheck + 1: offsetCheck + 3]
 83 |     lengthT = struct.unpack('>H', lengthBytes)[0]
 84 |     return lengthT
 85 | 
 86 | def getByteAtOffset(offsetCheck): # Returns a single byte, probably redundant
 87 |     global radioData
 88 |     return radioData[offsetCheck]
 89 | 
 90 | def handleCallHeader(offsetCheck): # Assume call is just an 8 byte header for now
 91 |     global radioData
 92 |     global output
 93 |     header = radioData[offset: offset + 12 ]
 94 | 
 95 |     # Separate the header
 96 |     Freq = header[0:2]
 97 |     unk0 = header[2:4]
 98 |     unk1 = header[4:6]
 99 |     unk2 = header[6:8]
100 |     callLength = header[9:11]
101 |     numBytes = 0
102 | 
103 |     if header[8].to_bytes() == b'\x80':
104 |         numBytes = struct.unpack('>h', callLength)[0]
105 |     else:
106 |         output.write(f'ERROR AT byte {offset}! Call length is reading as {numBytes} \n')
107 | 
108 |     # Quick check we ended with an FF
109 |     if header[11].to_bytes() == b'\xff': 
110 |         output.write('Call intro ended with FF successfully\n')
111 |     else:
112 |         output.write(f'Call header DID NOT end in FF! Check hex at {offset + 11}')
113 | 
114 |     output.write(f'Call Header: {Freq}, {unk0}, {unk1}, {unk2}, Call is {numBytes} bytes long, hex {callLength}\n')
115 |     return
116 | 
117 | def handleCommand(offsetCheck): # We get through the file! But needs refinement... We're not ending evenly and lengths are too long. 
118 |     # global radioFile
119 |     global radioData
120 |     global output
121 |     commandByte = radioData[offsetCheck].to_bytes()
122 |     
123 |     match commandByte:
124 |         case b'\x00': # AKA A null
125 |             output.write('NULL in Command check!\n')
126 |             return offsetCheck + 1
127 |         case b'\x01':
128 |             output.write('Dialogue! -- ')
129 |             length = getLength(offsetCheck)
130 |             while radioData[offsetCheck + length + 1].to_bytes() != b'\xff':
131 |                 print(f'We have a long one at offset {offsetCheck}! Length is not FF, adding 1...\n')
132 |                 length += 1
133 |             line = radioData[offsetCheck: offsetCheck + length + 3]
134 |             unk1 = line[3:5]
135 |             unk2 = line[5:7]
136 |             unk3 = line[7:9]
137 |             dialogue = line[9: length + 1]
138 |             # output.write(f'Last byte in line is {line[length + 1].to_bytes()}\n') ## Should always end in FF!
139 |             
140 |             if b'\x80\x23\x80\x4e' in dialogue:  # this replaces the in-game hex for new line with a \\r\\n
141 |                 dialogue = dialogue.replace(b'\x80\x23\x80\x4e', b'\x5c\x72\x5c\x6e')
142 |                 output.write('Dialogue new line replaced! \n')
143 | 
144 |             if jpn:
145 |                 dialogue = translateJapaneseHex(dialogue)
146 |                 writeToFile = f'Length (int) = {length}, UNK1 = {unk1.hex()}, UNK2 = {unk2.hex()}, UNK3 = {unk3.hex()}, Text: {str(dialogue.hex())}\n'
147 |             else:
148 |                 writeToFile = f'Length (int) = {length}, UNK1 = {unk1.hex()}, UNK2 = {unk2.hex()}, UNK3 = {unk3.hex()}, Text: {str(dialogue)}\n'
149 |             # Write to file
150 |             output.write(writeToFile)
151 |             return offsetCheck + length + 2
152 |         case _:
153 |             output.write('Command is not cased! -- ')
154 |             start = offset 
155 |             while radioData[offsetCheck].to_bytes() != b'\xFF':
156 |                 offsetCheck += 1
157 |             line = radioData[start : offsetCheck + 1]
158 |             writeToFile = str(commandByte) + ": Offset: " + str(offsetCheck) + " // Content: " + str(line.hex()) + "\n\n"
159 |             output.write(writeToFile)
160 |             return offsetCheck + 1 
161 | 
def translateJapaneseHex(bytestring):
    """Translate ``bytestring`` two bytes at a time via ``radioDict.getRadioChar``.

    Each consecutive byte pair is converted to its hex string and looked up;
    the lookups are concatenated in order. A trailing unpaired byte is ignored.
    """
    pairStarts = range(0, len(bytestring) - 1, 2)
    return ''.join(
        radioDict.getRadioChar(bytestring[pos:pos + 2].hex())
        for pos in pairStarts
    )
170 | 
if __name__ == '__main__':
    # Walk the radio data from the module-level ``offset`` until the end of the
    # file: call headers are logged and skipped (12 bytes), everything else is
    # handed to handleCommand, which returns the next offset to parse.
    while offset < fileSize:
        offsetHex = hex(offset)
        perc = offset / fileSize * 100
        print(f'We are at {perc}% through the file')
        if offset >= fileSize - 1:
            print("Reached end of file!!!\n END PROGRAM")
            break
        if checkFreq(offset):
            freq = getFreq(offset)
            output.write(f"Call found! Frequency is {freq}\n")
            callLength = getCallLength(offset)
            # Newline added: this line used to run straight into handleCallHeader's output.
            output.write(f'Call is {callLength} bytes long\n')
            handleCallHeader(offset)
            offset += 12
            start = offset
        else:
            offset = handleCommand(offset)
    # Close output file
# NOTE: this close sits outside the __main__ guard, so it also runs on import.
output.close()


--------------------------------------------------------------------------------
/Old vers/demoTextInjector_old.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adapted from Green Goblins scripts. 
  3 | This is really heavily based on his awesome work. 
  4 | 
  5 | Script for working with Metal Gear Solid data
  6 | 
  7 | Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/)
  8 | 
  9 | Permission to use, copy, modify, and/or distribute this software for any
 10 | purpose with or without fee is hereby granted, provided that the above
 11 | copyright notice and this permission notice appear in all copies.
 12 | 
 13 | """
 14 | 
 15 | import os, sys
 16 | sys.path.append(os.path.abspath('./myScripts'))
 17 | import re
 18 | import glob
 19 | import struct
 20 | import progressbar
 21 | import translation.radioDict as RD
 22 | import json
 23 | 
 24 | import DemoTools.demoTextExtractor as DTE
 25 | 
 26 | version = "usa"
 27 | # version = "jpn"
 28 | 
 29 | # Toggles
 30 | debug = True
 31 | 
 32 | 
 33 | # Directory configs
 34 | inputDir = f'demoWorkingDir/{version}/bins'
 35 | outputDir = f'demoWorkingDir/{version}/newBins'
 36 | injectJson = f'demoWorkingDir/{version}/demoText-{version}.json'
 37 | os.makedirs(outputDir, exist_ok=True)
 38 | 
 39 | bin_files = glob.glob(os.path.join(inputDir, '*.bin'))
 40 | bin_files.sort(key=lambda f: int(f.split('-')[1].split('.')[0]))
 41 | 
 42 | injectTexts = json.load(open(injectJson, 'r'))
 43 | 
 44 | skipFilesListD1 = [
 45 |     'demo-05',
 46 |     'demo-06',
 47 |     'demo-31',
 48 |     'demo-33',
 49 |     'demo-35',
 50 |     'demo-63',
 51 |     'demo-67',
 52 |     'demo-71',
 53 |     'demo-72',
 54 | ]
 55 | 
 56 | def injectSubtitles(originalBinary: bytes, newTexts: dict, startingNum: int = 1, timings: dict = None) -> tuple [bytes, int]:
 57 |     """
 58 |     Injects the new text to the original data, returns the bytes. 
 59 |     Also returns the index we were at when we finished. 
 60 |     """ 
 61 | 
 62 |     def encodeNewText(text: str):
 63 |         """
 64 |         Simple. Encodes the text as bytes. 
 65 |         Adds the buffer we need to be divisible by 4...
 66 |         Return the new bytes.
 67 |         """
 68 |         newBytes: bytes = RD.encodeJapaneseHex(text)[0]
 69 |         bufferNeeded = 4 - (len(newBytes) % 4)
 70 |         for j in range(bufferNeeded):
 71 |             newBytes += b'\x00'
 72 |             j += 1
 73 |         
 74 |         return newBytes
 75 |     
 76 |     newBytes = b""
 77 |     firstLengthBytes = originalBinary[18:20]
 78 |     firstLength = struct.unpack('<H', firstLengthBytes)[0]
 79 |     offset = 8 + firstLength # This is our starting point for the dialogue.
 80 | 
 81 |     newBytes += originalBinary[0: offset]
 82 | 
 83 |     i = startingNum
 84 |     while i <= len(newTexts):
 85 |         start, duration = timings.get(f"{i}").split(",")
 86 |         start = int(start)
 87 |         duration = int(duration)
 88 |         if originalBinary[offset] == 0x00:
 89 |             # Find the length here (This is stupid!)
 90 |             origTextData = originalBinary[offset: offset + originalBinary.find(b'\x00', offset + 16)] # We can add the buffer later
 91 |             bufferNeeded = 4 - (len(origTextData) % 4)
 92 |             origTextLength = len(origTextData) + bufferNeeded
 93 |             origTextData = originalBinary[offset: offset + origTextLength]
 94 | 
 95 |             # Now create the new one.
 96 |             newText = encodeNewText(newTexts[str(i)])
 97 |             newBytes = newBytes + origTextData[0:4] + struct.pack("<I", start) + struct.pack("<I", duration) + origTextData[12:16] + newText
 98 |             i += 1
 99 |             offset += origTextLength
100 |             break
101 |         else:
102 |             origLength = originalBinary[offset]
103 |             origTextData = originalBinary[offset: offset + origLength]
104 |             origTextLength = len(origTextData)
105 |             # New Text
106 |             newText = encodeNewText(newTexts[str(i)])
107 |             newLength = len(newText) + 16
108 |             newBytes += newLength.to_bytes() + origTextData[1:4] + struct.pack("<I", start) + struct.pack("<I", duration) + origTextData[12:16] + newText
109 |         
110 |             i += 1
111 |             offset += origTextLength
112 | 
113 |     return newBytes, i  
114 | 
if debug:
    print(f'Only injecting Demo 25!')
    # bin_files = ['demoWorkingDir/usa/bins/demo-25.bin']

# Rebuild every demo bin: each text area is replaced by its re-injected
# version and the bytes between areas are copied from the original.
for file in bin_files:
    filename = os.path.basename(file)
    print(filename)
    basename = filename.split(".")[0]

    if debug:
        print(f'Processing {basename}')

    if basename in skipFilesListD1:
        if debug:
            print(f'{basename} in skip list. Continuing...')
        continue

    # if injectTexts[basename] is None:
    if basename not in injectTexts:
        print(f'{basename} was not in the json. Skipping...')
        continue
    
    # Initialize the demo data and the dictionary we're using to replace it.
    with open(file, 'rb') as demoFile:
        origDemoData = demoFile.read()
    demoDict: dict = injectTexts[basename][0]
    timings: dict = injectTexts[basename][1]

    offsets = DTE.getTextAreaOffsets(origDemoData)
    nextStart = 1
    newDemoData = origDemoData[0 : offsets[0]]

    for Num in range(len(offsets)):
        subset = DTE.getTextAreaBytes(offsets[Num], origDemoData)
        newData, nextStart = injectSubtitles(subset, demoDict, nextStart, timings)
        newDemoData += newData 
        # Resume copying the original from the position matching the current
        # output length, up to the next text area (or to the end of the file).
        if Num < len(offsets) - 1:
            newDemoData += origDemoData[len(newDemoData): offsets[Num + 1]]
        else:
            newDemoData += origDemoData[len(newDemoData): ]
        print(newData.hex())

    # The context manager closes the output even if the write fails.
    with open(f'{outputDir}/{basename}.bin', 'wb') as newFile:
        newFile.write(newDemoData)
    # print(demoDict)
159 |     # print(demoDict)
160 | 
161 | 
162 | 
163 | 
164 | """
165 | # not really needed just for reference.
166 | for key in injectTexts:
167 |     print(key)
168 |     demoDict: dict = injectTexts[key]
169 |     
170 | """


--------------------------------------------------------------------------------
/Old vers/main.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python
  2 | import argparse, os
  3 | import RadioDatTools as RDT
  4 | 
# Globals
# Module-wide toggles read by the parsing functions in this file.
debugOutput = True  # when True, the parse loops print the current offset
jpn = False  # switched on by the -j/--japanese command-line flag
indentToggle = True  # switched on by the -i/--indent command-line flag

# File variables
fileSize = 0  # replaced with len(radioData) once the radio file has been read
 12 | 
 13 | def analyzeRadioFile() -> None:
 14 |     offset = 0
 15 |     nullCount = 0
 16 |     
 17 |     while offset < fileSize - 1: # We might need to change this to Case When... as well.
 18 |         # Offset Tracking
 19 |         if debugOutput:
 20 |             print(f'Main loop: offset is {offset}')
 21 | 
 22 |         if nullCount == 4:
 23 |             RDT.output.write(f'ALERT!!! We just had 4x Nulls in a row at offset {offset}\n')
 24 |             nullCount = 0
 25 | 
 26 |         # MAIN LOGIC
 27 |         if radioData[offset].to_bytes() == b'\x00': # Add logic to tally the nulls for reading ease
 28 |             RDT.indentLines()
 29 |             if radioData[offset + 1].to_bytes() == b'\x31': # For some reason switch statements don't have an FF
 30 |                 length = RDT.handleCommand(offset)
 31 |             else:
 32 |                 RDT.output.write(f"Null! (Main loop) offset = {offset}\n")
 33 |                 nullCount += 1
 34 |                 if layerNum > 0:
 35 |                     layerNum -= 1
 36 |                 length = 1
 37 |         elif radioData[offset].to_bytes() == b'\xFF': # Commands start with FF
 38 |             nullCount = 0
 39 |             length = RDT.handleCommand(offset)
 40 |         elif RDT.checkFreq(offset): # If we're at the start of a call
 41 |             nullCount = 0
 42 |             RDT.handleCallHeader(offset)
 43 |             length = 11 # In this context, we only want the header
 44 |             layerNum = 1
 45 |         else: # Something went wrong, we need to kinda reset
 46 |             length = RDT.handleUnknown(offset) # This will go until we find a call frequency
 47 |         offset += length
 48 | 
 49 |     RDT.output.close()
 50 | 
 51 | def extractRadioCallHeaders(filename: str) -> None:
 52 |     offset = 0
 53 |     global jpn
 54 |     global indentToggle
 55 |     global debugOutput
 56 |     global fileSize
 57 |     
 58 |     # Handle inputting radio file:
 59 |     global radioFile
 60 |     global radioData
 61 |     """
 62 |     radioFile = open(filename, 'rb')
 63 |     radioData = radioFile.read()
 64 |     fileSize = len(radioData)
 65 |     """
 66 |     RDT.setOutputFile(filename)
 67 | 
 68 |     while offset < fileSize - 1: # We might need to change this to Case When... as well.
 69 |         # Offset Tracking
 70 |         if debugOutput:
 71 |             print(f'offset is {offset}')
 72 | 
 73 |         # MAIN LOGIC
 74 |         if radioData[offset].to_bytes() == b'\x00': # Add logic to tally the nulls for reading ease
 75 |             length = 1
 76 |         elif RDT.checkFreq(offset):
 77 |             length = RDT.handleCallHeader(offset) 
 78 |         else:
 79 |             length = 1
 80 |         offset += length 
 81 |         if offset == fileSize:
 82 |             print(f'File was parsed successfully! Written to {filename}')
 83 |             break
 84 |     
 85 |     RDT.output.close()
 86 | 
 87 | def main():
 88 |     # Parser logic
 89 |     parser = argparse.ArgumentParser(description=f'Parse a binary file for Codec call GCL. Ex. script.py <filename> <output.txt>')
 90 | 
 91 |     parser.add_argument('filename', required=False, type=str, help="The call file to parse. Can be RADIO.DAT or a portion of it.")
 92 |     parser.add_argument('-o', '--output', type=str, required=False, help="(Optional) Provides an output file (.txt)")
 93 |     
 94 |     parser.add_argument('-v', '--verbose', action='store_true', help="Write any errors to stdout for help parsing the file")
 95 |     parser.add_argument('-j', '--japanese', action='store_true', help="Toggles translation for Japanese text strings")
 96 |     parser.add_argument('-i', '--indent', action='store_true', help="Indents container blocks, WORK IN PROGRESS!")
 97 |     args = parser.parse_args()
 98 | 
 99 |     if not args.filename:
100 |         args.filename = os.read(f'Please provide filename: ')
101 |     if args.verbose:
102 |         debugOutput = True
103 |     
104 |     if args.japanese:
105 |         jpn = True
106 |     
107 |     if args.indent:
108 |         indentToggle = True
109 |     
110 |     if args.output:
111 |         output = open(args.output, 'w')
112 |         outputFilename = args.output
113 |     
114 |     # Handle inputting radio file:
115 |     global radioFile
116 |     global radioData
117 |     global fileSize
118 | 
119 |     radioFile = open(args.filename, 'rb')
120 |     #radioFile = open(filename, 'rb')
121 |     radioData = radioFile.read()
122 |     fileSize = len(radioData)
123 | 
124 |     extractRadioCallHeaders('headers.txt')
125 |     analyzeRadioFile()
126 | 
127 |     
128 | 
129 | 
130 | 
# This doesn't work because i did not code with contextual variables in mind >:O
# (main() is left uncalled; the same steps run inline below so that the
# assignments land directly on the module-level names.)
if __name__ == '__main__':
    # We should get args from user. Using argParse
    # main()

    # Parser logic
    parser = argparse.ArgumentParser(description=f'Parse a binary file for Codec call GCL. Ex. script.py <filename> <output.txt>')

    parser.add_argument('filename', type=str, help="The call file to parse. Can be RADIO.DAT or a portion of it.")
    parser.add_argument('-o', '--output', type=str, required=False, help="(Optional) Provides an output file (.txt)")
    
    parser.add_argument('-v', '--verbose', action='store_true', help="Write any errors to stdout for help parsing the file")
    parser.add_argument('-j', '--japanese', action='store_true', help="Toggles translation for Japanese text strings")
    parser.add_argument('-i', '--indent', action='store_true', help="Indents container blocks, WORK IN PROGRESS!")
    args = parser.parse_args()

    if not args.filename:
        # Only reachable when an empty string is passed; input() prompts the
        # user, whereas os.read() expects a file descriptor and a byte count.
        args.filename = input('Please provide filename: ')
    if args.verbose:
        debugOutput = True
    
    if args.japanese:
        jpn = True
    
    if args.indent:
        indentToggle = True
    
    if args.output:
        output = open(args.output, 'w')
        outputFilename = args.output
    
    # Handle inputting radio file:
    # (At module level these assignments are already global; no ``global`` needed.)
    radioFile = open(args.filename, 'rb')
    #radioFile = open(filename, 'rb')
    radioData = radioFile.read()
    fileSize = len(radioData)

    extractRadioCallHeaders('headers.txt')
    analyzeRadioFile()


--------------------------------------------------------------------------------
/Old vers/stageCalls-before-0A.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/Old vers/stageCalls-before-0A.ods


--------------------------------------------------------------------------------
/Old vers/vagToWav.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Basically had LLM generate this from ColdSauce's VAG to WAV converter
  3 | https://github.com/ColdSauce/psxsdk/blob/master/tools/vag2wav.c
  4 | """
  5 | import struct
  6 | import wave
  7 | 
  8 | # Constants for VAG and WAV files
  9 | SAMPLE_RATE = 22050  # Sample rate for PlayStation ADPCM (VAG)
 10 | CHANNELS = 1         # Mono audio
 11 | BITS_PER_SAMPLE = 16 # WAV files typically use 16-bit PCM
 12 | BLOCK_SIZE = 24      # Each block contains 16 samples and 4 coefficients
 13 | 
 14 | # Coefficients for the ADPCM decoding
 15 | ADPCM_COEFFICIENTS = [
 16 |     (0, 0),
 17 |     (60, 0),
 18 |     (115, -52),
 19 |     (98, -55),
 20 |     (122, -60)
 21 | ]
 22 | 
 23 | def adpcm_decode(vag_data):
 24 |     pcm_data = []
 25 |     s_1, s_2 = 0.0, 0.0
 26 |     
 27 |     for i in range(0, len(vag_data), BLOCK_SIZE):
 28 |         block = vag_data[i:i + BLOCK_SIZE]
 29 |         
 30 |         # First byte contains the shift factor and predictor number
 31 |         predictor_number = (block[0] >> 4) & 0x0F
 32 |         shift_factor = block[0] & 0x0F
 33 |         
 34 |         # Coefficients for the ADPCM decoding
 35 |         f1, f2 = ADPCM_COEFFICIENTS[predictor_number]
 36 |         
 37 |         # Decode each sample in the block
 38 |         for j in range(4, BLOCK_SIZE):
 39 |             nibble = (block[j // 2] >> ((j % 2) * 4)) & 0x0F
 40 |             
 41 |             # Sign-extend the 4-bit nibble to a 16-bit signed integer
 42 |             if nibble >= 8:
 43 |                 nibble -= 16
 44 |             
 45 |             # Calculate the predicted sample value
 46 |             predicted_sample = int((f1 * s_1 + f2 * s_2) / 64)
 47 |             
 48 |             # Add the decoded difference to the predicted sample
 49 |             differential_sample = (nibble << shift_factor) + predicted_sample
 50 |             
 51 |             # Clamp the sample to 16-bit signed integer range
 52 |             if differential_sample > 32767:
 53 |                 differential_sample = 32767
 54 |             elif differential_sample < -32768:
 55 |                 differential_sample = -32768
 56 |             
 57 |             # Add the sample to the PCM data list
 58 |             pcm_data.append(differential_sample)
 59 |             
 60 |             # Update the previous two samples for the next iteration
 61 |             s_2, s_1 = s_1, differential_sample
 62 |     
 63 |     return pcm_data
 64 | 
 65 | def vag_to_wav(vag_file_path, wav_file_path):
 66 |     with open(vag_file_path, "rb") as vag_file:
 67 |         # Read the VAG header
 68 |         vag_name = vag_file.read(4)
 69 |         
 70 |         if vag_name != b'VAGp':
 71 |             print(f"{vag_file_path} is not in VAG format. Aborting.")
 72 |             return -1
 73 |         
 74 |         # Skip the version and other metadata (for simplicity, we assume known structure)
 75 |         vag_file.seek(12)
 76 |         data_size = struct.unpack(">I", vag_file.read(4))[0]
 77 |         
 78 |         print(f"Data Size: {data_size} bytes")
 79 |         
 80 |         # Read the VAG audio data
 81 |         vag_data = vag_file.read(data_size - 16)  # Skip header and metadata
 82 |         
 83 |         # Decode ADPCM to PCM
 84 |         pcm_data = adpcm_decode(vag_data)
 85 |         
 86 |         # Write the WAV file
 87 |         with wave.open(wav_file_path, "wb") as wav_file:
 88 |             wav_file.setnchannels(CHANNELS)
 89 |             wav_file.setsampwidth(BITS_PER_SAMPLE // 8)
 90 |             wav_file.setframerate(SAMPLE_RATE)
 91 |             
 92 |             # Convert PCM data to bytes and write to the WAV file
 93 |             pcm_bytes = struct.pack(f'>{len(pcm_data)}h', *pcm_data)
 94 |             wav_file.writeframes(pcm_bytes)
 95 |     
 96 |     print(f"Converted {vag_file_path} to {wav_file_path}")
 97 |     return 0
 98 | 
# Example usage
# Converts one hard-coded sample file when the module is run as a script.
if __name__ == "__main__":
    vag_file_path = "workingFiles/vag-examples/00042.vag"
    wav_file_path = "workingFiles/vag-examples/00042.wav"
    vag_to_wav(vag_file_path, wav_file_path)
104 | 


--------------------------------------------------------------------------------
/Old vers/vagToWav2.py:
--------------------------------------------------------------------------------
 1 | import struct
 2 | 
 3 | def vag2wav(vag_path, wav_path):
 4 |     # Open the VAG file
 5 |     with open(vag_path, "rb") as vag:
 6 |         # Read the header
 7 |         header = vag.read(48)
 8 |         if len(header) < 48:
 9 |             raise ValueError("Invalid VAG file: Header too short.")
10 |         
11 |         # Check the magic number (VAGp)
12 |         magic_number = header[:4]
13 |         if magic_number != b'VAGp':
14 |             raise ValueError(f"Invalid VAG file: Magic number {magic_number} does not match 'VAGp'.")
15 |         
16 |         # Extract data_size
17 |         data_size = struct.unpack(">I", header[12:16])[0]
18 |         
19 |         # Prepare the WAV header
20 |         wav_header = (
21 |             b'RIFF' + struct.pack("<I", 36 + data_size * 4) +  # ChunkSize
22 |             b'WAVEfmt ' + struct.pack("<I", 16) +             # Subchunk1Size (PCM)
23 |             struct.pack("<H", 1) +                            # AudioFormat (PCM = 1)
24 |             struct.pack("<H", 1) +                            # NumChannels (mono)
25 |             struct.pack("<I", 22050) +                         # SampleRate (default for VAG is 22050 Hz)
26 |             struct.pack("<I", 44100) +                         # ByteRate (SampleRate * NumChannels * BitsPerSample / 8)
27 |             struct.pack("<H", 2) +                            # BlockAlign (NumChannels * BitsPerSample / 8)
28 |             struct.pack("<H", 16) +                           # BitsPerSample
29 |             b'data' + struct.pack("<I", data_size * 4)         # Subchunk2Size
30 |         )
31 |         
32 |         # Open the WAV file for writing
33 |         with open(wav_path, "wb") as pcm:
34 |             pcm.write(wav_header)
35 |             
36 |             # Predictors and shift factors
37 |             f = [
38 |                 [0.0, 0.0],
39 |                 [60.0 / 64.0, 0.0],
40 |                 [115.0 / 64.0, -52.0 / 64.0],
41 |                 [98.0 / 64.0, -55.0 / 64.0],
42 |                 [122.0 / 64.0, -60.0 / 64.0]
43 |             ]
44 |             
45 |             s_1 = 0.0
46 |             s_2 = 0.0
47 |             
48 |             samples = [0] * 28
49 |             
50 |             # Process each block
51 |             while vag.tell() < (data_size + 48):
52 |                 predict_nr = struct.unpack("B", vag.read(1))[0]
53 |                 shift_factor = predict_nr & 0xf
54 |                 predict_nr >>= 4
55 |                 flags = struct.unpack("B", vag.read(1))[0]  # flags
56 |                 
57 |                 if flags == 7:
58 |                     break
59 |                 
60 |                 for i in range(0, 28, 2):
61 |                     d = struct.unpack("B", vag.read(1))[0]
62 |                     s = (d & 0xf) << 12
63 |                     if s & 0x8000:
64 |                         s |= 0xffff0000
65 |                     samples[i] = int(s >> shift_factor)
66 |                     
67 |                     s = (d & 0xf0) << 8
68 |                     if s & 0x80000:
69 |                         s |= 0xffff0000
70 |                     samples[i + 1] = int(s >> shift_factor)
71 |                 
72 |                 for i in range(28):
73 |                     samples[i] += s_1 * f[predict_nr][0] + s_2 * f[predict_nr][1]
74 |                     s_2 = s_1
75 |                     s_1 = samples[i]
76 |                     
77 |                     # Clamp to 16-bit signed integer range
78 |                     sample_value = max(-32768, min(32767, int(samples[i] + 0.5)))
79 |                     pcm.write(struct.pack("<h", sample_value))
80 | 
# Command-line entry point: expects exactly two arguments, the input .vag
# path and the output .wav path; otherwise prints usage and exits with 1.
if __name__ == "__main__":
    import sys
    if len(sys.argv) != 3:
        print("Usage: python vag2wav.py <input.vag> <output.wav>")
        sys.exit(1)
    
    vag_path = sys.argv[1]
    wav_path = sys.argv[2]
    vag2wav(vag_path, wav_path)
90 | 


--------------------------------------------------------------------------------
/Old vers/xmltest.py:
--------------------------------------------------------------------------------
 1 | import os, xml.etree.ElementTree as ET
 2 | 
# Scratch script exercising xml.etree.ElementTree in three independent parts:
# 1) build a tree from dicts and write it to library.xml,
# 2) parse an XML string and edit one element's attribute,
# 3) build a small tree using attribute-only elements.
root = ET.Element("library")

books = [
    {"title": "The Catcher in the Rye", "author": "J.D. Salinger", "edition": "First", "publication_year": "1951"},
    {"title": "1984", "author": "George Orwell", "edition": "Second", "publication_year": "1949"},
    {"title": "To Kill a Mockingbird", "author": "Harper Lee", "edition": "Third", "publication_year": "1960"}
]

# Edition and year become attributes of <book>; title and author become child elements.
for book in books:
    book_element = ET.SubElement(root, "book", {
        "edition": book["edition"],
        "publication_year": book["publication_year"]
    })
    title_element = ET.SubElement(book_element, "title")
    title_element.text = book['title']
    author_element = ET.SubElement(book_element, "author")
    author_element.text = book['author']

# Write the tree to disk (creates library.xml in the current directory)
tree = ET.ElementTree(root)
tree.write("library.xml")

# Pretty print XML for viewing
from xml.dom.minidom import parseString
xmlstr = parseString(ET.tostring(root)).toprettyxml(indent="   ")
print(xmlstr)

##### 

# Part 2: here every field of a book is stored as an attribute.
xml_data = """
<library>
    <book title="The Catcher in the Rye" author="J.D. Salinger" edition="First" publication_year="1951"/>
    <book title="1984" author="George Orwell" edition="First" publication_year="1949"/>
    <book title="To Kill a Mockingbird" author="Harper Lee" edition="Third" publication_year="1960"/>
</library>
"""

# Parse the XML data
root = ET.fromstring(xml_data)

# Find the book with the title "1984"
book = root.find(".//book[@title='1984']")

if book is not None:
    # Change the edition attribute
    book.set("edition", "Revised Edition")

    # If you want to add new attributes or modify child elements, do so here
    # For example, changing the author:
    # book.set("author", "New Author Name")

    # Print out the modified XML
    xmlstr = ET.tostring(root, encoding='unicode')
    print(xmlstr)
else:
    print("Book not found")


# Part 3 (re-imports ET; it is already imported at the top of the file)
import xml.etree.ElementTree as ET

# Create the root element
root = ET.Element("library")

# Add multiple books using SubElement
ET.SubElement(root, "book", {"title": "Book One", "author": "Author A"})
ET.SubElement(root, "book", {"title": "Book Two", "author": "Author B"})

# Print the resulting XML
tree = ET.ElementTree(root)
xmlstr = ET.tostring(root, encoding='unicode')
print(xmlstr)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # mgs1-scripts
  2 | Reverse engineering scripts for MGS1. 
  3 | So far, mostly scoped on RADIO.DAT extraction. 
  4 | 
  5 | # Project Goals
  6 | 
  7 | I started this to finally have an un-dubbed version of Metal Gear Solid to play with. Hopefully once we can inject english subtitles into the Japanese version, we'll be able to experience the original VA performance and see the subtleties between versions released in the US and JPN. 
  8 | 
You can find additional details on my blog at [mgs-undubbed.io](https://blog.mgs-undubbed.io).
 10 | 
 11 | # Next steps:
 12 | 
 13 | Most of the tooling is done. There are some finishing touches for demo and zmovie still before the code is complete. Then it's onto translation. 
 14 | 
 15 | # Usage
 16 | 
 17 | Most of the scripts will have command-line based arguments. I do have some directories still hard coded. 
 18 | 
 19 | ## Directories and Flow
 20 | 
 21 | Each of the files have several scripts to help with editing. I do want to explain how my scripts are used, though. Most everything has a command line usage with arguments. There are some instances where things still need to be hard coded. For that I've tried to keep all of the variables in the top section so they can be easily changed. 
 22 | 
 23 | The top level of my project folder looks like this:
 24 | 
 25 | ```
 26 | [Project Folder]
 27 | ├── build
 28 | │   ├── jpn-d1
 29 | │   │   ├── DUMMY3M.DAT
 30 | │   │   ├── MGS
 31 | │   │   ├── SYSTEM.CNF
 32 | │   │   ├── license_data.dat
 33 | │   │   └── rebuild.xml
 34 | │   ├── jpn-d2 ...
 35 | │   ├── usa-d1
 36 | │   └── usa-d2
 37 | ├── build-proprietary
 38 | │   ├── README.md
 39 | │   ├── US Version Text.txt
 40 | │   ├── demo
 41 | │   │   ├── demoText-jpn-modified.json
 42 | │   │   └── demoText-jpn-undub.json
 43 | │   ├── itemDesc-inject.json
 44 | │   ├── itemDesc-jpn.json
 45 | │   ├── jsonCompare.py
 46 | │   ├── radio
 47 | │   │   ├── codecWindowPreview.py
 48 | │   │   ├── dialogueLineReplace.json
 49 | │   │   ├── dialogueSwap.py
 50 | │   │   ├── output.json
 51 | │   │   └── storyCalls.json
 52 | │   ├── vox
 53 | │   │   ├── vox-jpn-d1.json
 54 | │   │   └── voxText-jpn-d1.json
 55 | │   └── zmovie
 56 | ├── build-src
 57 | │   ├── jpn-d1
 58 | │   │   ├── DUMMY3M.DAT
 59 | │   │   ├── MGS
 60 | │   │   ├── SYSTEM.CNF
 61 | │   │   ├── license_data.dat
 62 | │   │   └── rebuild.xml
 63 | │   ├── jpn-d2 ...
 64 | │   ├── usa-d1 ...
 65 | │   └── usa-d2 ...
 66 | ├── myScripts
 67 | │   ├── DemoTools
 68 | │   ├── ... etc
 69 | ├── workingFiles
 70 | ```
 71 | 
 72 | For each of the top level directories here's my description:
 73 | - **build-src** : All original iso files and the rebuild.xml generated by dumpsxiso are written here. 
 74 | - **build** : These are the modified files that are rebuilt using mkpsxiso for testing
 75 | - **build-proprietary** : These files contain my script files that are injected. 
 76 | - **myScripts** : (this repository of code)
 77 | - **workingFiles** : I should have everything extracted by script going here. This was a recent change, not everything is there. 
 78 | 
 79 | The reasons I had for splitting them this way was:
 80 | 1. Common file paths that could have [*version*] or [*disk*] swapped out easily.
 81 | 2. This keeps an unmodified version of the disk's original contents so that I can replace it when I want to revert changes.
 82 | 3. The build-proprietary folder now holds my working files so that they are not included. The scripts themselves have no copyrighted material, but as the "source" files for my modifications contain the US version texts, I wanted to make a separate git repository for both collaboration and tracking changes to my mods. 
 83 |  
 84 | Hopefully this helps with the scripts. Once I revisit front-end and UX, I'll look at doing a master config file so that it's clear where things are being saved. For now, my goal for each script will be to function similarly to the radioTools.py script where an input/output file is specified in the command line usage.
 85 | 
 86 | ## Recompilation workflow
 87 | 
 88 | For a working example of how everything is compiled, check this script, as it is what I use for running all of the scripts in tandem. 
 89 | myScripts/testing/runJpnBuildTest.sh
 90 | 
 91 | This script..
 92 | 1. Starts with the unmodified files
 93 | 2. Extracts them to the working directory
 94 | 3. Modifies them with my new dialogues from build-proprietary
 95 | 4. Compiles the new dat files and moves them to build/ (If something is excluded or skipped we replace it with the original)
 96 | 5. Also runs any fixes (for example, stage.dir offset adjustments)
 97 | 6. Compiles a new iso with mkpsxiso, and 
 98 | 7. opens the resulting iso file in Duckstation for testing. 
 99 | 
100 | I do want to note for testing: do not carry savestates over between builds, as that has led to crashes/instability for me. Only use them within the same iso; don't save a state and then load it with a new build.
101 | 
102 | # The scripts 
103 | 
104 | ## Radio.dat
105 | 
106 | Quick overview:
107 | 
108 | 1. RadioDatTools.py -- Extract game text in xml and json format
109 | 2. xmlModifierTools.py -- Imports adjusted json dialogue into the XML file. Recomputes lengths of all calls as needed
110 | 3. RadioDatRecompiler.py -- Takes an XML Radio data and creates a .dat file. Can run the recompiler and also adjust stage.dir values (using -s and -S flags)
111 | 
112 | Most scripts have an arg parser, use -h for help.
113 | 
114 | ex: 
115 | 
116 | `RadioDatTools.py -h` for help.
117 | 
118 | ```
119 | usage: RadioDatTools.py [-h] [-v] [-i] [-s] [-H] [-g] [-x] [-z] filename [output]
120 | 
121 | Parse a binary file for Codec call GCL. Ex. script.py <filename> <output.txt>
122 | 
123 | positional arguments:
124 |   filename         The call file to parse. Can be RADIO.DAT or a portion of it.
125 |   output           Output Filename (.txt)
126 | 
127 | options:
128 |   -h, --help       show this help message and exit
129 |   -v, --verbose    Write any errors to stdout for help parsing the file
130 |   -i, --indent     Indents container blocks, WORK IN PROGRESS!
131 |   -s, --split      Split calls into individual bin files
132 |   -H, --headers    Extract call headers ONLY!
133 |   -g, --graphics   export graphics
134 |   -x, --xmloutput  Exports the call data into XML format
135 |   -z, --iseeeva    Exports the dialogue in a json like Iseeeva's script
136 |   ```
137 | 
138 | ## Demo.dat
139 | 
140 | Example usage:
141 | 
142 | 1. splitDemoFiles.py -- Splits all demo files to individual demos
143 | 2. demoTextExtractor.py -- Extracts texts from all demo files in the output folder
144 | 3. demoTextInjector.py -- Injects json text back into demo files, outputs the binaries as new files
145 | 4. demoRejoiner.py -- Joins all demo files into one large DAT file. 
146 | 
147 | ## Known issues:
148 | - RADIO.DAT: MGS Integral does not recompile correctly. I think there is extra null space between call data (after graphics data) that will need to be accounted for. The data is correct, but there's also too much graphics data. 
149 | - RADIO.DAT: Recompiler works but will not correctly count/re-encode special characters. 
150 | - RADIO.DAT: Still missing/incorrect kanji characters that need to be OCR'd from their graphics files. ~30 yet to identify, numerous others are wrong. Reach out to me if you would like to help translate them!
151 | - RADIO.DAT: Have not tested all the offset adjustments to STAGE.DIR yet. Could be faulty. Works so far as I've tested.
152 | 
153 | This tool is now functional with some limitations:
154 | 1. Save blocks need some manual tweaks in the code to be 100% accurate on recompile, but it can be done. 
155 | 2. Length calculations should be correct. The script will warn you if a call exceeds the safe limit (length bytes are only 2, so max length in bytes of a call is 65535, if we exceed this the files may not work properly.)
156 | 
157 | # Using this script to replace the dialogue. 
158 | 
159 | To use this to make changes, run it in more or less this way... Here's an example workflow:
160 | 
161 | 1. Use RadioDatTools.py to export an `XML` and `json` file containing the full data. 
162 | ```python radioDatTools.py RADIO.DAT -zx```
163 | 2. Edit the XML data. If using the json, use the jsonTools.py to inject call dialogue into XML data. Optionally use json tools to merge dialogue with offsets from different versions. 
164 | ```python jsontools.py subtitles.json offsets.json```
165 | 3. Use xmlModifierTools.py to inject the json data to the XML. Different aspects can be commented out, but should match the original if untouched.
166 | ```python xmlModifierTools.py inject RADIO-output-Iseeva.json RADIO-output.xml```
167 | 4. Once the XML is fully completed, it's time to recompile RADIO.DAT. Use the radioDatRecompiler to recompile any valid XML into a binary DAT file. use the -S to modify STAGE.DIR offset numbers. There will be expected errors, but at this time it might work. If STAGE.DIR is specified (-s) we use that as a template to fix offsets and output a new file (use -S to set output name)
168 | ```python RadioDatRecompiler.py -p RADIO-output.xml new-RADIO.DAT -s STAGE.DIR -S new-STAGE.DIR```
169 | 
170 | There are nuances there but that's basically the gist. Either `RadioRecompiler -p` or `xmlModifierTools prepare` will calculate the length changes needed. For more info, use -h on any script.
171 | 
172 | [Note: Recompiling with the -x uses the original hex for dialogue and overrides any changes, but DOES NOT RECALCULATE LENGTHS! Use it to ensure recompilation is working, not for xml files where lengths were changed.]
173 | 
174 | # Scripts Overview
175 | 
176 | ## Main tools:
177 | 
178 | ### RadioDatTools.py
179 | 
180 | This extracts all call data, hopefully keeping other byte data intact in the file. The goal is to have all bytes there so it can be re-compiled into a new file. -h for help. This should be mostly complete now. Remaining work will be adjusting XML container data as needed for recompilation.
181 | 
182 | Can also split calls out for further analysis.
183 | 
184 | Usage:
185 | 
186 | ```
187 | $ RadioDatTools.py path/to/Radio.dat [outputfilename] [-h, -i, -d, ...]
188 | ```
189 | 
190 | ### RadioDatRecompiler.py 
191 | Recompiles a given XML document (exported from RadioDatTools) into a binary file. 
192 | 
193 | Eventually, it will inject the json data and recompute the lengths for all containers.
194 | 
195 | ### xmlModifierTools.py
196 | 
197 | Scripts to modify the XML, including recalculating lengths once dialogues have been changed. 
198 | 
199 | NOTE! It will not correctly account for any two-byte characters that were decoded!
200 | 
201 | ### jsonTools.py
202 | 
203 | Use this to zip together offsets from one json and subtitles from another json (useful for injecting an English subtitle in with japanese offsets)
204 | 
205 | ### StageDirTools
206 | 
207 | ### callsInStageDirFinder.py
208 | 
209 | Scripts for finding all call offsets in Stage.dir. Currently this is working. Can be run on its own for analysis tools. 
210 | 
211 | Logic is shamelessly reverse engineered from iseeeva's radio extractor:
212 | https://github.com/iseeeva/metal/tree/main
213 | 
214 | ## radioTools
215 | 
216 | ### callExtractor.py
217 | 
218 | Extracts a single call based on offsets (leaves in a bin format), to be merged into a better library
219 | 
220 | ### callInsertor.py
221 | 
222 | Inserts a call into an existing RADIO.DAT file. Useful if you want to modify only one call's worth of binary and inject it at the original offset. Good for testing recompiler logic.
223 | 
224 | ### splitRadioFile.py -- DEPRECATED
225 | 
226 | Previously split RADIO.DAT into individual calls. Use RadioDatTools with the -s option. 
227 | 
228 | ### characters.py
229 | 
230 | Contains dicts in use by the radioDict library. SOME CHARACTERS HAVE YET TO BE IDENTIFIED!
231 | 
232 | ### radioDict.py
233 | 
234 | The heart of the translation of japanese/special character hex. This has libraries for decoding the odd hex codes into japanese characters, but can also assist in outputting graphics found in the data. 
235 | 
236 | NOTE: Does not yet decode / re-encode all characters. 
237 | 
238 | 


--------------------------------------------------------------------------------
/StageDirTools/Notes.txt:
--------------------------------------------------------------------------------
 1 | 02348A
 2 | 
 3 | 
 4 | 01 00 91 00 
 5 | A7 96 73 62 8A 24 02 00 # Length of file: 0x02238A # SB / bin 
 6 | 00 00 6E 64 38 E6 00 00 # nd # length E638? Starts 0x00023800 This is the DAR file 
 7 | A7 96 63 6B 00 00 00 00 # CK 00
 8 | A7 96 63 6C B4 00 00 00 # CL 180 bytes kmd
 9 | A7 96 63 68 B8 00 00 00 # CG 184 bytes 
10 | 54 EA 63 67 EC 00 00 00 # CH EC = 
11 | 00 00 63 FF F8 03 00 00 # C 0xff // 1016 b / 
12 | 
13 | 
14 | 22 AD 73 77 40 46 01 00 # 00ad22.wvx
15 | 3A 6D 73 65 B0 11 00 00 # 
16 | 90 ED 73 6D 90 04 00 00
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/StageDirTools/analyzeStageDirFiles.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs callsInStageDirFinder.py on every .gcx file found under the stage directory.

stage_dir="radioDatFiles/stage-jpn/"

# NUL-delimited find output keeps paths with spaces or glob characters intact,
# which the unquoted `for script in $(find ...)` form would split or expand.
find "$stage_dir" -name '*.gcx' -print0 | while IFS= read -r -d '' script; do
    echo "$script"
    # stdin is redirected so the Python process cannot consume the file list on the pipe.
    python3 myScripts/StageDirTools/callsInStageDirFinder.py "$script" < /dev/null
done
9 | 


--------------------------------------------------------------------------------
/StageDirTools/assmembleDar.py:
--------------------------------------------------------------------------------
 1 | import struct
 2 | import os
 3 | import glob
 4 | import argparse
 5 | 
 6 | def extract_numeric_prefix(filename):
 7 |     # Extract the numeric prefix before the first hyphen
 8 |     base_name = os.path.basename(filename)
 9 |     prefix = base_name.split('-')[0]
10 |     return int(prefix)
11 | 
def getHashHex(filename: str) -> str:
    """Return the second hyphen-separated field of *filename*, cut at its first period.

    Raises IndexError when *filename* contains no hyphen.
    """
    secondField = filename.split('-')[1]
    hashText, _, _ = secondField.partition('.')
    return hashText
14 | 
15 | 
if __name__ == "__main__":
    # Command-line entry point: packs every file of a directory into one DAR archive.
    parser = argparse.ArgumentParser(description=f'Creates a dar file from a directory with .pcx files. Ex: assembleDar.py path/to/pcxfiles/ [output.dar]')
    parser.add_argument('input', type=str, help="Folder containing .pcx files to assemble into a DAR.")
    parser.add_argument('filename', type=str, help="Output filename, ex: new-01.dar")

    args = parser.parse_args()

    inputDir: str = args.input
    darFileName: str = args.filename

    # Entries are written in ascending order of the numeric prefix of each file name.
    files = sorted(glob.glob(f'{inputDir}/*'), key=extract_numeric_prefix)

    # Chunks are collected and joined once instead of growing a bytes object per file.
    darChunks = []

    for file in files:
        # The hex text taken from the file name is written byte-reversed as the entry header.
        fileHeader = getHashHex(os.path.basename(file))
        fileHeadBytes = bytes.fromhex(fileHeader)[::-1]
        print(fileHeadBytes.hex())
        with open(file, 'rb') as f:
            data = f.read()
        # "<I" pins the length field to 4 bytes, little-endian, on every platform;
        # a bare "I" follows the byte order of the machine running the script.
        darChunks.append(fileHeadBytes + struct.pack("<I", len(data)) + data)

    with open(darFileName, 'wb') as f:
        f.write(b''.join(darChunks))


--------------------------------------------------------------------------------
/StageDirTools/callsInStageDirFinder.py:
--------------------------------------------------------------------------------
  1 | import os, struct
  2 | import argparse
  3 | import json
  4 | 
  5 | import progressbar
  6 | bar = progressbar.ProgressBar()
  7 | 
  8 | # filename = "radioDatFiles/STAGE-usa-d1.DIR"
  9 | 
 10 | freqList = [
 11 |     b'\x37\x05', # 140.85, Campbell
 12 |     b'\x37\x10', # 140.96, Mei Ling
 13 |     b'\x36\xbf', # 140.15, Meryl
 14 |     b'\x37\x20', # 141.12, Otacon
 15 |     b'\x37\x48', # 141.52, Nastasha
 16 |     b'\x37\x64', # 141.80, Miller
 17 |     b'\x36\xE0', # 140.48, Deepthroat
 18 |     b'\x36\xb7',  # 140.07, Staff, Integral exclusive
 19 |     b'\x36\xbb',
 20 |     b'\x36\xbc', 
 21 |     bytes.fromhex('36bb'), 
 22 |     bytes.fromhex('36bc'), # 140.12, ????
 23 |     b'\x37\xac', # 142.52, Nastasha? ACCIDENT
 24 | ]
 25 | 
 26 | # This dict will have {stageOffset: [ callOffset int, hexstr ] } to be updated later.
 27 | offsetDict: dict[int, tuple[int, str]] = {}
 28 | filesize = 0
 29 | stageData = b''
 30 | debug = False
 31 | outputFileToggle = False
 32 | 
 33 | def checkFreq(offset):
 34 |     global stageData
 35 |     
 36 |     if stageData[offset + 1 : offset + 3] in freqList:
 37 |         return True
 38 |     else:
 39 |         return False
 40 | 
 41 | def writeCall(offset):
 42 |     global stageData
 43 |     global freqList
 44 |     global outputFileToggle
 45 |     
 46 |     callHex = stageData[offset + 4: offset + 8].hex()
 47 |     callInt = str(struct.unpack('>L', b'\x00' + stageData[offset + 5: offset + 8])[0])
 48 |     offsetDict.update({offset: (callInt, callHex)})
 49 | 
 50 |     # Write to output file:    
 51 |     if outputFileToggle:
 52 |         
 53 |         writeString = f'{offset},'                                                          # Offset in stage.dir
 54 |         writeString += stageData[offset: offset + 4].hex() + ","                            # Offset of the frequency as it appears in hex
 55 |         writeString += str(struct.unpack('>h', stageData[offset + 1: offset + 3])[0]) + "," # Call Frequency     
 56 |         writeString += f'{callHex},{callInt},\n'                                            # offset (hex, int) of call in Radio.dat
 57 |         output.write(writeString)
 58 | 
 59 | # For now this will just get all offsets of radio calls in the stage.dir and write a CSV file with the relevent offsets.
 60 | def getCallOffsets():
 61 |     global filesize
 62 |     global bar
 63 |     
 64 |     offset = 0
 65 |     bar.maxval = filesize
 66 |     bar.start()
 67 | 
 68 |     while offset < filesize:
 69 |         # Check for \x01 first, then check for a call
 70 |         if stageData[offset].to_bytes() == b'\x01' and stageData[offset + 3].to_bytes() == b'\x0a': # After running without this, seems all call offsets DO have 0x0a in the 4th byte
 71 |             if checkFreq(offset): # We only write the call to the csv if the call matches a frequency, this check might not be needed....?
 72 |                 # Optional print, this is still useful for progress I guess
 73 |                 # print(f'Offset {offset} has a possible call!\n====================================\n')
 74 |                 writeCall(offset)
 75 |         offset += 1 # No matter what we increase offset in all scenarios
 76 |         bar.update(offset)
 77 |     bar.finish()
 78 | 
 79 | 
 80 | def main(args=None):
 81 |     global stageData
 82 |     global filesize 
 83 |     global outputFileToggle
 84 | 
 85 |     stageData = stageDir.read() # The byte stream is better to use than the file on disk if you can. 
 86 |     filesize = len(stageData)
 87 | 
 88 |     # Write csv header
 89 |     output.write('offset,call hex,frequency,call data offset\n')
 90 | 
 91 |     # Main used to just be getting the call offsets
 92 |     getCallOffsets()
 93 |     print('Finished checking for calls in STAGE.DIR!')
 94 |     output.close()
 95 | 
 96 |     with open("callOffsetDict.json", 'w') as f:
 97 |         f.write(json.dumps(offsetDict))
 98 |         f.close
 99 | 
if __name__ == "__main__":

    # We should get args from user. Using argParse
    parser = argparse.ArgumentParser(description=f'Search a GCX file for RADIO.DAT codec calls')
    # REQUIRED
    parser.add_argument('filename', type=str, help="The GCX file to Search. Can be RADIO.DAT or a portion of it.")
    parser.add_argument('output', nargs="?", type=str, help="Output Filename (.txt)")

    args = parser.parse_args()

    # Args parsed
    filename: str = args.filename

    # os.path copes with a bare file name that has no directory part, where
    # split('/')[-2] raised IndexError; stageName is an empty string in that case.
    stageName = os.path.basename(os.path.dirname(filename))
    stageFile = os.path.basename(filename).split(".")[0]

    print(f'{stageName}/{stageFile}')

    if args.output:
        outputFile = args.output
    else:
        outputFile = f'stageAnalysis-jpn/{stageName}-{stageFile}.csv'
        # The default folder may not exist yet; open() below would fail without it.
        os.makedirs('stageAnalysis-jpn', exist_ok=True)

    # A CSV file is opened on both paths, so row output is enabled on both. Leaving
    # the toggle off for the default path produced a file holding only the header.
    outputFileToggle = True

    stageDir = open(filename, 'rb')
    output = open(outputFile, 'w')

    main()
128 | 
def init(filename: str):
    """Load *filename* into stageData and run the call scan over it.

    Sets the module-level ``stageData`` and ``filesize`` and then calls
    getCallOffsets(), which records every call it finds in ``offsetDict``.
    """
    global filesize
    global stageData

    # The context manager closes the handle, which the bare open() never did.
    with open(filename, 'rb') as stageDir:
        stageData = stageDir.read()
    filesize = len(stageData)

    if debug:
        print(offsetDict)

    print(f'Getting STAGE.DIR call offsets... please be patient!')
    getCallOffsets()

    print('Finished checking for calls in STAGE.DIR! Ready to proceed.')


--------------------------------------------------------------------------------
/StageDirTools/extractDar.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Simple script that exports all .pcx images in a .dar archive.
 3 | """
 4 | import struct
 5 | import os
 6 | import argparse
 7 | 
 8 | def parse_dar_file(file_path, output_dir):
 9 |     with open(file_path, 'rb') as f:
10 |         # Read and parse the header (example assumes header contains number of files)
11 |         num_files = struct.unpack('<I', f.read(4))[0]  # Assuming little-endian unsigned int
12 | 
13 |         file_entries = []
14 |         for _ in range(num_files):
15 |             # Read each file entry's metadata
16 |             # Example assumes each entry has a 4-byte offset and 4-byte size
17 |             offset = struct.unpack('<I', f.read(4))[0]
18 |             size = struct.unpack('<I', f.read(4))[0]
19 |             file_entries.append((offset, size))
20 | 
21 |         for i, (offset, size) in enumerate(file_entries):
22 |             f.seek(offset)
23 |             file_data = f.read(size)
24 |             output_file_path = os.path.join(output_dir, f'file_{i}')
25 |             with open(output_file_path, 'wb') as out_file:
26 |                 out_file.write(file_data)
27 | 
28 | # Usage
29 | # parse_dar_file('/home/solidmixer/projects/mgs1-undub/extractedStage/s00a/0000.dar', '/home/solidmixer/projects/mgs1-undub/extractedStage/s00a/')
30 | # darFileName = input(f'What dar file should I extract? ')
31 | 
32 | darFileName = 'extractedStage/s00a/s00a-02-0000.dar'
33 | 
if __name__ == "__main__":
    # Command-line entry point: writes every entry of a .dar archive out as a .pcx file.
    parser = argparse.ArgumentParser(description=f'Extracts textures in a .dar file. Ex: extractDar.py [example.dar] [output.dir]')

    parser.add_argument('filename', type=str, help="the Dar file to extract.")
    parser.add_argument('outputDir', nargs="?", type=str, help="Output directory")

    args = parser.parse_args()

    darFileName: str = args.filename

    with open(darFileName, 'rb') as darFile:
        darData = darFile.read()

    outputDir: str
    if args.outputDir is None:
        # Default: a folder beside the archive, named after it up to the first period.
        # os.path.join keeps that relative when the archive path has no directory part;
        # formatting it as '{dirname}/{name}' produced '/name' at the filesystem root.
        outputDir = os.path.join(os.path.dirname(darFileName), os.path.basename(darFileName).split(".")[0])
    else:
        outputDir = args.outputDir
    os.makedirs(outputDir, exist_ok=True)

    offset = 0
    i = 1

    # Each entry is a 4-byte name (stored byte-reversed), a 4-byte little-endian
    # size, and then that many bytes of file data.
    while offset < len(darData):
        # Filename
        fileName = darData[offset: offset + 4][::-1].hex()
        print(f'{i:02}-{fileName} written!')
        # Get the file
        nextFileSize = struct.unpack("<I", darData[offset + 4: offset + 8])[0]
        offset += 8
        with open(f'{outputDir}/{i:03}-{fileName}.pcx', 'wb') as f:
            f.write(darData[offset: offset + nextFileSize])
        offset += nextFileSize
        i += 1


--------------------------------------------------------------------------------
/StageDirTools/stageDirFileExtractor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This is a testing script for developing a tool for working with STAGE.DIR
  3 | """
  4 | 
  5 | import os, re, glob, struct
  6 | 
  7 | extTable = { # From jayveer's REX utility: https://github.com/Jayveer/Rex/blob/master/mgs/common/ext_table.h
  8 |     0x62: "bin",
  9 |     0x63: "con",
 10 |     0x64: "dar",
 11 |     0x65: "efx",
 12 |     0x67: "gcx",
 13 |     0x68: "hzm",
 14 |     0x69: "img",
 15 |     0x6B: "kmd",
 16 |     0x6C: "lit",
 17 |     0x6D: "mdx",
 18 |     0x6F: "oar",
 19 |     0x70: "pcx",
 20 |     0x72: "rar",
 21 |     0x73: "sgt",
 22 |     0x77: "wvx",
 23 |     0x7A: "zmd",
 24 |     0xFF: "noFile" # End of the C family grouping
 25 | }
 26 | 
 27 | # First thing is to figure out a working Table of Contents, then we can work at making and splitting directories.
 28 | 
 29 | filename = 'build-src/usa-d1/MGS/STAGE.DIR'
 30 | filename = 'build-src/demo/MGS/STAGE.DIR' # Temporarily hijacking this
 31 | outputFolder = "extractedStage/"
 32 | outputFolder = "build-src/demo/extractedStage/"
 33 | 
 34 | stageData = open(filename, 'rb').read() 
 35 | debug = True
 36 | 
 37 | """
 38 | class tableContents:
 39 |     size = struct.unpack("I", stageData[0:4])[0]
 40 | 
 41 | """
 42 | 
 43 | class stageContents:
 44 |     name: str
 45 |     """
 46 |     data start/end is given in blocks of 0x800
 47 |     """
 48 |     startBlock: int
 49 |     endBlock: int
 50 |     binaryData: bytes
 51 | 
 52 |     files = [] # List of stageFile class objects, each is a file in the stage data.
 53 | 
 54 |     """
 55 |     Not sure if this will work?
 56 |     """
 57 |     def getFiles(self):
 58 |         self.files = getStageFiles(self.binaryData[0:0x800])
 59 | 
 60 | class stageFile:
 61 |     """
 62 |     An individual file in a stage. Should have the info
 63 |     needed to extract on a per-file basis if warranted.
 64 |     """
 65 |     nameChecksum: bytes
 66 |     fileFamily: int
 67 |     fileType: int
 68 |     # offset: int # I don't think this is used. use start instead. 
 69 |     startBlock: int # This is the block where the file starts, where each block is 0x800 and first block is TOC
 70 |     start: int # Offset (relative to the stage)
 71 |     end: int # start + size
 72 |     size: int # size of file (bytes)
 73 |     filename: str
 74 | 
 75 |     # Repacking info. 
 76 |     numBlocks: int # returns 0 if this is a c family!
 77 | 
 78 |     def getFilename(self) -> str:
 79 |         self.filename = f'{self.nameChecksum}.{extTable.get(self.fileType)}'
 80 |         return self.filename
 81 | 
 82 |     def getBlocks(self):
 83 |         if self.fileFamily == 0x63:
 84 |             self.numBlocks = 0
 85 |         else:
 86 |             self.numBlocks = self.size // 0x800 + 1
 87 |     
 88 |     def __str__(self):
 89 |         global filename
 90 |         filename = extTable.get(self.fileType)
 91 |         printText = f'File: {self.nameChecksum}.{filename}\n\tOffset: {self.start}\n\tSize: {self.size}'
 92 |         return printText
 93 |     
 94 | def getStagesInBin():
 95 |     global stageData
 96 | 
 97 |     size = struct.unpack("I", stageData[0:4])[0]
 98 |     tableOfConts = {} # List of stages and their blocks (start, end)
 99 |     stageList = []
100 | 
101 |     offset = 4
102 |     while offset < size:
103 |         newStage = stageContents()
104 |         # Get the data
105 |         stageName = stageData[offset:offset + 8].decode('utf8').rstrip('\x00')
106 |         location = struct.unpack("I", stageData[offset + 8: offset + 12])[0]
107 |         locationEnd = struct.unpack("I", stageData[offset + 20: offset + 24])[0]
108 |         if locationEnd == 0:
109 |             locationEnd = len(stageData) // 0x800
110 | 
111 |         # Old:
112 |         tableOfConts.update({stageName: (location, locationEnd)})
113 | 
114 |         # Add attributes to stage object:
115 |         newStage.name = stageName
116 |         newStage.startBlock = location
117 |         newStage.endBlock = locationEnd
118 |         newStage.binaryData = stageData[location * 0X800: locationEnd * 0X800]
119 |         
120 |         # Add to list
121 |         stageList.append(newStage)
122 |         offset += 12
123 |     
124 |     return stageList
125 | 
126 | allStages = getStagesInBin()
127 | 
def getStage(name: str):
    """Return the stage in allStages whose name equals *name*.

    Prints an error and exits with status 1 when no stage has that name.
    """
    found = next((candidate for candidate in allStages if candidate.name == name), None)
    if found is not None:
        return found
    print(f'Error! "{name}" is not a valid stage name! Exiting... ')
    exit(1)
135 | 
136 | """for key in tableOfConts.keys():
137 |     print(f'{key}: {tableOfConts.get(key)}')"""
138 | 
def printStageOffsets() -> None:
    """Print each stage's name with its (startBlock, endBlock) pair, one per line."""
    for entry in allStages:
        blockRange = f'({entry.startBlock}, {entry.endBlock})'
        print(f'{entry.name}: {blockRange}')
143 | 
def extractStageBins():
    """
    Writes individual stage binaries to individual folders/files
    """
    for stage in allStages:
        stageFolder = f"{outputFolder}{stage.name}"
        # exist_ok lets the extraction be re-run over an existing output tree,
        # matching how the __main__ block creates the same folders.
        os.makedirs(stageFolder, exist_ok=True)
        with open(f'{stageFolder}/{stage.name}.bin', 'wb') as f:
            f.write(stage.binaryData)
        print(f'Stage {stage.name} written!')
153 | 
def getStageFiles(fileListBin: bytes) -> list:
    """
    Parse a stage's table of contents into a list of stageFile objects.

    fileListBin is the table-of-contents bytes of one stage; the callers in this
    file pass the first 0x800 bytes of the stage data. The first 4 bytes are
    skipped, then 8-byte entries are read until an all-zero entry is reached.

    Every returned file has ``start`` and ``end`` expressed as byte offsets from
    the beginning of the stage data, and ``size == end - start``.
    """
    offset = 4
    stageFiles = []
    blockOffset = 1 # Always 1, never seen a table of contents longer than 0x800 bytes.

    while offset < len(fileListBin):
        # We can loop to see which type of file we hit and add it.
        if fileListBin[offset : offset + 8] == bytes(8): # Reached end of the contents
            if debug:
                print(f'Reached end of list! Breaking...')
            break
        elif fileListBin[offset + 2] == 0x63: # Handling C files...
            # Collect the entries of the group up to its 0xFF terminator entry.
            cfileHeaders = []
            while fileListBin[offset + 3] != 0xFF:
                cfileHeaders.append(fileListBin[offset: offset + 8])
                offset += 8

            # The terminator's value marks the end of the last file, as these are crunched together.
            groupLength = struct.unpack("<I", fileListBin[offset + 4: offset + 8])[0]
            groupBase = blockOffset * 0x800
            cFileBlocks = groupLength // 0x800 + 1

            # Walk the entries backwards: each file ends where the one after it starts.
            # Both ends are kept relative to the stage data. Previously the last file
            # kept a group-relative end, making its start:end slice empty, and the
            # other files had groupBase added to their size.
            stageCFiles = []
            nextStart = groupBase + groupLength
            for header in reversed(cfileHeaders):
                currentCFile = stageFile()
                currentCFile.nameChecksum = header[0:2][::-1].hex()
                currentCFile.fileFamily = header[2]
                currentCFile.fileType = header[3]
                currentCFile.startBlock = blockOffset # Doesnt mean anything for the cfiles.
                currentCFile.start = groupBase + struct.unpack("<I", header[4:8])[0]
                currentCFile.end = nextStart
                currentCFile.size = currentCFile.end - currentCFile.start

                nextStart = currentCFile.start
                stageCFiles.append(currentCFile)

            # Before exiting c family loop, add the total blocks
            blockOffset += cFileBlocks

            # Restore table order before adding the C files to stage files
            stageCFiles.reverse()
            stageFiles.extend(stageCFiles)
            # Offset still at the cfile total, ok to add 8 bytes.
        else:
            tocEntry = fileListBin[offset : offset + 8]
            currentFile = stageFile()
            currentFile.nameChecksum = tocEntry[0:2][::-1].hex()
            currentFile.fileFamily = tocEntry[2]
            currentFile.fileType = tocEntry[3]

            # These files start on a block boundary and carry their size in the entry.
            currentFile.size = struct.unpack("<I", tocEntry[4:8])[0]
            currentFile.startBlock = blockOffset
            currentFile.start = blockOffset * 0x800
            currentFile.end = currentFile.start + currentFile.size

            # Update the file blocks (blocks is how many blocks of 0x800 size it needs)
            # NOTE(review): a size that is an exact multiple of 0x800 counts one extra
            # block here - confirm against the archive layout.
            fileBlocks = currentFile.size // 0x800 + 1
            blockOffset += fileBlocks

            # Add to the files list
            stageFiles.append(currentFile)
        # After each file, we increase offset by 8
        offset += 8

    return stageFiles
230 | 
def printStageFiles():
    """Print every stage object held in allStages, one per line."""
    for entry in allStages:
        print(entry)
234 | 
235 | 
236 | """
237 | Next step: Write block and file size calcs. 
238 | Then: The file exports on a per-stage basis.
239 | """
240 | 
def exportStageFiles(stageName: str, file:str=None) -> None:
    """Placeholder for per-stage file export; it currently does nothing."""
    return None
243 | 
if __name__ == "__main__":
    # Interactive export: list one stage's files, then write one of them (or all of them) to disk.

    exportFileData: bytes = None

    stageSelect = input('Which stage do you want to list files from? \n')
    stage = getStage(stageSelect)
    files: list [stageFile] = getStageFiles(stage.binaryData[0:0x800])
    for listed in files:
        print(listed)
    fileToExport = input(f'Which file from stage {stageSelect} do you want to export? [ALL exports all files!]\n')

    # Ensure output directory exists
    stageFolder = f"{outputFolder}{stageSelect}"
    os.makedirs(stageFolder, exist_ok=True)

    # Build each file's name while looking for the requested one; stop at the first match.
    for candidate in files:
        if candidate.getFilename() == fileToExport:
            exportFileData = stage.binaryData[candidate.start: candidate.end]
            break

    if fileToExport == "ALL":
        # Each output name carries the file's position in the stage's table.
        for i, entry in enumerate(files):
            exportFileData = stage.binaryData[entry.start: entry.end]
            with open(f'{stageFolder}/{stageSelect}-{i:02}-{entry.filename}', 'wb') as f:
                f.write(exportFileData)
    elif exportFileData is None:
        print(f'Export failed! {fileToExport} was not found in stage {stageSelect}! Exiting...')
        exit(2)
    else:
        with open(f'{stageFolder}/{fileToExport}', 'wb') as f:
            f.write(exportFileData)

    exit(0)


--------------------------------------------------------------------------------
/audioTools/sub-test-2.py:
--------------------------------------------------------------------------------
 1 | # Subtitle test 2
 2 | 
 3 | import json
 4 | import time
 5 | 
 6 | # Load JSON data
 7 | with open('workingFiles/vag-testing.json', 'r') as file:
 8 |     dialogue_data = json.load(file)
 9 | 
10 | # Extract dialogues from the nested structure
11 | dialogues = []
12 | for key1, value1 in dialogue_data.items():
13 |     for key2, value2 in value1.items():
14 |         for key3, dialogue in value2.items():
15 |             start_frame = int(dialogue['startFrame'])
16 |             display_frames = int(dialogue['displayFrames'])
17 |             text = dialogue.get('text', '')
18 |             dialogues.append((start_frame, display_frames, text))
19 | 
20 | # Sort dialogues by start frame
21 | dialogues.sort(key=lambda x: x[0])
22 | 
23 | # Simulate frame counting and display subtitles
24 | current_frame = 0
25 | while True:
26 |     # Clear the screen (works in Unix-like systems)
27 |     print("\033[H\033[J", end="")
28 | 
29 |     # Track which dialogues are currently active
30 |     active_dialogues = []
31 | 
32 |     # Check if there are any dialogues to display at the current frame
33 |     for start_frame, display_frames, text in dialogues:
34 |         if start_frame <= current_frame < start_frame + display_frames:
35 |             active_dialogues.append(text)
36 | 
37 |     # Print all active dialogues
38 |     for text in active_dialogues:
39 |         print(text)
40 | 
41 |     # Increment the frame counter
42 |     current_frame += 1
43 | 
44 |     # Simulate frame rate (30 fps)
45 |     time.sleep(1/30)
46 | 
47 |     # Break condition to stop the loop after a certain number of frames or other criteria
48 |     if current_frame > 200:  # Adjust this condition as needed
49 |         break
50 | 


--------------------------------------------------------------------------------
/audioTools/subtitle display test.py:
--------------------------------------------------------------------------------
 1 | import pygame
 2 | import json
 3 | 
 4 | # Initialize Pygame
 5 | pygame.init()
 6 | 
 7 | # Screen dimensions
 8 | screen_width, screen_height = 800, 600
 9 | screen = pygame.display.set_mode((screen_width, screen_height))
10 | pygame.display.set_caption("Dialogue Display")
11 | 
12 | # Load JSON data
13 | with open('workingfiles/vag-testing.json', 'r') as file:
14 |     dialogue_data = json.load(file)
15 | 
16 | # Font for displaying text
17 | font = pygame.font.Font(None, 36)
18 | fps = 30
19 | clock = pygame.time.Clock()
20 | 
21 | # Function to draw text on the screen
22 | def draw_text(text, x, y):
23 |     surface = font.render(text, True, (255, 255, 255))
24 |     screen.blit(surface, (x, y))
25 | 
26 | # Main loop
27 | running = True
28 | current_frame = 0
29 | 
30 | while running:
31 |     for event in pygame.event.get():
32 |         if event.type == pygame.QUIT:
33 |             running = False
34 |     
35 |     # Clear the screen
36 |     screen.fill((0, 0, 0))
37 |     
38 |     # Determine which dialogue to show based on current frame
39 |     for key1, value1 in dialogue_data.items():
40 |         for key2, value2 in value1.items():
41 |             for key3, dialogue in value2.items():
42 |                 start_frame = int(dialogue['startFrame'])
43 |                 display_frames = int(dialogue['displayFrames'])
44 |                 
45 |                 if start_frame <= current_frame < (start_frame + display_frames):
46 |                     draw_text(dialogue['text'], 100, 100)
47 |     
48 |     # Update the screen
49 |     pygame.display.flip()
50 |     
51 |     # Increment frame and control the frame rate
52 |     current_frame += 1
53 |     clock.tick(fps)
54 | 
55 | # Quit Pygame
56 | pygame.quit()
57 | 


--------------------------------------------------------------------------------
/audioTools/vagAudioTools.py:
--------------------------------------------------------------------------------
 1 | import ffmpeg
 2 | import subprocess, os
 3 | 
 4 | filename = ""
 5 | tempDir = "/tmp"
 6 | 
 7 | def splitVagFile(filename, leftChanFilename, rightChanFilename):
 8 |     # Check if the file is a VAG file
 9 |     with open(filename, 'rb') as f:
10 |         data = f.read()
11 |     header = data[:0x40]
12 |     oldSize = int.from_bytes(header[12:16], 'big')
13 |     newSize = (oldSize // 2).to_bytes(4, 'big')
14 | 
15 |     leftChannelData = header[16:]
16 |     rightChannelData = header[16:]
17 | 
18 |     for i in range(0x40, len(data), 0x2000):
19 |         leftChannelData += data[i:i+0x1000]
20 |         rightChannelData += data[i+0x1000:i+0x2000]
21 | 
22 |     with open(leftChanFilename, 'wb') as f:
23 |         f.write(header[0:12])
24 |         f.write(newSize)
25 |         f.write(leftChannelData)
26 | 
27 |     with open(rightChanFilename, 'wb') as f:   
28 |         f.write(header[0:12])
29 |         f.write(newSize)
30 |         f.write(rightChannelData)
31 | 
def convert_vag_to_wav(input_path, output_path):
    """Run ffmpeg on ``input_path`` (input format forced to ``vag``) and write ``output_path``, overwriting it."""
    source = ffmpeg.input(input_path, f='vag')
    job = source.output(output_path).overwrite_output()
    job.run()
40 | 
def convert_stereo_vag_to_wav(left_vag, right_vag, output_wav):
    """
    Join two single-channel VAG files into one stereo 16-bit PCM WAV via ffmpeg.

    Args:
        left_vag: Path of the left-channel VAG file.
        right_vag: Path of the right-channel VAG file.
        output_wav: Path of the WAV file to write (overwritten if present).

    An ffmpeg failure is reported on stdout rather than raised.
    """
    try:
        # Set inputs separately to objects
        left = ffmpeg.input(left_vag, f='vag')
        right = ffmpeg.input(right_vag, f='vag')
        joined = ffmpeg.filter([left, right], 'join', inputs=2, channel_layout='stereo')
        joined.output(output_wav, acodec='pcm_s16le').overwrite_output().run()
    except ffmpeg.Error as e:
        # run() is called without capture_stderr, so e.stderr is None here and
        # calling .decode() on it would raise inside the handler. Fall back to
        # the exception text when there are no captured bytes.
        detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
        print('FFmpeg error:', detail)
49 | 
50 | 
def play_with_ffplay(wav_file):
    """
    Play ``wav_file`` with ffplay (no display window, exit when playback ends).

    Prints the ``CompletedProcess`` returned by ``subprocess.run``. Failures to
    launch the player are printed instead of raised; this includes a missing
    ``ffplay`` executable, which surfaces as ``OSError``/``FileNotFoundError``
    rather than ``SubprocessError``.
    """
    try:
        print(subprocess.run(['ffplay', wav_file, "-nodisp", "-autoexit"]))
    except (subprocess.SubprocessError, OSError) as e:
        print(e)
56 | 
def playVagFile(filename: str) -> int:
    """
    Play a VAG file through ffplay, converting it to a temporary WAV first.

    The first four bytes of the file select the path:
    ``VAGp`` is converted directly; ``VAGi`` is split into left/right channel
    files which are then joined into one stereo WAV.

    Args:
        filename: Path of the VAG file to play.

    Returns:
        0 after the playback attempt, or -1 when the magic is not recognised.
    """
    # Only the magic is needed from the file; do not keep it open during playback.
    with open(filename, 'rb') as f:
        magic = f.read(4)

    if magic not in (b'VAGp', b'VAGi'):
        print(f'ERROR! File was not valid VAG file. Magic: 0x{magic.hex()} // {magic}')
        return -1

    wavPath = f"{tempDir}/temp.wav"
    splitPaths = []
    try:
        if magic == b'VAGp':
            print(f'File {filename} is MONO! Playing as a single channel.')
            convert_vag_to_wav(filename, wavPath)
        else:
            # Interleaved file! Split the channels, then join them into one WAV.
            splitPaths = [f"{tempDir}/temp-L.vag", f"{tempDir}/temp-R.vag"]
            splitVagFile(filename, splitPaths[0], splitPaths[1])
            convert_stereo_vag_to_wav(splitPaths[0], splitPaths[1], wavPath)

        # File is ready, play it!!!
        play_with_ffplay(wavPath)
    finally:
        # Cleanup: remove whichever temporary files were actually produced,
        # including the per-channel VAG files.
        for tempPath in (*splitPaths, wavPath):
            if os.path.exists(tempPath):
                os.remove(tempPath)

    return 0
82 | 
83 | 
def main():
    """Manual test: join the sample left/right VAG files into one WAV and play it."""
    # TESTING AREA
    exampleDir = "workingFiles/vag-examples"
    stereoWav = f"{exampleDir}/newFile.wav"
    convert_stereo_vag_to_wav(f"{exampleDir}/testLeft.vag", f"{exampleDir}/testRight.vag", stereoWav)
    play_with_ffplay(stereoWav)

if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/common/structs.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Commonly used structures for MGS dialogue lines will go here.
 3 | 
 4 | demoSub is one of the big ones as the similar thing is used in demo, vox, zmovie.
 5 | """
 6 | import sys, struct, os
 7 | sys.path.append(os.path.abspath('./myScripts'))
 8 | 
 9 | import translation.radioDict as RD
10 | 
11 | 
class subtitle:
    """
    One timed dialogue line.

    Attributes:
        text: The line's text. A ``str`` when built from text; the raw payload
            ``bytes`` (leading/trailing NULs stripped) when built from a binary
            record.
        startFrame: Start frame of the line.
        duration: Duration of the line, as stored in the record or passed in.
    """
    text: str
    startFrame: int
    duration: int

    def __init__(self, dialogue_or_bytes, b = None, c = None) -> None:
        """
        Build a subtitle from a raw binary record or from explicit values.

        Args:
            dialogue_or_bytes: Either a raw record (``bytes``) holding three
                native-order unsigned 32-bit integers (length, start frame,
                duration) in its first 12 bytes and the text payload from
                offset 16 onward, or the text itself (``str``).
            b: Start frame; used only with ``str`` input.
            c: Duration; used only with ``str`` input.
        """
        if isinstance(dialogue_or_bytes, bytes):
            # The length field is unpacked for layout purposes but not stored.
            _length, start, duration = struct.unpack("III", dialogue_or_bytes[0:12])
            self.text = dialogue_or_bytes[16:].strip(b"\x00")
            self.startFrame = int(start)
            self.duration = int(duration)
        elif isinstance(dialogue_or_bytes, str):
            self.text = dialogue_or_bytes
            self.startFrame = int(b)
            self.duration = int(c)
        # NOTE(review): any other input type leaves the instance without
        # attributes, exactly as before; callers should pass bytes or str.

    def __str__(self) -> str:
        """Return a one-line, human-readable summary of the subtitle."""
        return f'Subtitle contents: Start: {self.startFrame} Duration: {self.duration} Text: {self.text}'

    def __bytes__(self) -> bytes:
        """
        Encode the subtitle as bytes.

        Packs start frame, duration and a zero as three unsigned 32-bit
        integers, appends the first element returned by
        ``RD.encodeJapaneseHex(self.text)``, then pads with NUL bytes up to a
        multiple of 4. A payload that is already aligned still receives 4
        padding bytes.
        """
        subtitleBytes: bytes = struct.pack("III", self.startFrame, self.duration, 0)
        subtitleBytes += RD.encodeJapaneseHex(self.text)[0]
        bufferNeeded = 4 - (len(subtitleBytes) % 4)
        subtitleBytes += bytes(bufferNeeded)

        return subtitleBytes


--------------------------------------------------------------------------------
/creditsHacking/decryptionDiagram.md:
--------------------------------------------------------------------------------
 1 | 01 FF 
 2 | 
 3 | C7 01 
 4 | FF X 0x48 times (72)
 5 | 
 6 | 01 EE 
 7 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee
 8 | 
 9 | 84 01 
10 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeee
11 | 
12 | 87 07 
13 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeffffeeeeeeeeee
14 | 
15 | 
16 | 01 FE 
17 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeffffeeeeeeeeeefe
18 | 
19 | 83 07 
20 | ffeeee
21 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeffffeeeeeeeeeefeffeeee
22 | 
23 | C8 58 0x48 added, from 58 back
24 | 
25 | 00
26 | 
27 | C7 01 
28 | 
29 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeeffffeeeeeeeeee
30 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeeefeffeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
31 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeeeeee
32 | 


--------------------------------------------------------------------------------
/creditsHacking/imageComparison.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Fixed this to use the correct path for the script based on OS
 4 | if uname == "Darwin"; then
 5 |     SCRIPT="python3 /Users/solidmixer/projects/mgs1-undub/myScripts/creditsHacking/imageEncoder.py"
 6 | else
 7 |     SCRIPT="python3 /home/solidmixer/projects/mgs1-undub/myScripts/creditsHacking/imageEncoder.py"
 8 | fi
 9 | 
10 | echo "" > creditsHacking/output/recreatedPalletes.txt
11 | 
12 | # Run the script on all the images
13 | for file in $(ls -1 creditsHacking/output/images/*.tga); do
14 |     echo "Running $file through script..."
15 |     $SCRIPT $file >> creditsHacking/output/recreatedPalletes.txt
16 | done
17 | 
18 | # Compare the blocks generated
19 | for file in $(ls -1 creditsHacking/output/blocks/*.txt); do
20 |     BASENAME=$(basename $file)
21 |     if diff $file creditsHacking/output/verification/$BASENAME; then
22 |         echo "Block $BASENAME is the same"
23 |     else
24 |         echo "Block $BASENAME is different"
25 |     fi
26 | done


--------------------------------------------------------------------------------
/creditsHacking/imhex patterns 00eae8rar.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | // image 1
 3 | u8 image00[0x422C] @ 0x2C;
 4 | 
 5 | // image 2 0x00004258
 6 | u8 image01[0x2914] @ 0x4280;
 7 | 
 8 | // image 3
 9 | u8 image02[0x6950] @ 0x6BBC;
10 | 
11 | // image 4
12 | u8 image03[0x5ad4] @0xD534;
13 | 
14 | // image 5
15 | u8 image04[0x3190] @0x13030;


--------------------------------------------------------------------------------
/creditsHacking/lz77-test.py:
--------------------------------------------------------------------------------
 1 | def lz77_compress(data, window_size=128):
 2 |     """
 3 |     Compresses data using a simple LZ77 algorithm.
 4 |     This was created by chatgpt. I wanted to see if we could
 5 |     replicate the lz77 compression used on graphics. As it stands,
 6 |     its very similar but the imlimentation is likely different.
 7 |     """
 8 |     compressed = []
 9 |     i = 0
10 | 
11 |     while i < len(data):
12 |         # Look for the longest match in the sliding window
13 |         match_distance = 0
14 |         match_length = 0
15 | 
16 |         for j in range(max(0, i - window_size), i):
17 |             length = 0
18 |             while (i + length < len(data) and 
19 |                    data[j + length] == data[i + length] and
20 |                    length < 255):  # Limit match length
21 |                 length += 1
22 | 
23 |             if length > match_length:
24 |                 match_distance = i - j
25 |                 match_length = length
26 | 
27 |         # Add match or literal to the compressed output
28 |         if match_length > 1:
29 |             # (distance, length, next character)
30 |             next_char = data[i + match_length] if i + match_length < len(data) else None
31 |             compressed.append((match_distance, match_length, next_char))
32 |             i += match_length + 1
33 |         else:
34 |             # Literal (distance=0, length=0, char)
35 |             compressed.append((0, 0, data[i].to_bytes().hex()))
36 |             i += 1
37 | 
38 |     return compressed
39 | 
40 | # Example: Compress 160 bytes of random data
41 | data = bytes.fromhex('ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f21fe9f11fb14000059000110fe3e60ff8fb2ffffffffcf0300d49f010040ff5f40ffcf0100b3ffffff3c0092ff6f0030fb6fd2ffff19f9150051fdffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff')
42 | 
compressed = lz77_compress(data)

# Show one token per line under a heading
print("Compressed Data:")
for token in compressed:
    print(token)


--------------------------------------------------------------------------------
/creditsHacking/lzss-test.py:
--------------------------------------------------------------------------------
 1 | def lzss_compress(data, window_size=128, lookahead_buffer_size=128):
 2 |     """
 3 |     Compress a bytes object using a simple LZSS algorithm.
 4 | 
 5 |     Args:
 6 |         data (bytes): The data to compress.
 7 |         window_size (int): The size of the sliding window.
 8 |         lookahead_buffer_size (int): The size of the lookahead buffer.
 9 | 
10 |     Returns:
11 |         list[tuple]: The compressed data as a list of (offset, length, next_byte) tuples.
12 |     """
13 |     compressed = []
14 |     i = 0
15 | 
16 |     while i < len(data):
17 |         match_distance = 0
18 |         match_length = 0
19 | 
20 |         # Sliding window start
21 |         start_window = max(0, i - window_size)
22 | 
23 |         # Look for the longest match in the sliding window
24 |         for j in range(start_window, i):
25 |             length = 0
26 |             while (length < lookahead_buffer_size and 
27 |                    i + length < len(data) and 
28 |                    data[j + length] == data[i + length]):
29 |                 length += 1
30 | 
31 |             if length > match_length:
32 |                 match_distance = i - j
33 |                 match_length = length
34 | 
35 |         # If a match is found, add it as a (distance, length, next byte) tuple
36 |         if match_length > 1:
37 |             next_byte = data[i + match_length] if i + match_length < len(data) else None
38 |             compressed.append((match_distance, match_length, next_byte))
39 |             i += match_length + 1
40 |         else:
41 |             # Add a literal (distance=0, length=0, next_byte)
42 |             compressed.append((0, 0, data[i]))
43 |             i += 1
44 | 
45 |     return compressed
46 | 
47 | # Example usage
48 | data = bytes.fromhex('ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeeeeeefeffefeeffffefeeeeeeeeeeeeeefeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff')
compressed_data = lzss_compress(data)
# Show one token per line under a heading
print("Compressed Data:")
for token in compressed_data:
    print(token)


--------------------------------------------------------------------------------
/creditsHacking/newCompressionTest.py:
--------------------------------------------------------------------------------
  1 | import os, struct
  2 | from PIL import Image
  3 | import numpy as np
  4 | # from creditsHacking.creditsHacking import imageData
  5 | import argparse
  6 | 
  7 | debug = True
  8 | 
def compressLine(data: bytes) -> bytes:
    """
    Encode one line of bytes as a token stream terminated by a single 0x00.

    Three token shapes are emitted by the loop below:
      * ``count`` followed by ``count`` raw bytes,
      * ``(repeatCount + 0x80)`` followed by ``0x01``,
      * ``(patternLen + 0x80)`` followed by ``distance``.

    When the module-level ``debug`` flag is true, each token group is printed
    as it is produced.

    NOTE(review): every ``.to_bytes()`` call relies on the default arguments
    (Python 3.11+) and produces one byte, so any value above 255 raises
    OverflowError. ``repeatCount`` can reach 128 (128 + 0x80 = 256), and
    neither ``patternLen`` nor the raw-byte count is capped -- confirm the
    intended limits.
    """

    global debug

    def findNextPatternOrRepeat(data: bytes, index: int) -> int:
        """
        Count the bytes, starting at index, that come before the first
        position where the next ``checkLength`` (3) bytes are either all the
        same value or already occur earlier in ``data``.

        At the end of the data the slice is empty and ``find(b'')`` returns 0,
        which also ends the scan.
        """
        count = 0
        checkLength = 3
        while True:
            patternCheck = data[index + count: index + count + checkLength]
            if len(set(patternCheck)) == 1 or data[:index + count].find(patternCheck) != -1:
                break
            else:
                count += 1
        
        return count

    def getLongestRepeat(data: bytes, index: int) -> int:
        """
        Return the length of the run of identical bytes starting at index,
        capped at 128.
        """

        # NOTE(review): `before` is not used by this helper.
        before = data[:index]
        after = data[index:]

        count = 0
        while count < min(128, len(after)):
            if len(set(after[:count + 1])) == 1:
                count += 1
            else:
                break

        # print(f'{count} bytes were repeated following index {index}' )
        return count

    def getLongestPattern(data: bytes, index: int) -> tuple [int, int]:
        """
        For the index, return ``(distance, count)``: ``count`` is the length of
        the longest prefix of ``data[index:]`` that is found inside
        ``data[:index]``, and ``distance`` is how far back from index its last
        such occurrence starts.

        When nothing matches, ``count`` is 0 and ``distance`` is 0
        (``rfind(b'')`` returns ``len(before)``).
        """
        before = data[:index]
        after = data[index:]

        count = 0
        while count < len(after):
            if before.find(after[:count + 1]) != -1:
                count += 1
            else:
                break
        
        distance = abs(len(before) - before.rfind(after[:count]))

        return distance, count
    
    """
    Actual compression Def starts here.

    """
    
    compressedData = b''
    i = 0

    while i < len(data):
        newBytes = b''
        repeatCount = getLongestRepeat(data, i)
        distance, patternLen = getLongestPattern(data, i)

        # Case 1: an earlier occurrence at least as long as the run, and
        # longer than one byte. patternLen > 1 implies i > 0, so data[i - 1]
        # is the real previous byte here.
        if patternLen >= repeatCount and patternLen > 1:
            if data[i - 1] == data[i]:
                # The run continues the previous byte: (repeatCount + 0x80, 1).
                newBytes += (repeatCount + 0x80).to_bytes() + int(1).to_bytes()
                i += repeatCount
            else:
                # Otherwise emit (patternLen + 0x80, distance).
                newBytes += (patternLen + 0x80).to_bytes() + distance.to_bytes()
                i += patternLen
        # Case 2: a run of more than three identical bytes.
        elif repeatCount > 3:
            # At i == 0, data[i - 1] wraps to the last byte; the `i != 0`
            # check rejects that case.
            if data[i - 1] == data[i] and i != 0: 
                newBytes += (repeatCount + 0x80).to_bytes() + int(1).to_bytes()
                i += repeatCount
            else:
                # Emit the byte once as a one-byte raw token, then the
                # remaining repeatCount - 1 copies as (count + 0x80, 1).
                newBytes += int(1).to_bytes() + data[i].to_bytes()
                newBytes += (repeatCount - 1 + 0x80).to_bytes() + int(1).to_bytes()
                i += repeatCount
        # Case 3: raw bytes up to the next run/earlier occurrence (at least one).
        else:
            newString = findNextPatternOrRepeat(data, i)
            if newString < 1:
                newString = 1
            newBytes += newString.to_bytes() + data[i : i + newString]
            i += newString
        if debug: 
            # NOTE(review): reusing the same quote character inside the
            # replacement field needs Python 3.12+.
            print(f'Compressed data: {newBytes.hex(sep=' ')}')
        
        compressedData += newBytes
    
    # Terminate the stream with a single zero byte.
    compressedData += bytes.fromhex('00')
    
    return compressedData
109 | 
110 | 
111 | line = bytes.fromhex('ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff')
112 | 
113 | print(compressLine(line).hex(sep=' '))
114 | 
115 | """
116 | ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
117 | 6f 40 f2 0b 04 fb 04 aa ba ff bf 60 ff ef 41 0d f5 6f b0
118 | ffffffff
119 | 04fdeeff9f50abebff143ef3cf60ef04fcffff0483fdbf50ffdf505fc0ffff09f905fcef31ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
120 | 
121 | """
122 | # Original
123 | # 01 FF B1 01 13 6F 40 F2 0B 04 FB 04 AA BA FF BF 60 FF EF 41 0D F5 6F B0 84 17 11 04 FD EE FF 9F 50 AB EB FF 14 3E F3 CF 60 EF 04 FC 83 13 11 83 FD BF 50 FF DF 50 5F C0 FF FF 09 F9 05 FC EF 31 B2 6E 00
124 | # 01 ff b1 01 13 6f 40 f2 0b 04 fb 04 aa ba ff bf 60 ff ef 41 0d f5 6f b0 84 17 81 11 10 fd ee ff 9f 50 ab eb ff 14 3e f3 cf 60 ef 04 fc 83 13 11 83 fd bf 50 ff df 50 5f c0 ff ff 09 f9 05 fc ef 31 b2 6e 00
125 | # mine


--------------------------------------------------------------------------------
/creditsHacking/scra.py:
--------------------------------------------------------------------------------
 1 | # MINE
 2 | # 01 ff c7 01 0a ee ee ee fe ff ef ee ff ff ef 01 ee 86 01 82 0e c5 01 00
 3 | # 01 FF C7 01 01 EE 07 EE EE FE FF EF EE FF 81 01 82 04 86 01 82 0E C5 01 00 
 4 | # 01 FF C6 01 13 EF 2B 00 40 EC FF 0C 60 FE FF 6D 02 72 ED 04 00 00 40 FE C6 59 00
 5 | # 01 FF C6 01 13 BE 01 74 05 E2 FF 08 10 FE EF 06 76 04 E5 46 04 40 64 FE C6 59 00
 6 | # 01 FF C6 01 13 4E 70 EE 8E E8 EF 03 06 E9 EF 30 EE 6E EB EE 0E E0 EE FE C6 59 00
 7 | # 01 FF C6 01 02 0D D0 83 05 0C BF 50 08 E6 EF 02 D8 EE FE FF 0E E0 01 FF C7 01 00
 8 | # 01 FF C6 01 02 0A E4 83 05 0C 6E 80 0D E1 EF 09 00 A5 EE FF 0E E0 01 FF C7 01 00
 9 | # 01 FF C6 01 02 0A E4 83 05 0C 2E C0 5E 90 FF CE 26 00 E6 FF 0E E0 01 FF C7 01 00
10 | # 01 FF C6 01 02 0C E1 83 05 0C 0B 00 00 60 FE ED EE 08 C0 FF 0E E0 01 FF C7 01 00
11 | # 01 FF C6 01 11 2E 80 EE AE EB 06 43 44 11 BE A3 EE 4E A0 FF 0E E0 01 FF C7 01 00
12 | # 01 FF C6 01 11 9E 10 A7 18 D2 02 EC EE 06 D9 10 A8 08 D1 FF 0E E0 01 FF C7 01 00
13 | # 01 FF C6 01 11 EF 08 00 10 B9 50 FE EF 09 E6 18 00 00 E8 FF 0E E0 01 FF C7 01 00
14 | # 01 FF C7 01 01 EE 84 01 87 07 01 FE 83 07 C8 58 00
15 | 
16 | # Original
17 | # 01 FF C7 01 08 EE EE EE FE FF EF EE FF 83 04 86 01 82 0E C5 01 00 
18 | # 01 FF C6 01 13 EF 2B 00 40 EC FF 0C 60 FE FF 6D 02 72 ED 04 00 00 40 FE C6 59 00 
19 | # 01 FF C6 01 13 BE 01 74 05 E2 FF 08 10 FE EF 06 76 04 E5 46 04 40 64 FE C6 59 00 
20 | # 01 FF C6 01 13 4E 70 EE 8E E8 EF 03 06 E9 EF 30 EE 6E EB EE 0E E0 EE FE C6 59 00 
21 | # 01 FF C6 01 02 0D D0 83 05 0C BF 50 08 E6 EF 02 D8 EE FE FF 0E E0 C7 58 01 FF 00 
22 | # 01 FF C6 01 02 0A E4 83 05 0C 6E 80 0D E1 EF 09 00 A5 EE FF 0E E0 C7 58 01 FF 00
23 | # 01 FF C6 01 02 0A E4 83 05 0C 2E C0 5E 90 FF CE 26 00 E6 FF 0E E0 C7 58 01 FF 00
24 | # 01 FF C6 01 02 0C E1 83 05 0C 0B 00 00 60 FE ED EE 08 C0 FF 0E E0 C7 58 01 FF 00
25 | # 01 FF C6 01 11 2E 80 EE AE EB 06 43 44 11 BE A3 EE 4E A0 FF 0E E0 C7 58 01 FF 00
26 | # 01 FF C6 01 11 9E 10 A7 18 D2 02 EC EE 06 D9 10 A8 08 D1 FF 0E E0 C7 58 01 FF 00
27 | # 01 FF C6 01 11 EF 08 00 10 B9 50 FE EF 09 E6 18 00 00 E8 FF 0E E0 C7 58 01 FF 00
28 | # 01 FF C7 01 01 EE 84 01 87 07 01 FE 83 07 C8 58 00
29 | 
30 | EE EE EE FE FF EF EE FF EF EE FF EF 
31 | EE EE EE FE FF EF EE FF 
32 | 83 04
33 | EF EE FF
34 | 86 01
35 | EF
36 | 
37 | 
38 | ee ee ee fe ff ef ee ff ff ef ee ee ee ee ee ee ee fe ff


--------------------------------------------------------------------------------
/demoManager.py:
--------------------------------------------------------------------------------
  1 | import xml.etree.ElementTree as ET
  2 | from xml.dom.minidom import parseString
  3 | import os, sys, json
  4 | 
  5 | # Add subfolder(s) relative to the script location
  6 | # script_dir = os.path.dirname(os.path.abspath(__file__))
  7 | # sys.path.insert(0, os.path.join(script_dir, 'radioTools'))
  8 | # sys.path.insert(0, os.path.join(script_dir, 'demoTools'))
  9 | # Assuming your submodule is in 'my-renamed-lib'
 10 | # submodule_path = os.path.join(os.path.dirname(__file__), "tools", "myscripts", "radioTools")  # Adjust path if needed
 11 | # submodule_path = os.path.join(os.path.dirname(__file__), "tools", "myscripts")  # Adjust path if needed
 12 | # sys.path.insert(0, submodule_path) # Insert at the beginning to prioritize
 13 | 
 14 | import demoClasses as demoCtrl
 15 | # import tools.myscripts.translation.radioDict as RD
 16 | # import tools.myscripts.translation.characters
 17 | 
 18 | 
 19 | # 
 20 | demoDatData: bytes
 21 | demoStructure: list [demoCtrl.demo]
 22 | workingDemo: demoCtrl.demo
 23 | 
 24 | # Testing Variables
 25 | filename = "build-src/usa-d1/MGS/DEMO.DAT"
 26 | # demoDatData = open(filename, "rb").read()
 27 | outputFilename = "workingFiles/demoDat.xml"
 28 | 
 29 | DEMO_HEADER: bytes = b'\x10\x08\x00\x00'
 30 | DEMO_CHUNKSIZE: int = 0x800
 31 | 
 32 | def findDemoOffsets(demoFileData: bytes, header: bytes, chunkSize: int):
 33 |     """
 34 |     Modified from the original splitter. This now accepts chunk size and header. 
 35 |     This should work for Demo, Vox, and Zmovie (Zmovie has different chunk size)
 36 |     """
 37 |     offset = 0
 38 |     offsets = []
 39 |     while offset < len(demoFileData):
 40 |         checkbytes = demoFileData[offset:offset + 4]
 41 |         if checkbytes == header:
 42 |             offsets.append(offset)
 43 |             offset += chunkSize # All demo files are aligned to 0x800, SIGNIFICANTLY faster to do this than +8! Credit to Green Goblin
 44 |         else:
 45 |             offset += chunkSize
 46 |     return offsets
 47 | 
 48 | def parseDemoFile(demoDatData: bytes) -> dict [str, demoCtrl.demo]:
 49 |     demoOffsets = findDemoOffsets(demoDatData, DEMO_HEADER, DEMO_CHUNKSIZE)
 50 |     demos: dict [str, demoCtrl.demo] = {}
 51 |     for i in range(len(demoOffsets) - 1):
 52 |         demoData = demoDatData[demoOffsets[i]:demoOffsets[i + 1]]
 53 |         demos[str(demoOffsets[i])] = demoCtrl.demo(demoOffsets[i], demoData)
 54 |     demos[str(demoOffsets[-1])] = demoCtrl.demo(demoOffsets[-1], demoData)
 55 | 
 56 |     return demos
 57 |     # Add the final demo
 58 | 
 59 | if __name__ == "__main__":
 60 |     # TESTING BRANCH
 61 |     print(f'This is a test!!!')
 62 |     
 63 | 
 64 |     import audioTools.vagAudioTools as VAG
 65 | 
 66 |     voxTestFilename = "workingFiles/usa-d1/demo/bins/demo-01.bin"
 67 |     # voxTestFilename = "workingFiles/usa-d1/vox/bins/vox-0035.bin"
 68 |     voxData = open(voxTestFilename, 'rb').read()
 69 |     vox = demoCtrl.demo(demoData=voxData)
 70 |     fileWritten = demoCtrl.outputVagFile(vox, 'demo-1', 'workingFiles/vag-examples/')
 71 |     print(f'Wrote file: {fileWritten}')
 72 | 
 73 |     jsonList = {}
 74 |     offset, subdata = vox.toJson()
 75 |     jsonList[offset] = subdata
 76 |     print(jsonList)
 77 |     
 78 |     VAG.playVagFile(fileWritten)
 79 | 
 80 |     # # JSON output
 81 |     # jsonList = {}
 82 |     # for demo in demos:
 83 |     #     # Get demo json data here. 
 84 |     #     offset, subdata = demo.toJson()
 85 |     #     jsonList[offset] = subdata
 86 |     
 87 |     with open("workingfiles/vag-testing.json", "w") as f:
 88 |         json.dump(jsonList, f, ensure_ascii=False, indent=2)
 89 |     
 90 | 
 91 |     """# XML Output
 92 |     allDemos = ET.Element("DemoDat")
 93 |     # allDemos.append(demos[0].structure)
 94 |     for demo in demos:
 95 |         allDemos.append(demo.structure)
 96 |         
 97 |     # TESTING BRANCH
 98 |     # testDemoExport = demos[1].structure
 99 |     xmlstr = parseString(ET.tostring(allDemos)).toprettyxml(indent="  ")
100 |     xmlFile = open(outputFilename, 'w', encoding='utf8')
101 |     xmlFile.write(xmlstr)
102 |     xmlFile.close()"""
103 | 
104 | """
105 |     stringOut = ET.tostring(testDemoExport, encoding='utf-8')
106 |     parseString(stringOut)
107 |     xmlstr = stringOut.toprettyxml(indent="  ")
108 |     # xmlstr = parseString(ET.tostring(allDemos)).toprettyxml(indent="  "
109 |     xmlFile = open(f'{outputFilename}.xml', 'wb')
110 |     xmlFile.write(stringOut)
111 |     xmlFile.close()
112 |     """


--------------------------------------------------------------------------------
/graphicsExport/KanjiStillMissing.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/graphicsExport/KanjiStillMissing.txt


--------------------------------------------------------------------------------
/graphicsExport/contextList.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drsparklegasm/mgs1-scripts/fba25e409c5ad49938ba0b6d60e1e48a9c37fc7a/graphicsExport/contextList.txt


--------------------------------------------------------------------------------
/insertVox.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This rough script inserts a different VAG audio file into a VOX file. 
 3 | """
 4 | 
 5 | # import demoManager as DM
 6 | from demoClasses import *
 7 | 
 8 | voxFilename = ""
 9 | vagFilename = ""
10 | 
11 | VAG_HEADER_LENGTH = 0x40
12 | 
13 | originalDemo = demo(0, open(voxFilename, 'rb').read())
14 | 
15 | # Create the vag header:
16 | 
17 | 


--------------------------------------------------------------------------------
/itemDescriptionFinder.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python
  2 | 
  3 | """
  4 | This is a quick script to pull the item description areas out of the binary. 
  5 | We're working with the japanese version, SLPM-861.11
  6 | 
  7 | Will add a re-injector later. For safety, will not exceed original length -1
  8 | 
  9 | """
 10 | 
 11 | import os, struct, re, sys, json
 12 | sys.path.append(os.path.abspath('./myScripts'))
 13 | import translation.radioDict as RD
 14 | 
 15 | execFilename = "/home/solidmixer/projects/mgs1-undub/build-src/usa-d1/MGS/SLUS_861.11"
 16 | execFilename = "/home/solidmixer/projects/mgs1-undub/build-src/jpn-d1/MGS/SLPM_861.11"
 17 | execData = open(execFilename, 'rb').read()
 18 | 
 19 | outputJsonFilename = 'build-proprietary/itemDesc-jpn.json'
 20 | newDescriptionjson = 'build-proprietary/itemDesc-inject.json'
 21 | 
 22 | newBinaryFilename = 'build/jpn-d1/MGS/SLPM_861.11'
 23 | 
 24 | # Load new data
 25 | injectItemData: dict = json.load(open(newDescriptionjson, 'r'))
 26 | 
 27 | def getOffsets(data: bytes) -> list [tuple [int, int]]:
 28 |     offset = 0
 29 |     offsets = []
 30 |     while True:
 31 |         if execData[offset: offset + 2] != bytes.fromhex("B014") and offset < len(execData):
 32 |             offset += 2
 33 |         elif offset >= 0x2500:
 34 |             break
 35 |         else:
 36 |             endbyte = bytes.find(execData[offset:], b'\x00')
 37 |             endbyte = endbyte + (4 - (endbyte % 4))
 38 |             print(f'{struct.pack(">I", offset).hex()}: {endbyte}')
 39 |             offsets.append((offset, endbyte, execData[offset: offset + endbyte]))
 40 |             offset += endbyte
 41 |     
 42 |     return offsets
 43 | 
 44 | # Injection logic
 45 | 
 46 | if __name__ == "__main__":
 47 |     # Turn it into a list
 48 |     offsets = []
 49 |     for key in injectItemData.keys():
 50 |         print(key)
 51 |         offsets.append(int(key))
 52 |     newBinData = execData[:offsets[0]]
 53 |     # iterate through Keys... 
 54 |     for i in range(len(offsets) - 1):
 55 |         length, data = injectItemData.get(str(offsets[i]))
 56 |         injectDesc = RD.encodeJapaneseHex(data)
 57 |         if len(injectDesc[0]) > length:
 58 |             print(f'ERROR! Offset {offsets[i]} is too long! Revise... Length = {length}, currently {len(injectDesc[0])}\n{data}')
 59 |             exit(2)
 60 |         else:
 61 |             newBinData += injectDesc[0]
 62 |             newBinData += bytes(1) * (length - len(injectDesc[0]))
 63 |         if len(newBinData) == offsets[i + 1]:
 64 |             continue
 65 |         else:
 66 |             newBinData += execData[offsets[i] + length: offsets[i + 1]]
 67 |     
 68 |     # Resolve final offset:
 69 |     length, data = injectItemData.get(str(offsets[-1]))
 70 |     injectDesc = RD.encodeJapaneseHex(data)
 71 |     if len(injectDesc[0]) > length:
 72 |         print(f'ERROR! Offset {offsets[-1]} is too long! Revise...\n{data}')
 73 |         exit(2)
 74 |     else:
 75 |         newBinData += injectDesc[0]
 76 |         newBinData += bytes(1) * (length - len(injectDesc[0]))
 77 | 
 78 |     # Finish the file
 79 |     newBinData += execData[offsets[-1] + length: ]
 80 | 
 81 |     if len(newBinData) == len(execData):
 82 |         print(f'Success!! Files have same length! Outputting new binary....')
 83 |     else:
 84 |         print(f'ERROR! New binary is a different length. Please check!')
 85 |     
 86 |     with open(newBinaryFilename, 'wb') as f:
 87 |         f.write(newBinData)
 88 |         f.close
 89 |     # End!
 90 |     exit(0)
 91 | 
 92 | 
 93 | # Extractor logic
 94 | 
 95 | """
 96 | if __name__ == "__main__":
 97 |     offset = 0
 98 |     offsets = getOffsets(execData[0:0x2500])
 99 | 
100 |     descriptions = {}
101 |     for item in offsets:
102 |         descriptions.update({item[0]: [item[1], RD.translateJapaneseHex(item[2]).strip('\x00')]})
103 |     
104 |     with open(outputJsonFilename, 'w') as f:
105 |         json.dump(descriptions, f, ensure_ascii=False)"""
106 |     
107 | 


--------------------------------------------------------------------------------
/jsonTools.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This is a collection of methods to modify json files. 
  3 | 
  4 | Some english calls match the japanese ones enough to zip the english lines in with the japanese offsets.
  5 | This will also help do other json modifications as needed.
  6 | 
  7 | """
  8 | 
  9 | import os, sys, struct
 10 | import argparse
 11 | import json
 12 | import xml.etree.ElementTree as ET
 13 | from xml.dom.minidom import parseString
 14 | 
 15 | # import xmlModifierTools as xmlin
 16 | 
 17 | # flags
 18 | debug = True
 19 | 
 20 | # This should be the format moving forward
 21 | newjson = {
 22 |     "calls": {},
 23 |     "saves": {},
 24 |     "freqAdd": {},
 25 |     "prompts": {}
 26 | }
 27 | 
 28 | codecNames = {
 29 |     "メリル" : "MERYL",
 30 |     "キャンベル" : "CAMPBELL",
 31 |     "メイ・リン" : "MEI LING",
 32 |     "オタコン" : "OTACON",
 33 |     "マスター" : "MASTER",
 34 |     "ナスターシャ" : "NASTASHA",
 35 |     "ディープスロート" : "DEEPTHROAT",
 36 |     "STAFF" : "STAFF",
 37 | }
 38 | 
 39 | matchingCalls = {
 40 |     "0": "0",
 41 |     "505": "910", # Meryl call
 42 |     "26537": "42370",
 43 |     "38411": "59333",
 44 |     "41131": "63533",
 45 |     "94514": "105879",
 46 |     "179885": "177459",
 47 |     "293536": "283744", # 140.85
 48 | }
 49 | 
 50 | def replaceJsonText(callOffsetA: str, callOffsetB: str):
 51 |     """
 52 |     Replaces the subtitles in jsonB with the subtitles from jsonA while keeping the offsets the same. 
 53 |     Each Call Offset is the (original) call offset as seen in the key of the json format.
 54 |     """
 55 |     global jsonContentA
 56 |     global jsonContentB
 57 |     global newjson
 58 |     
 59 |     newCallSubs = dict(zip(jsonContentB["calls"][callOffsetB].keys(), jsonContentA["calls"][callOffsetA].values()))
 60 |     jsonContentB[callOffsetB] = newCallSubs
 61 |     newjson["calls"].update({"0": newCallSubs}) 
 62 | 
 63 | def writeJsonToFile(outputFilename: str):
 64 |     """
 65 |     Writes the new json file to output
 66 |     """
 67 |     global newjson
 68 | 
 69 |     newCall = open(outputFilename, 'w')
 70 |     newCall.write(json.dumps(newjson))
 71 |     newCall.close
 72 | 
 73 | # test 
 74 | 
 75 | """
 76 | if __name__ == '__main__':
 77 | """
 78 | # This one is for the whole json with all call information
 79 | """
 80 |     parser = argparse.ArgumentParser(description=f'Zip subtitles json offsets from another. \nUsage: main.py subs.json:callOffsetA offsets.json:callOffsetB [outputFilename.json]')
 81 | 
 82 |     # REQUIRED
 83 |     parser.add_argument('subsJson', type=str, help="json including the subtitles we want to zip, ex: filename.json:12345")
 84 |     parser.add_argument('offsetsJson', type=str, help="json including the offsets we want to zip, ex: filename.json:12345")
 85 |     # Optionals
 86 |     parser.add_argument('output', nargs="?", type=str, help="Output Filename (.json)")
 87 | 
 88 |     args = parser.parse_args()
 89 |     
 90 |     subsInFilename = args.subsJson.split(':')[0]
 91 |     offsetsInFilename = args.offsetsJson.split(':')[0]
 92 | 
 93 |     subsCall = args.subsJson.split(':')[1]
 94 |     offsetsCall = args.offsetsJson.split(':')[1]
 95 | 
 96 |     jsonContentA = json.load(open(subsInFilename, 'r'))
 97 |     jsonContentB = json.load(open(offsetsInFilename, 'r'))
 98 | 
 99 |     matchingCalls.update({subsCall: offsetsCall})
100 |     
101 |     for key in matchingCalls:
102 |         # If we need to do only one, you can do {"0": "0"}
103 |         replaceJsonText(key, matchingCalls.get(key))
104 |     
105 |     outputFilename = "recompiledCallBins/modifiedCall.json"
106 |     writeJsonToFile(outputFilename)
107 | """
108 | 
outputFilename = 'recompiledCallBins/modifiedCalls.json'

# inputJson supplies the replacement text; modJson supplies the offsets/keys
# that must be preserved. Both files are closed as soon as they are parsed.
with open("recompiledCallBins/RADIO-usa-d1-Iseeva.json", 'r', encoding='utf-8') as jsonA:
    inputJson = json.load(jsonA)
with open("recompiledCallBins/RADIO-jpn-d1-Iseeva.json", 'r', encoding='utf-8') as jsonB:
    modJson = json.load(jsonB)

# Def put calls together
# Only calls listed in matchingCalls are merged. Destination subtitle offsets
# are paired in order with the source subtitle texts (zip stops at the shorter).
for call, newSubs in inputJson['calls'].items():
    destCall = matchingCalls.get(call)
    if destCall is None:
        continue
    newOffsets: dict = modJson['calls'][destCall]
    newjson['calls'][destCall] = dict(zip(newOffsets.keys(), newSubs.values()))

# Save file names (Dock, heliport, etc)
# Non-ASCII text shows up as \uXXXX escapes in the output because
# writeJsonToFile uses json.dumps with its default ensure_ascii=True.
# The first 'saves' entry of the input is reused for every destination key.
newSaves: dict = next(iter(inputJson['saves'].values()))
for save in modJson['saves']:
    newjson['saves'][save] = newSaves

# Save options (SAVE / DO NOT SAVE)
# Same approach: the first 'prompts' entry of the input is applied to all keys.
options: dict = next(iter(inputJson['prompts'].values()))
for opt in modJson['prompts']:
    newjson['prompts'][opt] = options

# Codec frequency names
# Names missing from codecNames are stored as None (null in the output json).
for name, originalName in modJson['freqAdd'].items():
    newjson['freqAdd'][name] = codecNames.get(originalName)
142 | 
143 | """matches = zip(inputJson['calls'].keys(), modJson['calls'].keys())
144 | for item in matches:
145 |     print(item)
146 |    
147 | zippedOffsets = {
148 |     {'0': '0'},
149 |     {'505': '910'},
150 |     {'671': '1143'},
151 |     {'3326': '5833'},
152 |     {'11590': '20491'},
153 |     {'26537': '42370'},
154 |     {'26940': '43010'},
155 |     {'35756': '58410'},
156 |     {'38411': '59333'},
157 |     {'41131': '63533'},
158 |     {'43872': '67711'},
159 |     {'69134': '100554'},
160 |     {'69320': '101971'},
161 |     {'94514': '105879'},
162 |     {'97082': '109999'},
163 |     {'122241': '113848'},
164 |     {'126772': '118092'},
165 |     {'129537': '122322'},
166 |     {'179885': '177459'},
167 |     {'182648': '181774'},
168 |     {'229267': '261556'},
169 |     {'271289': '272965'},
170 |     {'282526': '278558'},
171 |     {'287169': '283744'},
172 |     {'293536': '285449'},
173 |     {'294379': '288512'},
174 |     {'295704': '291664'},
175 |     {'298506': '295376'},
176 |     {'301847': '297431'},
177 |     {'302703': '298504'},
178 |     {'303179': '300837'},
179 |     {'304549': '303506'},
180 |     {'305678': '308961'},
181 |     {'308325': '309595'},
182 |     {'308699': '311129'},
183 |     {'309558': '315529'},
184 |     {'311736': '333438'},
185 |     {'321920': '345760'},
186 |     {'328919': '346636'},
187 |     {'329312': '353110'},
188 |     {'332769': '354836'},
189 |     {'333817': '357134'},
190 |     {'335020': '360648'},
191 |     {'337120': '364457'},
192 |     {'338913': '370275'},
193 |     {'342170': '373157'},
194 |     {'343739': '373922'},
195 |     {'344231': '382452'},
196 |     {'348314': '384737'},
197 |     {'349360': '387533'},
198 |     {'350796': '392774'},
199 |     {'353441': '404827'},
200 |     {'360036': '411172'},
201 |     {'363076': '420535'},
202 |     {'367351': '424648'},
203 |     {'369290': '435981'},
204 |     {'376677': '437859'},
205 |     {'377402': '438879'},
206 |     {'378647': '440301'},
207 |     {'379873': '442271'},
208 |     {'380406': '442769'},
209 |     {'381165': '443783'},
210 |     {'382244': '444479'},
211 |     {'382543': '444665'},
212 |     {'383629': '500848'},
213 |     {'384599': '501542'},
214 |     {'384716': '502253'},
215 |     {'432135': '503414'},
216 |     {'432611': '507374'},
217 |     {'433194': '511743'},
218 |     {'433843': '516278'},
219 |     {'436675': '521476'},
220 |     {'439333': '522080'},
221 |     {'442577': '523100'},
222 |     {'446412': '523558'},
223 |     {'446777': '528381'},
224 |     {'447310': '537194'},
225 |     {'448486': '541583'},
226 |     {'451001': '542961'},
227 |     {'456523': '553357'},
228 |     {'458986': '578094'},
229 |     {'459677': '613767'},
230 |     {'466198': '655215'},
231 |     {'489546': '705182'},
232 |     {'516272': '746540'},
233 |     {'549024': '759232'},
234 |     {'591025': '764099'},
235 |     {'623761': '835965'},
236 |     {'631792': '838702'},
237 |     {'634999': '842905'},
238 |     {'699738': '846865'},
239 |     {'701246': '850748'},
240 |     {'703536': '853313'},
241 |     {'705785': '857004'},
242 |     {'707757': '862252'},
243 |     {'709148': '864340'},
244 |     {'711264': '864934'},
245 |     {'713923': '865585'},
246 |     {'714032': '871462'},
247 |     {'714385': '871740'},
248 |     {'714712': '872031'},
249 |     {'717673': '872900'},
250 |     {'717843': '873258'},
251 |     {'718013': '873491'},
252 |     {'718622': '874414'},
253 |     {'718803': '874749'},
254 |     {'718943': '875056'},
255 |     {'719453': '876158'},
256 |     {'719679': '876792'},
257 |     {'719832': '878450'},
258 |     {'720492': '878723'},
259 |     {'720677': '878944'},
260 |     {'721483': '880788'},
261 |     {'721707': '881135'},
262 |     {'721832': '882585'},
263 |     {'722648': '885749'},
264 |     {'722890': '886218'},
265 |     {'723789': '886375'},
266 |     {'725700': '886608'},
267 |     {'726078': '886801'},
268 |     {'726179': '887492'},
269 |     {'726304': '973398'},
270 |     {'726461': '975232'},
271 |     {'726815': '976020'},
272 |     {'770767': '978487'},
273 |     {'807721': '979250'},
274 |     {'808970': '980047'},
275 |     {'809469': '980510'},
276 |     {'811155': '1002600'},
277 |     {'811467': '1004940'},
278 |     {'811916': '1005403'},
279 |     {'812065': '1010981'},
280 |     {'831617': '1012874'},
281 |     {'832981': '1013171'},
282 |     {'833159': '1049352'},
283 |     {'836088': '1085533'},
284 |     {'836918': '1121714'},
285 |     {'837068': '1157895'},
286 |     {'867121': '1194076'},
287 |     {'897174': '1230257'},
288 |     {'927227': '1266438'},
289 |     {'957280': '1302619'},
290 |     {'987333': '1303528'},
291 |     {'1017386': '1306954'},
292 |     {'1047439': '1307107'},
293 |     {'1077492': '1308686'},
294 |     {'1077803': '1309090'},
295 |     {'1079275': '1310166'},
296 |     {'1079502': '1310397'},
297 |     {'1080525': '1310526'},
298 |     {'1080808': '1311009'},
299 |     {'1081462': '1311570'},
300 |     {'1081608': '1312516'},
301 |     {'1081700': '1312937'},
302 |     {'1081866': '1313084'},
303 |     {'1082197': '1315869'},
304 |     {'1082768': '1316466'},
305 |     {'1083011': '1322167'},
306 |     {'1083177': '1322481'},
307 |     {'1084424': '1325742'},
308 |     {'1084770': '1328330'},
309 |     {'1087728': '1329057'},
310 |     {'1087971': '1356954'},
311 |     {'1089994': '1357344'},
312 |     {'1091343': '1367614'},
313 |     {'1091760': '1371192'},
314 |     {'1111668': '1373669'},
315 |     {'1111875': '1374442'},
316 |     {'1119995': '1375262'},
317 |     {'1121944': '1375619'},
318 |     {'1123640': '1382995'},
319 |     {'1123962': '1383704'},
320 |     {'1124391': '1384423'},
321 |     {'1124548': '1389728'},
322 |     {'1128535': '1392298'},
323 |     {'1128893': '1392479'},
324 |     {'1129204': '1393925'},
325 |     {'1129641': '1423837'},
326 |     {'1130932': '1470339'},
327 |     {'1131055': '1519040'},
328 |     {'1131722': '1572335'},
329 |     {'1152131': '1629598'},
330 |     {'1191276': '1692839'},
331 |     {'1234214': '1758627'},
332 |     {'1283463': '1821547'},
333 |     {'1323905': '1879728'},
334 |     {'1371648': '1911349'},
335 |     {'1416778': '1942970'},
336 |     {'1467474': '1974591'},
337 |     {'1510221': '2006212'},
338 |     {'1532572': '2037833'},
339 |     {'1554923': '2069454'},
340 |     {'1577274': '2101075'},
341 |     {'1599625': '2132696'},
342 |     {'1621976': '2136524'},
343 |     {'1644327': '2141301'},
344 |     {'1666678': '2145475'},
345 |     {'1689029': '2150117'},
346 |     {'1690966': '2179852'},
347 |     {'1693308': '2191515'},
348 |     {'1695364': '2195443'},
349 |     {'1697474': '2200316'},
350 |     {'1717658': '2204609'},
351 |     {'1725045': '2209223'},
352 |     {'1727078': '2213285'},
353 |     {'1729279': '2216663'},
354 |     {'1731618': '2221409'},
355 |     {'1734005': '2225332'},
356 |     {'1735898': '2276631'},
357 |     {'1737574': '2277523'},
358 |     {'1739868': '2278723'},
359 |     {'1741893': '2280299'},
360 |     {'1773680': '2280829'}, 
361 | } 
362 | """
363 | 
364 | writeJsonToFile(outputFilename)


--------------------------------------------------------------------------------
/quickTranslate.py:
--------------------------------------------------------------------------------
1 | import translation.radioDict as RD
2 | 
# Paste the hex string to decode here (empty by default, which decodes zero bytes).
text = ""
# Decode the bytes with the radio dictionary, passing an empty callDict.
textToPrint = RD.translateJapaneseHex(bytes.fromhex(text), callDict = {} )
print(textToPrint)
6 | 


--------------------------------------------------------------------------------
/radioModule.py:
--------------------------------------------------------------------------------
 1 | import RadioDatTools as RDT
 2 | import xml.etree.ElementTree as ET
 3 | import os, sys, json
 4 | 
 5 | class radioDataEditor():
 6 |     radioXMLData: ET.Element
 7 |     calls: list[ET.Element]
 8 |     workingCall: ET.Element
 9 |     workingVox: ET.Element
10 | 
11 | 
12 |     def __init__(self) -> None:
13 |         """
14 |         Initialize the class. Load the radio data as persistent for reading/editing.
15 |         """
16 |         self.radioXMLData = None
17 |         self.calls = []
18 |         self.workingCall = None
19 |         self.workingVox = None
20 |         pass
21 | 
22 |     def loadRadioXmlFile(self, filename: str) -> None:
23 |         try:
24 |             self.radioXMLData = ET.parse(filename).getroot()
25 |             self.calls = self.radioXMLData.findall("Call")
26 |         except FileNotFoundError:
27 |             print(f"Error: File not found: {filename}")
28 |             self.radioXMLData = None
29 |         except ET.ParseError:
30 |             print(f"Error: Could not parse XML File {filename}. Ensure we've loaded an XML file created from RadioDatTools.")
31 |             self.radioXMLData = None
32 |         # Done
33 |         return
34 |     
35 |     def setWorkingCall(self, offset: str):
36 |         for call in self.calls:
37 |             if call.get("offset") == offset:
38 |                 self.workingCall = call
39 |                 print(f'RDE: Working call was set to offset {offset}')
40 |                 break
41 |         pass
42 | 
43 |     def setWorkingVox(self, offset: str):
44 |         # self.workingVox = self.workingCall.find(f".//VOX_CUES[@offset='{offset}']")
45 |         voxes = self.workingCall.findall(f".//VOX_CUES")
46 |         for vox in voxes:
47 |             if vox.get("offset") == offset:
48 |                 self.workingVox = vox
49 |                 print(f'VOX {offset} identified and selected')
50 |                 break
51 |         pass
52 |     
53 |     def getCallOffsets(self) -> list[str]:
54 |         """
55 |         Returns call offsets found
56 |         """
57 |         callOffsets = []
58 |         for callElem in self.calls:
59 |             callOffsets.append(callElem.get("offset"))
60 |         return callOffsets
61 |     
62 |     def getCall(self, offset: int) -> ET.Element:
63 |         """
64 |         Returns the call element for a given offset.
65 |         If offset does not exist, throw error!
66 |         """
67 |         try:
68 |             call = self.radioXMLData.find(f".//Call[@offset='{offset}']")
69 |         except Exception as e:
70 |             print(f'Error: {e}')
71 |         return call
72 | 
73 |     def getVoxOffsets(self) -> list[str]:
74 |         """
75 |         Returns a list of Vox elements in the call. 
76 |         """
77 |         try:
78 |             voxList = []
79 |             audios = self.workingCall.findall(f".//VOX_CUES")
80 |             for vox in audios:
81 |                 voxList.append(vox.get("offset"))
82 |         except Exception as e:
83 |             print(f'Error: {e}')
84 |         return voxList
85 | 
86 |     def getSubs(self) -> list[str]:
87 |         """
88 |         Returns a list of Subtitles elements in the VOX element. 
89 |         """
90 |         dialogue = []
91 |         for sub in self.workingVox.findall("SUBTITLE"):
92 |             dialogue.append(sub.get("text"))
93 |         return dialogue
94 | 
95 |     def replaceVox(newVoxElem: ET.Element) -> None:
96 |         """
97 |         Replaces the modified element into the element tree (Radio Data)
98 |         """
99 |         pass


--------------------------------------------------------------------------------
/radioTools/__init__.py:
--------------------------------------------------------------------------------
1 | # radioTools/__init__.py
2 | PACKAGE_NAME = "Radio Tools"
3 | VERSION = "1.0"


--------------------------------------------------------------------------------
/radioTools/callExtactor.py:
--------------------------------------------------------------------------------
 1 | #!/bin/python
 2 | """
  3 | This script is to extract a specific radio call to another file. 
 4 | I find it's easier to study the raw hex on a subset of it, rather than the whole file.
 5 | Have offsets ready or refer to the bin file. Modify the filename for your Radio.dat file.
 6 | """
 7 | 
 8 | import os, struct, re, argparse
 9 | 
10 | filename = "RADIO-jpn.DAT"
11 | """
12 | def __init__(self, radioFilename: str) -> None:
13 |     os.makedirs("Extracted-Calls", 755, exist_ok=True)
14 |     if not os.path.exists(filename):
15 |         print(f'File {radioFilename} does not exist! Check path and try again.\n')
16 |     else:
17 |         radioFile = open(radioFilename, 'rb')
18 |         radioData = radioFilename.read()
19 |         print(f'Exporter Ready!')
20 | 
21 | """
22 | 
23 | 
24 | 
25 | 
def main(filename, offset, length):
    """
    Copy `length` bytes starting at byte `offset` of `filename` into a new
    file named "<offset>.bin" in the current working directory.

    `offset` and `length` are plain integers. An existing output file with
    the same name is overwritten.
    """
    print("Please provide offsets for the call in decimal forrmat (not hex)!")

    outputFile = str(offset) + '.bin'

    # Both handles are context-managed so they are closed even if the
    # seek/read/write fails part way through.
    with open(filename, 'rb') as radioFile:
        radioFile.seek(offset)
        callData = radioFile.read(length)

    with open(outputFile, 'wb') as output:
        output.write(callData)
    return
45 | 
46 | 
def splitCall(offset: int, length: int) -> int:
    """
    Write radioData[offset: offset + length] to "<offset>.bin" in the current
    working directory and return 0.

    Reads the module-level `radioData`, which must be set before calling.
    """
    global radioData
    callBytes = radioData[offset:offset+length]
    filename = str(offset) + '.bin'
    with open(filename, 'wb') as f:
        f.write(callBytes)
    return 0
55 | 
56 | 
if __name__ == '__main__':
    # Command-line entry point: the call's offset and length are required,
    # the radio file falls back to the module-level `filename`.
    parser = argparse.ArgumentParser(description='Extract one radio call from a RADIO.DAT file into <offset>.bin. Ex. script.py 26213 324 -f radioDatFiles/RADIO-usa-d1.DAT')

    # REQUIRED
    parser.add_argument('offset', type=int, help="Offset of the start of the call (decimal)")
    parser.add_argument('length', type=int, help="Length of the call in bytes (decimal)")
    # Optional
    parser.add_argument('-f', '--file', type=str, default=filename, help="RADIO.DAT file to read from")

    args = parser.parse_args()
    main(args.file, args.offset, args.length)
    # main("radioDatFiles/RADIO-usa-d1.DAT", int(26213), int(324))


--------------------------------------------------------------------------------
/radioTools/callInsertor.py:
--------------------------------------------------------------------------------
 1 | #!/bin/python
 2 | """
  3 | This script inserts a modified radio call back into a RADIO.DAT file at a fixed offset. 
  4 | The bytes of the updated call overwrite the original data in place, and the result is written to a new file.
  5 | Have offsets ready or refer to the bin file. Modify the filename for your Radio.dat file.
 6 | """
 7 | 
 8 | import os, struct, re, argparse
 9 | 
filename = "14085-testing/RADIO-jpn-d1.DAT"
updatedCall = "283744-new-mod.bin"
offset = 283744

with open(filename, 'rb') as radioFile:
    radioData = radioFile.read()

with open(updatedCall, 'rb') as newCallFile:
    newCallData = newCallFile.read()

# Overwrite in place: keep everything before `offset`, drop in the new call,
# then resume the original data right after the bytes the new call covers.
newFileData = radioData[0: offset]
newFileData += newCallData
newFileData += radioData[ len(newFileData) : len(radioData) ]

# Because the tail resumes at len(newFileData), the lengths can only differ
# when the new call runs past the end of the original file.
if len(newFileData) == len(radioData):
    print(f'Success! Files are the same length')
    with open('14085-testing/RADIO.DAT-modified', 'wb') as newFile:
        newFile.write(newFileData)
else:  
    print(f'File lengths differ! New: {len(newFileData)}, old: {len(radioData)}')
31 | 
32 | 
def splitCall(offset: int, length: int) -> int:
    """
    Write radioData[offset: offset + length] to "<offset>.bin" in the current
    working directory and return 0.

    Reads the module-level `radioData` loaded at the top of this script.
    """
    global radioData
    callBytes = radioData[offset:offset+length]
    filename = str(offset) + '.bin'
    with open(filename, 'wb') as f:
        f.write(callBytes)
    return 0
41 | 
42 | 


--------------------------------------------------------------------------------
/radioTools/jsonToCSV.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | import json
 3 | 
 4 | jsonData = json.load(open('recompiledCallBins/RADIO-jpn-d1-Iseeva.json', 'r'))
 5 | 
 6 | with open('callDialogue.csv', 'w') as f:
 7 |     f.write(f'offset, dialogue\n')
 8 |     for key in jsonData["calls"].keys():
 9 |         callDict: dict = jsonData["calls"][key]
10 |         for key in callDict:
11 |             f.write(f'{key},{callDict.get(key)}\n') 
12 |     
13 |     f.close()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ProgressBar
2 | Pillow 
3 | ffmpeg-python
4 | 


--------------------------------------------------------------------------------
/testing/compareDemos.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script runs the python script recursively, first to export all calls, then translate individual calls
 4 | 
 5 | input_dir='demoWorkingDir/usa/bins'
 6 | output_dir='demoWorkingDir/usa/newBins'
 7 | 
 8 | same_count=0
 9 | different_count=0
10 | 
11 | for original in "$input_dir"/*; do
12 |     base_filename=$(basename "$original" .bin) 
13 |     if diff "$original" "$output_dir/$base_filename.bin" >/dev/null; then
14 |         # echo "Files are the same: $original"
15 |         ((same_count++))
16 |     else
17 |         echo "Files are different: $original"
18 |         ((different_count++))
19 |     fi
20 | done
21 | 
22 | echo "Total files that are the same: $same_count"
23 | echo "Total files that are different: $different_count"
24 | 


--------------------------------------------------------------------------------
/testing/convertImage.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Directory containing the TGA files
 4 | input_dir="graphicsExport"
 5 | output_dir="graphicsExport/output"
 6 | 
 7 | # Create output directory if it doesn't exist
 8 | mkdir -p "$output_dir"
 9 | 
10 | # Loop through all TGA files in the input directory
11 | for input_tga in "$input_dir"/*.tga; do
12 |   # Extract the base filename without extension
13 |   base_filename=$(basename "$input_tga" .tga)
14 |   
15 |   # Set the output PNG and text file paths
16 |   output_png="$output_dir/$base_filename.png"
17 |   output_txt="$output_dir/$base_filename"
18 |   
19 |   # Convert the TGA file to PNG using ImageMagick
20 |   # convert "$input_tga" -resize 300% -colorspace Gray -contrast-stretch 0 "$output_png"
21 |   convert "$input_tga" "$output_png"
22 |   # Perform OCR using Tesseract with Japanese language data and additional options
23 |   # tesseract "$output_png" "$output_dir/$base_filename" -l jpn --psm 6
24 |   # tesseract "$output_png" $output_txt -l jpn --psm 6
25 |   # echo "Printed image $output_txt\n"
26 | done
27 | 


--------------------------------------------------------------------------------
/testing/demoBinChecker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | oldBinDir="demoWorkingDir/usa/bins/"
 4 | newBinDir="demoWorkingDir/usa/newBins/"
 5 | 
 6 | for file in "$oldBinDir"/*; do
 7 |     BASENAME=$(basename $file)
 8 |     diff "$oldBinDir$BASENAME" "$newBinDir$BASENAME" 
 9 | done
10 | 
11 | 


--------------------------------------------------------------------------------
/testing/exportAndAnalyze.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script runs the python script recursively, first to export all calls, then translate individual calls
 4 | 
 5 | SCRIPT="./myScripts/RadioDatTools.py"
 6 | RADIODAT="radioDatFiles/RADIO-usa-d1.DAT"
 7 | input_dir='extractedCallBins/usa-d1'
 8 | 
 9 | python3 $SCRIPT $RADIODAT Headers -sH
10 | 
11 | for input in "$input_dir"/*.bin; do
12 |     base_filename=$(basename "$input" .bin)
13 |     output="$input_dir/$base_filename-decrypted"
14 | 
15 |     python3 $SCRIPT $input $output -xz
16 | 
17 | done


--------------------------------------------------------------------------------
/testing/extractALLmaterials.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This is the extraction area:
 4 | # /bin/python3 /home/solidmixer/projects/mgs1-undub/myScripts/RadioDatTools.py -jzx build-src/jpn-d1/MGS/RADIO.DAT radioWorkingDir/jpn-d1/RADIO-d1
 5 | 
 6 | # VOX editing here!
 7 | 
 8 | # This area compiles the new DEMO.DAT and adds it to the disk image (D1)
 9 | 
10 | # # Extracting and automating translation (disk 1)
11 | # /bin/python3 /home/solidmixer/projects/mgs1-undub/myScripts/RadioDatTools.py -jzx build-src/jpn-d1/MGS/RADIO.DAT radioWorkingDir/jpn-d1/RADIO 
12 | 
13 | # This area re-compiles a RADIO file for jpn
14 | 
15 | # Move all files into the build folder.
16 | 
17 | mkpsxiso build/jpn-d1/rebuild.xml -o mgsJpnMod-d1.bin -c mgsJpnMod-d1.cue -y
18 | # mkpsxiso build/jpn-d2/rebuild.xml -o mgsJpnMod-d2.bin -c mgsJpnMod-d2.cue -y
19 | flatpak run org.duckstation.DuckStation mgsJpnMod-d1.cue
20 | 
21 | 


--------------------------------------------------------------------------------
/testing/extractAllCalls.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script runs the python script recursively, first to export all calls, then translate individual calls
 4 | 
 5 | SPLITSCRIPT="myScripts/RadioDatTools.py"
 6 | RECOMPILESCRIPT="myScripts/RadioDatRecompiler.py"
 7 | RADIODAT="radioDatFiles/$1"
 8 | input_dir="extractedCallBins/$2"
 9 | # output_dir="recompiledCallBins/$2"
10 | 
11 | rm $input_dir/*
12 | # rm $output_dir/*
13 | 
14 | same_count=0
15 | different_count=0
16 | 
17 | python3 $SPLITSCRIPT $RADIODAT Headers -s
18 | 
19 | for input in "$input_dir"/*.bin; do
20 |     base_filename=$(basename "$input" .bin)
21 |     # echo $base_filename
22 |     output="$input_dir/$base_filename"
23 |     python3 $SPLITSCRIPT $input $output -xz
24 | done
25 | 
26 | echo "Total files that are the same: $same_count"
27 | echo "Total files that are different: $different_count"
28 | 
29 | # rm $output_dir/*.log
30 | 


--------------------------------------------------------------------------------
/testing/findEndings.py:
--------------------------------------------------------------------------------
 1 | import os, struct, re
 2 | import radioDict # May remove later
 3 | import argparse
 4 | 
 5 | offset = 0
 6 | 
 7 | radioFile = open("radioDatFiles/RADIO-usa-d1.DAT", 'rb')
 8 | radioData = radioFile.read()
 9 | 
10 | def getLength(offset: int):
11 |     length = struct.unpack('>H', radioData[offset + 2:offset + 4])[0] + 2
12 |     return length
13 | 
14 | while offset < len(radioData):
15 |     if radioData[offset:offset+2] == bytes.fromhex("ff10") or radioData[offset:offset+2] == bytes.fromhex("ff11"):
16 |         length = getLength(offset)
17 |         print(f'{offset}, {length}, {offset + length}, \'{radioData[offset + length - 6: offset + length].hex()}\', \'{radioData[offset + length: offset + length + 4].hex()}\'')
18 |     offset += 1


--------------------------------------------------------------------------------
/testing/goblin.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | setlocal
 3 | 
 4 | REM This script runs the python script recursively, first to export all calls, then translate individual calls
 5 | 
 6 | set SCRIPT=myScripts\RadioDatToolsXMLoutput.py
 7 | set RADIODAT=%1
 8 | set input_dir=extractedCallBins
 9 | 
10 | python %SCRIPT% %RADIODAT% Headers.txt -sH
11 | 
12 | REM Variables assigned inside a parenthesised block are expanded when the block is
13 | REM parsed, so the old base_filename / output variables were always empty in the loop.
14 | REM The output name is now built straight from the loop variable (name without extension).
15 | for %%f in (%input_dir%\*.bin) do (
16 |     python myScripts\RadioDatTools.py "%%f" "%input_dir%\%%~nf-decrypted.txt" -zx
17 | )
18 | 
19 | endlocal


--------------------------------------------------------------------------------
/testing/incorrectRecompileCheck.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Compares two binary files and reports back the offset where the file breaks
 3 | """
 4 | 
 5 | import os, struct
 6 | # import translation.radioDict as radioDict 
 7 | import argparse
 8 | 
 9 | # Start by parsing old and new files
10 | 
11 | parser = argparse.ArgumentParser("Compare two binary files and figure out where they differ")
12 | 
13 | parser.add_argument('input', type=str, help="Input Filename from script (.bin).")
14 | parser.add_argument('output', type=str, help="Output Filename from script (.bin).")
15 | parser.add_argument('-a','--allDiffs', action='store_true', help="Prints all errors (as opposed to breaking at the first one)")
16 | 
17 | args = parser.parse_args()
18 | 
19 | ##########################################
20 | if args.output:
21 |     originalFile = open(args.input, 'rb')
22 |     originalData = originalFile.read()
23 | else:
24 |     originalFile = open(f'extractedCallBins/{args.input}.bin', 'rb')
25 |     originalData = originalFile.read()
26 | 
27 | if args.output:
28 |     compareFile = open(args.output, 'rb')
29 |     compareData = compareFile.read()
30 | else:
31 |     compareFile = open(f'recompiledCallBins/{args.input}-mod.bin', 'rb')
32 |     compareData = compareFile.read()
33 | 
34 | print(f'Original file: {len(originalData)} bytes. New file: {len(compareData)} bytes')
35 | 
36 | # Main comparison loop
37 | offset = 0
38 | 
39 | if len(originalData) > len(compareData):
40 |     size = len(originalData)
41 |     print("Original Data is larger!")
42 | elif len(compareData) > len(originalData):
43 |     size = len(compareData)
44 |     print("New Data is larger!")
45 | else:
46 |     print("The files are equal size!")
47 |     size = len(compareData)
48 | 
49 | while offset < size:
50 |     if originalData[offset] == compareData[offset]:
51 |         offset += 4
52 |     elif originalData[offset : offset + 2] == bytes.fromhex("9016") and compareData[offset : offset + 2] == bytes.fromhex("d016"):
53 |         print(f'Character mismatch! {originalData[offset : offset + 2]} {compareData[offset : offset + 2]} ')
54 |         offset += 2
55 |     else:
56 |         differ = True
57 |         print(f"Files break at offset {offset}")
58 |         offsetHex = struct.pack('>L', offset)
59 |         print(f'Offset in hex: 0x{offsetHex.hex()}')
60 |         print(f'Original: \n{originalData[offset - 10 : offset + 10].hex()}')
61 |         print(f'New Data: \n{compareData[offset - 10 : offset + 10].hex()}')
62 |         offset += 1
63 |         if not args.allDiffs:
64 |             break
65 |     print(f"Checking offset = {offset}\r")
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/testing/patternChecker.py:
--------------------------------------------------------------------------------
 1 | import os, struct
 2 | from datetime import datetime
 3 | import radioDict 
 4 | import argparse
 5 | import xml.etree.ElementTree as ET
 6 | 
 7 | radioFile = "radioDatFiles/RADIO-usa-d1.DAT"
 8 | radioData = open(radioFile, 'rb').read()
 9 | 
10 | size = len(radioData)
11 | patterns = {}
12 | 
13 | def getLength(offset: int) -> int: # Returns COMMAND length, offset must be at the 0xff bytes, length is bytes 1 and 2.
14 |     global radioData
15 |     
16 |     lengthBytes = radioData[offset + 2: offset + 4]
17 |     length = struct.unpack('>H', lengthBytes)[0]
18 |     return length + 2
19 | 
20 | def getLengthManually(offset: int) -> int:
21 |     length = 0
22 |     while True:
23 |         length += 1
24 |         if radioData[offset + length].to_bytes() == b'\xff' and radioData[offset + length - 3].to_bytes() == b'\x80':
25 |             return length
26 |         
27 | pattern = 'ff10'  # hex of the 2-byte command marker searched for below
28 | command = bytes.fromhex(pattern)
29 | 
30 | offset = 0
31 | 
32 | while offset < size:  # brute-force scan: every byte position is tested for the marker
33 |     if radioData[offset : offset + 2] == command:        
34 |         header = getLengthManually(offset) 
35 |         line = radioData[offset : offset + header]  # bytes of the header span; not used further
36 | 
37 |         # print(f'Offset: {offset}, Header: {header}')
38 |         lengthA = getLength(offset)  # 16-bit field right after the marker, plus 2
39 |         lengthB = getLength(offset + header - 4)  # 16-bit field in the last 2 bytes of the header span, plus 2
40 |         lABytes = radioData[offset + 2: offset + 4].hex()  # raw bytes behind lengthA (only used by the disabled prints)
41 |         lBBytes = radioData[offset + header - 2: offset + header].hex()  # raw bytes behind lengthB (same)
42 |         """
43 |         print(lengthA)
44 |         print(lengthB)
45 |         print(lABytes)
46 |         print(lBBytes)
47 |         """
48 | 
49 |         if lengthA == lengthB + header - 3:
50 |             print(f'FF10 at offset {offset} length matched!')
51 |         else:
52 |             elseOffset = offset + header + lengthB - 4  # where a following FF11 / FF12 marker is looked for
53 |             if radioData[elseOffset: elseOffset + 2] in [bytes.fromhex("ff11"), bytes.fromhex("ff12")]:
54 |                 # print(f'0x{radioData[elseOffset: elseOffset + 2].hex()}')
55 |                 elseLength = getLengthManually(elseOffset)  # computed but not used yet
56 |                 print(f"FF10 at offset {offset} has a subclause. Else statement matched!")
57 |             # else:
58 |                 # print(f'NO MATCH! 0x{radioData[elseOffset: elseOffset + 2].hex()}')
59 |                 # print(f"FF10 at offset {offset} has a subclause. Else statement WAS NOT MATCHED! \n\tBytes: {radioData[elseOffset : elseOffset + 5].hex()}")
60 |         
61 |     offset += 1
62 | 
63 | for line in patterns:  # patterns is never filled in above, so this currently prints nothing
64 |     print(line)
65 |     
66 | #print(patterns)
67 | 
68 |         


--------------------------------------------------------------------------------
/testing/radioDatUSAChecker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # DEPRECATED! Pathing for the extract / recompile steps may still need updating.
4 | python3 myScripts/RadioDatTools.py radioDatFiles/RADIO-usa-d1.DAT -zx
5 | python3 myScripts/RadioDatRecompiler.py RADIO-usa-d1-output.xml RADIO-usa-d1-recomp.DAT -x
6 | python3 myScripts/testing/incorrectRecompileCheck.py radioDatFiles/RADIO-usa-d1.DAT RADIO-usa-d1-recomp.DAT


--------------------------------------------------------------------------------
/testing/runJpnBuildTest.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Rebuild japanese iso and launch in duckstation
  4 | # Argument parser by chatGPT
  5 | 
  6 | set -e # Exit if we hit a script error.
  7 | 
  8 | # Parse arguments. Every flag defaults to false, i.e. every step runs unless skipped.
  9 | SKIP_EXTRACTION=false # accepted on the command line, but no step in this script checks it yet
 10 | SKIP_GRAPHICS=false
 11 | SKIP_VOX=false
 12 | SKIP_DEMO=false
 13 | SKIP_RADIO=false
 14 | 
 15 | 
 16 | while [[ "$#" -gt 0 ]]; do
 17 |     case $1 in
 18 |         --skip-extraction)
 19 |             SKIP_EXTRACTION=true
 20 |             shift
 21 |             ;;
 22 |         --skip-graphics)
 23 |             SKIP_GRAPHICS=true
 24 |             shift
 25 |             ;;
 26 |         --skip-vox)
 27 |             SKIP_VOX=true
 28 |             shift
 29 |             ;;
 30 |         --skip-demo)
 31 |             SKIP_DEMO=true
 32 |             shift
 33 |             ;;
 34 |         --skip-radio)
 35 |             SKIP_RADIO=true
 36 |             shift
 37 |             ;;
 38 |         --help)
 39 |             echo "Usage: $0 [--skip-extraction] [--skip-graphics] [--skip-vox] [--skip-demo] [--skip-radio]"
 40 |             exit 0
 41 |             ;;
 42 |         *)
 43 |             echo "Unknown option: $1"
 44 |             exit 1
 45 |             ;;
 46 |     esac
 47 | done
 48 | 
 49 | # Step 1: graphics injection (repack STAGE.DIR with ninja)
 50 | if [[ $SKIP_GRAPHICS == false ]]; then
 51 |     echo "Injecting graphics data..."
 52 |     # Inject graphics data (STAGE.DIR) for disc 1 ONLY for now
 53 |     echo "Inject D1 with ninja..."
 54 |     stage_dir=/home/solidmixer/projects/mgs1-undub/workingFiles/jpn-d1/stage
 55 |     wine goblin-tools/ninja.exe -i "$stage_dir/" -pack -o "$stage_dir/STAGE-j1.DIR" -img >/dev/null
 56 |     # Disc 2 temp disable
 57 |     # echo "Inject D2 with ninja..."
 58 |     # wine goblin-tools/ninja.exe -i /home/solidmixer/projects/mgs1-undub/workingFiles/jpn-d2/stage/ -pack -o stageGraphicsWorking/out/STAGE-j2.DIR -img >/dev/null
 59 |     echo "New Stage.dir files created."
 60 | fi
 61 | sleep 2
 62 | 
 63 | # Step 2: VOX editing (inject text, then rejoin)
 64 | if [[ $SKIP_VOX == false ]]; then
 65 |     echo "Processing VOX data..."
 66 |     for tool in voxTextInjector voxRejoiner; do
 67 |         python3 "myScripts/voxTools/$tool.py"
 68 |     done
 69 | fi
 70 | sleep 2
 71 | 
 72 | # Step 3: demo compilation (inject text, then rejoin)
 73 | if [[ $SKIP_DEMO == false ]]; then
 74 |     echo "Compiling new DEMO.DAT..."
 75 |     for tool in demoTextInjector demoRejoiner; do
 76 |         python3 "myScripts/DemoTools/$tool.py"
 77 |     done
 78 | fi
 79 | sleep 2
 80 | 
 81 | # # Extracting and automating translation (disc 1)
 82 | # /bin/python3 /home/solidmixer/projects/mgs1-undub/myScripts/RadioDatTools.py -jzx build-src/jpn-d1/MGS/RADIO.DAT radioWorkingDir/jpn-d1/RADIO 
 83 | 
 84 | # Step 4: re-compile the RADIO file for jpn, using programmatic replacement
 85 | if [[ $SKIP_RADIO == false ]]; then
 86 |     radio_dir=workingFiles/jpn-d1/radio
 87 |     python3 build-proprietary/radio/dialogueSwap.py
 88 |     python3 myScripts/xmlModifierTools.py inject "$radio_dir/injected-Iseeva.json" "$radio_dir/RADIO.xml"
 89 |     # python3 myScripts/RadioDatRecompiler.py -p radioWorkingDir/jpn-d1/RADIO-merged.xml radioWorkingDir/jpn-d1/new-RADIO.DAT -s build-src/jpn-d1/MGS/STAGE.DIR -S radioWorkingDir/jpn-d1/new-STAGE.DIR
 90 |     python3 myScripts/RadioDatRecompiler.py -p "$radio_dir/RADIO-merged.xml" "$radio_dir/new-RADIO.DAT" -s workingFiles/jpn-d1/stage/STAGE-j1.DIR -S workingFiles/jpn-d1/stage/new-STAGE.DIR
 91 | fi
 92 | sleep 2
 93 | 
 94 | # Step 5: move all rebuilt files into the build folder.
 95 | # Each entry is "<path under workingFiles/jpn-d1>:<name under build/jpn-d1/MGS>".
 96 | echo "Moving files into position"
 97 | for pair in radio/new-RADIO.DAT:RADIO.DAT stage/new-STAGE.DIR:STAGE.DIR demo/new-DEMO.DAT:DEMO.DAT vox/new-VOX.DAT:VOX.DAT; do
 98 |     cp -v "workingFiles/jpn-d1/${pair%%:*}" "build/jpn-d1/MGS/${pair##*:}"
 99 | done
100 | 
101 | echo "READY TO BUILD ISO!"
102 | sleep 2
103 | 
104 | # Step 6: build the disc image and open it in DuckStation (Linux flatpak or macOS app bundle)
105 | mkpsxiso build/jpn-d1/rebuild.xml -o mgsJpnMod-d1.bin -c mgsJpnMod-d1.cue -y
106 | # mkpsxiso build/jpn-d2/rebuild.xml -o mgsJpnMod-d2.bin -c mgsJpnMod-d2.cue -y
107 | case "$(uname)" in
108 |     Linux)
109 |         flatpak run org.duckstation.DuckStation mgsJpnMod-d1.cue >/dev/null 2>&1
110 |         ;;
111 |     Darwin)
112 |         /Applications/DuckStation.app/Contents/MacOS/DuckStation mgsJpnMod-d1.cue >/dev/null 2>&1
113 |         ;;
114 | esac


--------------------------------------------------------------------------------
/testing/runusaBuildTest.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Rebuild the USA disc 1 iso and launch it in duckstation
 4 | 
 5 | set -e # Exit if we hit a script error, so a failed rejoin or build never launches a stale image.
 6 | 
 7 | # Here is the section to rebuild demo.dat and add it to the files. 
 8 | # python3 myScripts/DemoTools/demoTextInjector.py
 9 | # cp -n demoWorkingDir/usa/bins/* demoWorkingDir/usa/newBins/
10 | python3 myScripts/DemoTools/demoRejoiner.py
11 | cp demoWorkingDir/usa/new-DEMO.DAT build/usa-d1/MGS/DEMO.DAT
12 | 
13 | mkpsxiso build/usa-d1/rebuild.xml -o mgsUSAMod-d1.bin -c mgsUSAMod-d1.cue -y
14 | # mkpsxiso build/usa-d2/rebuild.xml -o mgsUSAMod-d2.bin -c mgsUSAMod-d2.cue -y
15 | flatpak run org.duckstation.DuckStation mgsUSAMod-d1.cue


--------------------------------------------------------------------------------
/testing/testAllRadioFiles.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Round-trip check for every .DAT in radioDatFiles: run the split script on each file,
 4 | # recompile every .xml found in the output directory, and diff the result against the original.
 5 | 
 6 | SPLITSCRIPT="./myScripts/RadioDatTools.py"
 7 | RECOMPILESCRIPT="./myScripts/RadioDatRecompiler.py"
 8 | input_dir='radioDatFiles'
 9 | output_dir='recompiledCallBins'
10 | 
11 | same_count=0
12 | different_count=0
13 | 
14 | # Expansions are quoted throughout so names with spaces or glob characters stay one argument.
15 | for input in "$input_dir"/*.DAT; do
16 |     base_filename=$(basename "$input" .DAT)
17 |     echo "$base_filename"
18 |     python3 "$SPLITSCRIPT" "$input" "$output_dir/$base_filename" -xz
19 | done
20 | 
21 | for original in "$output_dir"/*.xml; do
22 |     base_filename=$(basename "$original" .xml)
23 |     python3 "$RECOMPILESCRIPT" -D "$output_dir/$base_filename.xml" "$output_dir/$base_filename-mod.DAT" -x
24 |     if diff "$input_dir/$base_filename.DAT" "$output_dir/$base_filename-mod.DAT" >/dev/null; then
25 |         echo "Files are the same: $original"
26 |         ((same_count++))
27 |     else
28 |         echo "Files are different: $original"
29 |         ((different_count++))
30 |     fi
31 | done
32 | 
33 | echo "Total files that are the same: $same_count"
34 | echo "Total files that are different: $different_count"
35 | 
36 | # Clean up through the same directory variable used above; -f keeps a run without logs from erroring.
37 | rm -f "$output_dir"/*.log


--------------------------------------------------------------------------------
/testing/testRecompileAll.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Round-trip check per call: run the split script on RADIO-usa-d1.DAT, export every call .bin
 4 | # found in extractedCallBins, recompile each one, and diff it against the extracted original.
 5 | 
 6 | SPLITSCRIPT="myScripts/RadioDatTools.py"
 7 | RECOMPILESCRIPT="myScripts/RadioDatRecompiler.py"
 8 | RADIODAT="radioDatFiles/RADIO-usa-d1.DAT"
 9 | input_dir='extractedCallBins'
10 | output_dir='recompiledCallBins'
11 | 
12 | # Start from empty directories. Quoted so the glob cannot widen; -f tolerates an already-empty directory.
13 | rm -f -- "$input_dir"/*
14 | rm -f -- "$output_dir"/*
15 | 
16 | same_count=0
17 | different_count=0
18 | 
19 | python3 "$SPLITSCRIPT" "$RADIODAT" Headers -s
20 | 
21 | for input in "$input_dir"/*.bin; do
22 |     base_filename=$(basename "$input" .bin)
23 |     # echo $base_filename
24 |     output="$output_dir/$base_filename"
25 |     python3 "$SPLITSCRIPT" "$input" "$output" -xz
26 | done
27 | 
28 | for original in "$input_dir"/*.bin; do
29 |     base_filename=$(basename "$original" .bin)
30 |     python3 "$RECOMPILESCRIPT" "$output_dir/$base_filename.xml" "$output_dir/$base_filename-mod.bin"
31 |     if diff "$original" "$output_dir/$base_filename-mod.bin" >/dev/null; then
32 |         # echo "Files are the same: $original"
33 |         ((same_count++))
34 |     else
35 |         echo "Files are different: $original"
36 |         ((different_count++))
37 |     fi
38 | done
39 | 
40 | echo "Total files that are the same: $same_count"
41 | echo "Total files that are different: $different_count"
42 | 
43 | rm -f "$output_dir"/*.log


--------------------------------------------------------------------------------
/translation/combine.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | a = open('unique graphics', 'r')
 4 | b = open('kanji.txt', 'r')
 5 | c = open('Output.txt', 'w')
 6 | 
 7 | for lineA, lineB in zip(a, b):
 8 |     c.write(f'\t"{lineA.strip()}": "{lineB.strip()}",\n')
 9 | 
10 | c.close()
11 | 


--------------------------------------------------------------------------------
/translation/graphicShower.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | def display_graphic(hex_string):
 5 |     """Generate and display a character image from the graphics hex with correct scaling (12x12 grid)."""
 6 |     file_data = bytes.fromhex(hex_string)
 7 | 
 8 |     # Convert binary data to bit string
 9 |     bit_string = ''.join(format(byte, '08b') for byte in file_data)
10 | 
11 |     # Fixed 12x12 grid
12 |     width, height = 12, 12  
13 | 
14 |     # Convert bit string to 2D pixel array
15 |     pixel_grid = np.zeros((height, width), dtype=np.uint8)
16 | 
17 |     for i in range(len(bit_string) // 2):
18 |         x, y = i % width, i // width
19 |         bits = bit_string[i * 2 : i * 2 + 2]
20 | 
21 |         # Match the original TGA color mapping
22 |         if bits == "00":
23 |             pixel_grid[y, x] = 0     # Black
24 |         elif bits == "01":
25 |             pixel_grid[y, x] = 85    # Dark gray
26 |         elif bits == "10":
27 |             pixel_grid[y, x] = 170   # Light gray
28 |         else:
29 |             pixel_grid[y, x] = 255   # White
30 | 
31 |     # Display image with proper scaling (400% zoom)
32 |     fig, ax = plt.subplots(figsize=(4, 4))  # 400% zoom
33 |     ax.imshow(pixel_grid, cmap="gray", interpolation="nearest")
34 |     ax.axis("off")
35 |     plt.show()
36 | 
37 | print(f'Character display! Will loop and display a graphic per the hex. ')
38 | 
39 | while True:
40 |     hexCharacters = input(f'\nPlease paste hex character string: ')
41 |     if len(hexCharacters) == 72:
42 |         display_graphic(hexCharacters)


--------------------------------------------------------------------------------
/translation/kanji.txt:
--------------------------------------------------------------------------------
   1 | 気
   2 | 仕
   3 | 掛
   4 | 肉
   5 | 壁
   6 | 何
   7 | 本
   8 | 出
   9 | 触
  10 | 屋
  11 | 闘
  12 | 吹
  13 | 命
  14 | 進
  15 | 戦
  16 | 車
  17 | 思
  18 | 倒
  19 | 事
  20 | 君
  21 | 罰
  22 | 練
  23 | 単
  24 | 独
  25 | 立
  26 | 勿
  27 | 論
  28 | 潜
  29 | 特
  30 | 殊
  31 | 部
  32 | 隊
  33 | 員
  34 | 協
  35 | 現
  36 | 実
  37 | 予
  38 | 想
  39 | 起
  40 | 場
  41 | 私
  42 | 頭
  43 | 棟
  44 | 博
  45 | 士
  46 | 先
  47 | 早
  48 | 来
  49 | 開
  50 | 助
  51 | 戻
  52 | 今
  53 | 奴
  54 | 言
  55 | 借
  56 | 下
  57 | 一
  58 | 階
  59 | 北
  60 | 束
  61 | 研
  62 | 究
  63 | 室
  64 | 捕
  65 | 込
  66 | 切
  67 | 抜
  68 | 所
  69 | 二
  70 | 格
  71 | ?
  72 | 羞
  73 | 飛
  74 | 細
  75 | 逃
  76 | ?
  77 | ?
  78 | 房
  79 | 佐
  80 | 姪
  81 | 誰
  82 | 伯
  83 | 父
  84 | 僻
  85 | 哀
  86 | 男
  87 | 英
  88 | ?
  89 | 武
  90 | 器
  91 | 時
  92 | 礼
  93 | 説
  94 | 教
  95 | 議
  96 | 合
  97 | ?
  98 | 縁
  99 | 名
 100 | 呼
 101 | 伝
 102 | 目
 103 | 新
 104 | 兵
 105 | 慈
 106 | ロ
 107 | 逢
 108 | 直
 109 | 面
 110 | 幻
 111 | 滅
 112 | 滝
 113 | 頭
 114 | ?
 115 | 兄
 116 | 弟
 117 | 家
 118 | 族
 119 | 情
 120 | 報
 121 | 欲
 122 | 最
 123 | 初
 124 | ?
 125 | 加
 126 | 当
 127 | 日
 128 | 流
 129 | 轟
 130 | 廃
 131 | ?
 132 | 中
 133 | 表
 134 | 社
 135 | 会
 136 | 民
 137 | 間
 138 | 模
 139 | 擬
 140 | 次
 141 | 世
 142 | 代
 143 | 召
 144 | 集
 145 | 極
 146 | 毯
 147 | ?
 148 | 密
 149 | 験
 150 | 限
 151 | 正
 152 | 式
 153 | 採
 154 | 決
 155 | 終
 156 | 的
 157 | 径
 158 | 蜂
 159 | 後
 160 | 緒
 161 | 牢
 162 | 獄
 163 | 解
 164 | 隙
 165 | 鍵
 166 | 預
 167 | 等
 168 | 女
 169 | ?
 170 | 引
 171 | 幾
 172 | 護
 173 | 彼
 174 | 死
 175 | 心
 176 | 騰
 177 | 局
 178 | 同
 179 | 病
 180 | 偶
 181 | 然
 182 | 者
 183 | 強
 184 | 生
 185 | 糞
 186 | 壌
 187 | 法
 188 | 監
 189 | 禁
 190 | 建
 191 | 横
 192 | ?
 193 | ?
 194 | 米
 195 | ?
 196 | 幸
 197 | 金
 198 | 相
 199 | 怖
 200 | 訳
 201 | 軍
 202 | 夢
 203 | 毎
 204 | 殺
 205 | 受
 206 | 異
 207 | 常
 208 | 罪
 209 | 悪
 210 | 感
 211 | 戮
 212 | 昔
 213 | 封
 214 | 印
 215 | 残
 216 | 虐
 217 | 闘
 218 | 争
 219 | 能
 220 | ?
 221 | 緩
 222 | 和
 223 | 高
 224 | 掲
 225 | 反
 226 | ?
 227 | 分
 228 | 泌
 229 | ?
 230 | 始
 231 | 結
 232 | 急
 233 | ?
 234 | ?
 235 | 考
 236 | 帰
 237 | 魔
 238 | 通
 239 | 閏
 240 | 詳
 241 | 絡
 242 | 焦
 243 | 少
 244 | 待
 245 | 耐
 246 | 箱
 247 | 話
 248 | 差
 249 | 控
 250 | 位
 251 | 示
 252 | 追
 253 | 道
 254 | 音
 255 | 痴
 256 | 辿
 257 | 足
 258 | 選
 259 | 茶
 260 | 理
 261 | ?
 262 | 上
 263 | 攻
 264 | ?
 265 | 火
 266 | ?
 267 | 危
 268 | 忍
 269 | 山
 270 | ?
 271 | 床
 272 | 歩
 273 | 鳴
 274 | 平
 275 | 医
 276 | 療
 277 | 南
 278 | 洞
 279 | ?
 280 | 路
 281 | 屋
 282 | ?
 283 | 廊
 284 | 溶
 285 | 鉱
 286 | 炉
 287 | 貸
 288 | 昇
 289 | 隙
 290 | 倉
 291 | 司
 292 | 令
 293 | 脱
 294 | ?
 295 | 品
 296 | ?
 297 | 負
 298 | 勝
 299 | 記
 300 | 録
 301 | 狭
 302 | ?
 303 | 共
 304 | 激
 305 | 波
 306 | 輯
 307 | ?
 308 | 折
 309 | 広
 310 | 我
 311 | 慢
 312 | 国
 313 | 匹
 314 | 勇
 315 | 求
 316 | 愚
 317 | 惧
 318 | 育
 319 | 籍
 320 | 省
 321 | 母
 322 | 文
 323 | ?
 324 | 勉
 325 | 煙
 326 | 草
 327 | 吸
 328 | 唯
 329 | 普
 330 | 尽
 331 | 注
 332 | 派
 333 | 烏
 334 | 美
 335 | 食
 336 | 深
 337 | 泉
 338 | 魚
 339 | 芳
 340 | 餌
 341 | 底
 342 | 住
 343 | 釣
 344 | 潔
 345 | 失
 346 | 敗
 347 | 例
 348 | 多
 349 | 則
 350 | 傷
 351 | 取
 352 | 張
 353 | 禍
 354 | 買
 355 | 恵
 356 | 点
 357 | ?
 358 | 良
 359 | 把
 360 | 握
 361 | 便
 362 | 利
 363 | 全
 364 | 嫁
 365 | 旦
 366 | 那
 367 | 浮
 368 | 間
 369 | 溺
 370 | 尋
 371 | 浅
 372 | 瀬
 373 | 自
 374 | 過
 375 | 他
 376 | ?
 377 | 狼
 378 | 衆
 379 | 数
 380 | 絶
 381 | 送
 382 | 盗
 383 | 色
 384 | 端
 385 | ?
 386 | 小
 387 | 忘
 388 | 交
 389 | 刻
 390 | 打
 391 | 六
 392 | 治
 393 | 治
 394 | 院
 395 | 百
 396 | 其
 397 | 至
 398 | 乃
 399 | 為
 400 | 菜
 401 | 乗
 402 | 映
 403 | 画
 404 | 効
 405 | 果
 406 | 判
 407 | 告
 408 | 標
 409 | 認
 410 | ?
 411 | 専
 412 | 門
 413 | 野
 414 | ?
 415 | 複
 416 | ?
 417 | 倍
 418 | 役
 419 | 頼
 420 | 断
 421 | 真
 422 | ?
 423 | 難
 424 | 好
 425 | 傍
 426 | 聴
 427 | 楽
 428 | 戻
 429 | 可
 430 | 退
 431 | 樹
 432 | 泳
 433 | 油
 434 | 快
 435 | 活
 436 | 千
 437 | 年
 438 | 愉
 439 | ?
 440 | 遊
 441 | 杯
 442 | 遠
 443 | 偏
 444 | 般
 445 | 辺
 446 | ?
 447 | 飢
 448 | 鼠
 449 | 啄
 450 | 渇
 451 | 腹
 452 | 水
 453 | ?
 454 | 因
 455 | 転
 456 | 臨
 457 | 任
 458 | 務
 459 | 志
 460 | 換
 461 | 消
 462 | 去
 463 | 句
 464 | ?
 465 | 技
 466 | 術
 467 | 天
 468 | 順
 469 | 逆
 470 | 亡
 471 | 従
 472 | 在
 473 | 惑
 474 | 戸
 475 | 陣
 476 | 詐
 477 | 欺
 478 | 厭
 479 | 榮
 480 | ?
 481 | 成
 482 | 途
 483 | 善
 484 | 秋
 485 | 義
 486 | 此
 487 | 之
 488 | 程
 489 | 価
 490 | 値
 491 | 得
 492 | 易
 493 | 簡
 494 | 雲
 495 | 海
 496 | 苦
 497 | 詞
 498 | 捉
 499 | 緑
 500 | 源
 501 | ?
 502 | 乱
 503 | 放
 504 | 吐
 505 | 許
 506 | 謝
 507 | ?
 508 | 突
 509 | 嫌
 510 | 居
 511 | 割
 512 | 周
 513 | 弱
 514 | 践
 515 | 工
 516 | 盛
 517 | ?
 518 | 創
 519 | 具
 520 | 造
 521 | 揮
 522 | 嬉
 523 | 将
 524 | 喜
 525 | 泣
 526 | 声
 527 | 恕
 528 | ?
 529 | 瀕
 530 | 誠
 531 | ?
 532 | 如
 533 | 約
 534 | 束
 535 | 元
 536 | 様
 537 | 拘
 538 | 援
 539 | 逮
 540 | 険
 541 | 虜
 542 | 況
 543 | 耳
 544 | 骨
 545 | 埋
 546 | 軽
 547 | 蔑
 548 | ?
 549 | 甘
 550 | ?
 551 | 央
 552 | 振
 553 | 嵐
 554 | 紹
 555 | 介
 556 | 処
 557 | 期
 558 | 辞
 559 | 屈
 560 | 互
 561 | 職
 562 | ?
 563 | 既
 564 | 寂
 565 | 談
 566 | 担
 567 | 覚
 568 | 以
 569 | 丸
 570 | 裸
 571 | 携
 572 | 帯
 573 | ?
 574 | 胃
 575 | 液
 576 | 抑
 577 | ?
 578 | 資
 579 | 権
 580 | 官
 581 | 防
 582 | 与
 583 | 塩
 584 | 検
 585 | 冷
 586 | 静
 587 | 服
 588 | 配
 589 | 番
 590 | 馬
 591 | 鹿
 592 | ?
 593 | 故
 594 | 陥
 595 | 蘇
 596 | 玩
 597 | 弄
 598 | ?
 599 | 黙
 600 | 項
 601 | 望
 602 | 洗
 603 | 脳
 604 | 鬼
 605 | 致
 606 | 避
 607 | 組
 608 | 久
 609 | 週
 610 | 観
 611 | ?
 612 | 距
 613 | 傾
 614 | 含
 615 | 救
 616 | 列
 617 | 蛮
 618 | 由
 619 | 政
 620 | 府
 621 | 稼
 622 | 試
 623 | 型
 624 | 統
 625 | 領
 626 | 昨
 627 | 計
 628 | 未
 629 | 騷
 630 | 繊
 631 | 厄
 632 | 第
 633 | 三
 634 | 削
 635 | 条
 636 | 公
 637 | 批
 638 | 准
 639 | 承
 640 | 域
 641 | 衛
 642 | 題
 643 | 燕
 644 | 返
 645 | 威
 646 | 墜
 647 | 部
 648 | 内
 649 | 容
 650 | ?
 651 | 迫
 652 | 呑
 653 | 倫
 654 | 厳
 655 | ?
 656 | 経
 657 | 血
 658 | 喋
 659 | 緊
 660 | 託
 661 | 苦
 662 | 痛
 663 | 親
 664 | 歳
 665 | 随
 666 | 恋
 667 | 態
 668 | 友
 669 | 称
 670 | 号
 671 | 皆
 672 | 係
 673 | 殴
 674 | ?
 675 | 健
 676 | 暴
 677 | 両
 678 | 詰
 679 | 白
 680 | ?
 681 | 科
 682 | 腕
 683 | ?
 684 | ?
 685 | 眠
 686 | 余
 687 | ?
 688 | 毛
 689 | 筋
 690 | 維
 691 | 刺
 692 | 類
 693 | 売
 694 | 悼
 695 | 似
 696 | 醒
 697 | 狩
 698 | 併
 699 | 宣
 700 | 倍
 701 | 侵
 702 | 済
 703 | 再
 704 | ?
 705 | 材
 706 | ?
 707 | 総
 708 | 主
 709 | 件
 710 | 荒
 711 | 清
 712 | 染
 713 | 買
 714 | 固
 715 | 執
 716 | ?
 717 | 種
 718 | 拾
 719 | 孤
 720 | 児
 721 | 頃
 722 | 肌
 723 | 川
 724 | 餌
 725 | 寸
 726 | 幼
 727 | 個
 728 | 証
 729 | 讐
 730 | 誓
 731 | 憎
 732 | 誤
 733 | 仇
 734 | 蔽
 735 | ?
 736 | ?
 737 | ?
 738 | 胞
 739 | 完
 740 | 了
 741 | 恩
 742 | 粗
 743 | 末
 744 | 読
 745 | 嚇
 746 | 八
 747 | ?
 748 | 叶
 749 | 賭
 750 | 才
 751 | ?
 752 | 札
 753 | 拷
 754 | ?
 755 | ?
 756 | 宅
 757 | 僕
 758 | 隣
 759 | 駐
 760 | 西
 761 | ?
 762 | 赦
 763 | 悔
 764 | 充
 765 | 仲
 766 | ?
 767 | 環
 768 | 境
 769 | 植
 770 | 犬
 771 | 街
 772 | 靱
 773 | 及
 774 | 更
 775 | 亜
 776 | 詞
 777 | 詳
 778 | 挙
 779 | 詩
 780 | 吉
 781 | 祖
 782 | 捕
 783 | 囮
 784 | 捜
 785 | 校
 786 | 洋
 787 | 腑
 788 | 繋
 789 | 農
 790 | 永
 791 | 土
 792 | 零
 793 | 寒
 794 | 暖
 795 | ?
 796 | 雑
 797 | 凝
 798 | ?
 799 | 側
 800 | 純
 801 | 鍛
 802 | 仮
 803 | 午
 804 | 鈍
 805 | 睡
 806 | 排
 807 | 尿
 808 | 満
 809 | 休
 810 | 息
 811 | 語
 812 | 栄
 813 | 養
 814 | 給
 815 | 併
 816 | 候
 817 | 測
 818 | 挑
 819 | 功
 820 | 銃
 821 | 嗅
 822 | 左
 823 | 右
 824 | 算
 825 | 低
 826 | 修
 827 | 羅
 828 | 散
 829 | 古
 830 | 典
 831 | ?
 832 | 巡
 833 | 率
 834 | ?
 835 | 慌
 836 | 璧
 837 | 姿
 838 | 遂
 839 | 駄
 840 | 曳
 841 | 航
 842 | 紛
 843 | 狂
 844 | 懇
 845 | ?
 846 | 摂
 847 | 氏
 848 | 氷
 849 | 賞
 850 | 緯
 851 | 超
 852 | 綻
 853 | 帽
 854 | 被
 855 | 汗
 856 | ?
 857 | 図
 858 | 肺
 859 | 炎
 860 | 症
 861 | 袋
 862 | 湯
 863 | 耗
 864 | 徐
 865 | 淀
 866 | 恐
 867 | 幅
 868 | ?
 869 | 膜
 870 | 焼
 871 | 跡
 872 | 悟
 873 | 偽
 874 | 踏
 875 | 爪
 876 | ?
 877 | 靴
 878 | ?
 879 | 熟
 880 | 欠
 881 | 隙
 882 | 抗
 883 | 級
 884 | 月
 885 | 怒
 886 | 歌
 887 | ?
 888 | 制
 889 | 御
 890 | 依
 891 | 旋
 892 | 骸
 893 | 沙
 894 | 汰
 895 | 席
 896 | 吊
 897 | 戒
 898 | 怠
 899 | 渉
 900 | 阻
 901 | 歪
 902 | 拠
 903 | ?
 904 | 首
 905 | 伺
 906 | 香
 907 | 炭
 908 | 囲
 909 | ?
 910 | 念
 911 | 択
 912 | 万
 913 | 音
 914 | 絞
 915 | 陰
 916 | 角
 917 | 背
 918 | 覗
 919 | 叩
 920 | 甲
 921 | 斐
 922 | 寄
 923 | ?
 924 | 輪
 925 | 営
 926 | 汚
 927 | 鮮
 928 | 膨
 929 | 費
 930 | 陽
 931 | ?
 932 | 布
 933 | 系
 934 | 皮
 935 | ?
 936 | 浸
 937 | 透
 938 | 懐
 939 | 枢
 940 | 弛
 941 | 律
 942 | 酔
 943 | 若
 944 | 干
 945 | 尊
 946 | 敬
 947 | 詭
 948 | ?
 949 | 穴
 950 | 崩
 951 | 税
 952 | 管
 953 | 旧
 954 | 劇
 955 | ?
 956 | 刑
 957 | 泡
 958 | 黄
 959 | 縛
 960 | ?
 961 | 裁
 962 | 繕
 963 | 辛
 964 | 抱
 965 | 看
 966 | 免
 967 | 星
 968 | 獲
 969 | 揃
 970 | 継
 971 | 刃
 972 | 斬
 973 | 尾
 974 | 範
 975 | ?
 976 | 索
 977 | 譜
 978 | 紫
 979 | 副
 980 | 折
 981 | 軟
 982 | 痺
 983 | 駆
 984 | 訴
 985 | ?
 986 | 市
 987 | 融
 988 | 疎
 989 | 康
 990 | 怯
 991 | 呪
 992 | 努
 993 | 腺
 994 | 悩
 995 | 妊
 996 | 短
 997 | 臭
 998 | 径
 999 | 夜
1000 | 扱
1001 | 凄
1002 | 秒
1003 | 械
1004 | 樹
1005 | 脂
1006 | 塑
1007 | 粘
1008 | 掃
1009 | ?
1010 | 鉄
1011 | 球
1012 | 施
1013 | 偵
1014 | 搭
1015 | 載
1016 | 燃
1017 | 四
1018 | 黒
1019 | 徴
1020 | 層
1021 | 隔
1022 | 座
1023 | 比
1024 | 板
1025 | 紙
1026 | 芯
1027 | 木
1028 | 荷
1029 | 梱
1030 | 包
1031 | 輸
1032 | 頑
1033 | ?
1034 | ?
1035 | 疲
1036 | 濡
1037 | ?
1038 | 損
1039 | 裂
1040 | 肝
1041 | 殖
1042 | 課
1043 | 鋭
1044 | 災
1045 | 抑
1046 | 伏
1047 | 圧
1048 | ?


--------------------------------------------------------------------------------
/voxTools/vagOutput.py:
--------------------------------------------------------------------------------
 1 | from demoClasses import *
 2 | 
 3 | if __name__ == "__main__":
 4 |     demoData: bytes
 5 |     # Add in and out file names here
 6 |     demoFilename = "example.vox"
 7 |     newFileName = demoFilename.split("/")[-1].split(".")[0] + ".vag"
 8 | 
 9 |     with open(demoFilename, "rb") as f:
10 |         demoData = f.read()
11 |         demoItems = parseDemoData(demoData)
12 | 
13 |     outputVagFile(demoItems, newFileName)


--------------------------------------------------------------------------------
/voxTools/voxRejoiner.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | sys.path.append(os.path.abspath('./myScripts'))
 3 | import re
 4 | import glob
 5 | import struct
 6 | import progressbar
 7 | import translation.radioDict as RD
 8 | import json
 9 | 
10 | import voxTools.voxTextExtractor as DTE
11 | 
12 | version = "usa"
13 | version = "jpn"
14 | disc = 1
15 | 
16 | # Toggles
17 | debug = True
18 | 
19 | # Directory configs
20 | originalVox = f'build-src/{version}-d{disc}/MGS/VOX.DAT'
21 | inputDir = f'workingFiles/{version}-d{disc}/vox/bins'
22 | outputDir = f'workingFiles/{version}-d{disc}/vox/newBins'
23 | outputvoxFile = f'workingFiles/{version}-d{disc}/vox/new-VOX.DAT'
24 | os.makedirs(outputDir, exist_ok=True)
25 | 
26 | origBinFiles = glob.glob(os.path.join(inputDir, '*.bin'))
27 | origBinFiles.sort(key=lambda f: int(f.split('-')[-1].split('.')[0]))
28 | 
29 | newBinFiles = glob.glob(os.path.join(outputDir, '*.bin'))
30 | origBinFiles.sort(key=lambda f: int(f.split('-')[-1].split('.')[0]))
31 | 
32 | print(f'Building New VOX File...')
33 | newvoxBytes = b''
34 | 
35 | count = 0
36 | with open(outputvoxFile, 'wb') as f:
37 |     for file in origBinFiles:
38 |         if count == len(newBinFiles):
39 |             print(f'\nAll new files injected. Using the remainder of original file...')
40 |             with open(originalVox, 'rb') as originalVox:
41 |                 originalVox.seek(len(newvoxBytes))
42 |                 newvoxBytes += originalVox.read()
43 |                 break
44 |         elif file.replace('bins', 'newBins') in newBinFiles:
45 |             file = file.replace('bins', 'newBins') 
46 |             basename = file.split("/")[-1].split(".")[0]
47 |             print(f'{basename}: Using new {basename}...')
48 |             count += 1
49 |         else:
50 |             basename = file.split("/")[-1].split(".")[0]
51 |             print(f'{basename}: Using old version...\r', end="")
52 |         voxBytes = open(file, 'rb')
53 |         newvoxBytes += voxBytes.read()
54 |         voxBytes.close()
55 |     f.write(newvoxBytes)
56 |     f.close()
57 | 
58 | print(f'{outputvoxFile} was written!')
59 | 
60 |             
61 | 


--------------------------------------------------------------------------------
/voxTools/voxSplit.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Pretty much follows the same rules as demo.dat for chunking
 3 | 
 4 | """
 5 | 
 6 | import os
 7 | 
 8 | # Config
 9 | version = 'usa'
10 | version = 'jpn'
11 | disc = 1
12 | 
13 | filename = f'build-src/{version}-d{disc}/MGS/VOX.DAT'
14 | outputDir = f'workingFiles/{version}-d{disc}/vox/bins'
15 | 
16 | demoFile = open(filename, 'rb')
17 | demoData = demoFile.read()
18 | 
19 | debug = True
20 | 
21 | offsets = []
22 | os.makedirs(outputDir, exist_ok=True)
23 | opening = b'\x10\x08\x00\x00'  # Adjusted opening pattern
24 | 
def findDemoOffsets():
    """
    Scan `demoData` in 2048-byte (0x800) steps and append to the module-level
    `offsets` list every position whose first four bytes equal `opening`.
    Positions within the last four bytes of the data are not examined.
    """
    step = 2048
    for position in range(0, len(demoData) - 4, step):
        if demoData[position:position + 4] == opening:
            offsets.append(position)
34 | 
def splitDemoFiles():
    """
    Write one file per entry in `offsets` into `outputDir`.

    Each chunk runs from its own offset to the next offset; the last chunk
    runs to the end of `demoData`. Files are numbered from 1
    (vox-0001.vox, vox-0002.vox, ...).

    NOTE(review): the text extractor/injector scripts glob '*.bin' in this
    same directory, while this writes a '.vox' extension. Confirm which
    extension is intended before relying on the pipeline end to end.
    """
    global debug

    # Pair every start with the following start (or EOF for the last one).
    boundaries = offsets + [len(demoData)]
    for i, start in enumerate(offsets):
        end = boundaries[i + 1]
        # Context manager guarantees the handle is closed even if write fails.
        with open(f'{outputDir}/vox-{i + 1:04}.vox', 'wb') as f:
            f.write(demoData[start:end])
        if debug:
            # Report the same 1-based number that is used in the file name.
            print(f'Wrote VOX file {i + 1}')
49 | 
if __name__ == '__main__':
    # Order matters: findDemoOffsets() fills the module-level `offsets` list
    # that splitDemoFiles() then iterates over.
    findDemoOffsets()
    splitDemoFiles()
53 | 


--------------------------------------------------------------------------------
/voxTools/voxTextExtractor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adapted from Green Goblins scripts. 
  3 | This is really heavily based on his awesome work. 
  4 | 
  5 | # Script for working with Metal Gear Solid data
  6 | #
  7 | # Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/)
  8 | #
  9 | # Permission to use, copy, modify, and/or distribute this software for any
 10 | # purpose with or without fee is hereby granted, provided that the above
 11 | # copyright notice and this permission notice appear in all copies.
 12 | 
 13 | """
 14 | 
 15 | import os, sys
 16 | sys.path.append(os.path.abspath('./myScripts'))
 17 | import re
 18 | import glob
 19 | import struct
 20 | import progressbar
 21 | import translation.radioDict as RD
 22 | import json
 23 | 
# Accumulates the result for every processed file, keyed by the file's
# basename; written out as JSON at the end of the __main__ block.
voxScriptData: dict = {}

# Created here, but the calls that would start/update it are commented out below.
bar = progressbar.ProgressBar()

# The later assignment wins; comment one of the two out to switch region.
version = "usa"
version = "jpn"
disc = 1

# Input directory holding the split .bin chunks, output directory for the
# per-file text dumps (created if missing), and the combined JSON path.
inputDir = f'workingFiles/{version}-d{disc}/vox/bins'
outputDir = f'workingFiles/{version}-d{disc}/vox/texts'
os.makedirs(outputDir, exist_ok=True)
outputJsonFile = f"workingFiles/{version}-d{disc}/vox/voxText-{version}.json"

# Grab all files in the directory and sort them by the integer found between
# the last '-' and the following '.' in the path.
bin_files = glob.glob(os.path.join(inputDir, '*.bin'))
bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0]))

# flags
debug = True

# List of file names to skip (Ex: 005.bin does not contain texts).
# Compared against os.path.basename(...) of each input file in __main__.
skipFilesListD1 = [

]

# Set up progress bar (currently disabled)
# bar.maxval = len(bin_files)
# barCount = 0
# bar.start()

# DEBUG (currently disabled): restrict the run to a single file.
# if debug:
#     print(f'Only doing vox-1.bin!')
    # bin_files = [f'voxWorkingDir/{version}/bins/vox-25.bin']
 60 | 
 61 | def getTextHexes(textToAnalyze: bytes) -> tuple[list, bytes, list]: 
 62 |     """
 63 |     This just grabs all the text from each sector of the text area.
 64 |     We just grab the hex and return it. We also return the custom 
 65 |     character bytes at the end, which should always make a dictionary.
 66 |     """
 67 |     global debug
 68 |     
 69 |     #startingPoint = struct.unpack("<H", textToAnalyze[18:20])[0]
 70 |     
 71 |     segments = []
 72 |     # Coords = dict of Starting time, length to display
 73 |     coords = []
 74 |     # graphics are only for japanese vers. generally. init here so that we can pass back something even if no graphics found. 
 75 |     graphics = b'' 
 76 |     offset = 0
 77 | 
 78 |     # Search for the second pattern while looking for size pointers
 79 |     while offset < len(textToAnalyze):
 80 |         if debug:
 81 |             print(f'Offset: {offset}')
 82 |         # If loop to determine if we hit the last one. 
 83 |         if textToAnalyze[offset] == 0x00: # This is the last segment, always the same length? # TODO CLEAN THIS UP
 84 |             # All this nonsense finds the last segment since the length bytes are null.
 85 |             lastEnd = textToAnalyze.find(bytes.fromhex('00'), offset + 16)
 86 |             subset = textToAnalyze[offset: lastEnd]
 87 |             evenBytes = (4 - (len(subset) % 4))
 88 |             subset = textToAnalyze[offset: lastEnd + evenBytes]
 89 |             textSize = len(subset)
 90 |             # Get timings
 91 |             appearTime = struct.unpack("I", textToAnalyze[offset + 4: offset + 8])[0]
 92 |             appearDuration = struct.unpack("I", textToAnalyze[offset + 8: offset + 12])[0]
 93 |             coords.append(f'{appearTime},{appearDuration}')
 94 | 
 95 |             print(f'Final length = {textSize}') 
 96 |             segments.append(textToAnalyze[offset + 16: offset + textSize])
 97 |             graphics = textToAnalyze[offset + textSize: -4]
 98 |             break
 99 |         else:
100 |             # Extract the double byte value (little-endian) as a pointer to the size
101 |             textSize = struct.unpack('<H', textToAnalyze[offset:offset + 2])[0]
102 |             appearTime = struct.unpack("I", textToAnalyze[offset + 4: offset + 8])[0]
103 |             appearDuration = struct.unpack("I", textToAnalyze[offset + 8: offset + 12])[0]
104 |             dialogueBytes = textToAnalyze[offset + 16: offset + textSize]
105 | 
106 |         # Append the size pointer and its offset to the list
107 |         segments.append(dialogueBytes)
108 |         coords.append(f'{appearTime},{appearDuration}')
109 | 
110 |         # Move to the next size pointer
111 |         offset += textSize
112 | 
113 |     return segments, graphics, coords
114 | 
def getTextAreaOffsets(voxData: bytes) -> list:
    """
    Locate candidate subtitle areas in a vox chunk.

    A regex signature finds candidates; each one is then confirmed by reading
    the 16-bit size stored 13 bytes after the match and checking that the four
    bytes found at (match + 12 + size) equal 01 04 20 00. The returned offsets
    point 12 bytes past each confirmed match.
    """
    anyFour = b"...."
    # 03 + 15 wildcard bytes, four nulls, then FF FF FF 7F 10 00.
    # (FF FF FF 7F marks a dialogue area that runs to the end of the vox.)
    # 03 ?? ?? ?? ?? ?? ?? 00 ?? ?? ?? ?? 10 00 14 00 >> For IMHEX usage
    signature = b"\x03..." + anyFour * 3 + bytes(4) + bytes.fromhex("FF FF FF 7F 10 00")
    expectedTail = bytes.fromhex("01 04 20 00")

    confirmed = []
    for hit in re.finditer(signature, voxData, re.DOTALL):
        candidate = hit.start()
        (areaSize,) = struct.unpack('<H', voxData[candidate + 13: candidate + 15])
        # Alignment check: the area must be followed by the constant tail.
        tailStart = candidate + 12 + areaSize
        if voxData[tailStart: tailStart + 4] == expectedTail:
            confirmed.append(candidate + 12)

    return confirmed
139 | 
def getTextAreaBytes(offset, voxData):
    """
    Slice the text portion out of the area that starts at `offset`.

    Two little-endian 16-bit values are read from the area header: the area
    length at +1 and an extra-header size at +14. The returned slice starts
    after the 4-byte head plus the extra header and ends at head + length,
    so it includes the trailing 01 04 20 00 bytes.
    """
    bodyStart = offset + 4
    (areaLength,) = struct.unpack('<H', voxData[offset + 1: offset + 3])
    # Japanese has extra data here ?
    (extraHeader,) = struct.unpack('<H', voxData[offset + 14: offset + 16])
    return voxData[bodyStart + extraHeader: bodyStart + areaLength]
150 | 
def getDialogue(textHexes: list [bytes], graphicsData: bytes = None) -> list:
    """
    Decode each raw dialogue segment into text.

    When graphics data is supplied (and the module-level `filename` is set) a
    per-file character dictionary is built with RD.makeCallDictionary and
    passed to the decoder; otherwise an empty dictionary is used. Null
    characters are stripped from every decoded line.
    """
    global debug
    global filename
    global version

    useGraphics = graphicsData is not None and filename is not None
    voxDict = RD.makeCallDictionary(filename, graphicsData) if useGraphics else {}

    dialogue = []
    for rawLine in textHexes:
        decoded = RD.translateJapaneseHex(rawLine, voxDict)
        if debug:
            # Printed before the nulls are removed, as in the original.
            print(decoded)
        dialogue.append(decoded.replace('\x00', ""))
    return dialogue
172 | 
def textToDict(dialogue: list) -> dict:
    """
    Number the entries of `dialogue` from 1 and return them as a dict whose
    keys are the zero-padded (minimum two digits) position strings.
    """
    return {f'{position:02}': entry for position, entry in enumerate(dialogue, start=1)}
181 |             
def writeTextToFile(filename: str, dialogue: list) -> None:
    """
    Write every entry of `dialogue` to `filename` as UTF-8 text, one entry
    per line. An existing file is overwritten.
    """
    global debug
    with open(filename, 'w', encoding='utf8') as out:
        out.writelines(f'{line}\n' for line in dialogue)
188 | 
def findOffsets(byteData: bytes, pattern: bytes) -> list:
    """
    Return the start offset of every occurrence of `pattern` in `byteData`.

    Overlapping occurrences are included. An empty list is returned when the
    pattern does not occur.

    The original implementation restarted each search at the offset it had
    just found (so it never terminated once a match existed) and collected
    the pattern itself instead of the position.
    """
    foundOffsets = []
    offset = byteData.find(pattern)
    while offset != -1:
        foundOffsets.append(offset)
        # Resume one byte past the previous hit so the search always advances.
        offset = byteData.find(pattern, offset + 1)
    return foundOffsets
200 | 
if __name__ == "__main__":
    # Loop through each .bin file in the folder
    for bin_file in bin_files:
        # Assigned at module level on purpose: getDialogue() reads the
        # module-global `filename`.
        filename = os.path.basename(bin_file)

        # Manual override to skip certain voxs
        if filename in skipFilesListD1:
            continue

        if debug:
            print(f"Processing file: {bin_file}")

        # Read the whole chunk in binary mode
        with open(bin_file, 'rb') as binary_file:
            voxData = binary_file.read()

        textOffsets = getTextAreaOffsets(voxData)

        print(f'{os.path.basename(bin_file)}: {textOffsets}')

        texts = []
        timings = []  # list of timings (start time, duration)

        # Decode every text area found in this chunk, in file order.
        for offset in textOffsets:
            subset = getTextAreaBytes(offset, voxData)
            textHexes, graphicsBytes, coords = getTextHexes(subset)
            texts.extend(getDialogue(textHexes, graphicsBytes))
            timings.extend(coords)

        basename = filename.split('.')[0]
        # Element 0 holds the numbered texts, element 1 the numbered timings.
        voxScriptData[basename] = [textToDict(texts), textToDict(timings)]
        writeTextToFile(f'{outputDir}/vox-{basename}.txt', texts)
        # writeTextToFile(f'{outputDir}/{basename}-timings.txt', timings)

    # ensure_ascii=False emits the decoded characters verbatim, so the file
    # must be opened as UTF-8 rather than with the platform default encoding
    # (which can raise UnicodeEncodeError on non-UTF-8 systems).
    with open(outputJsonFile, 'w', encoding='utf8') as f:
        json.dump(voxScriptData, f, ensure_ascii=False)


--------------------------------------------------------------------------------
/voxTools/voxTextInjector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Adapted from Green Goblins scripts. 
  3 | This is really heavily based on his awesome work. 
  4 | 
  5 | Script for working with Metal Gear Solid data
  6 | 
  7 | Copyright (C) 2023 Green_goblin (https://mgsvm.blogspot.com/)
  8 | 
  9 | Permission to use, copy, modify, and/or distribute this software for any
 10 | purpose with or without fee is hereby granted, provided that the above
 11 | copyright notice and this permission notice appear in all copies.
 12 | 
 13 | """
 14 | 
 15 | import os, sys
 16 | sys.path.append(os.path.abspath('./myScripts'))
 17 | import re
 18 | import glob
 19 | import struct
 20 | import progressbar
 21 | import translation.radioDict as RD
 22 | import json
 23 | 
 24 | import voxTools.voxTextExtractor as DTE # Leave for referential
 25 | from common.structs import subtitle
 26 | 
 27 | version = "usa"
 28 | version = "jpn"
 29 | disc = 1
 30 | 
 31 | # Toggles
 32 | debug = True
 33 | 
 34 | # Directory configs
 35 | inputDir = f'workingFiles/{version}-d{disc}/vox/bins'
 36 | outputDir = f'workingFiles/{version}-d{disc}/vox/newBins'
 37 | injectJson = f'build-proprietary/vox/voxText-{version}-d{disc}.json'
 38 | os.makedirs(outputDir, exist_ok=True)
 39 | 
 40 | # Collect files to use
 41 | bin_files = glob.glob(os.path.join(inputDir, '*.bin'))
 42 | bin_files.sort(key=lambda f: int(f.split('-')[-1].split('.')[0]))
 43 | 
 44 | # Collect source json to inject
 45 | injectTexts = json.load(open(injectJson, 'r'))
 46 | 
 47 | 
 48 | # Defs below
 49 | 
 50 | def assembleTitles(texts: dict, timings: dict) -> list [subtitle]:
 51 |     subsList = []
 52 |     for i in range(len(texts)):
 53 |         index = "{:02}".format(i + 1)
 54 |         start = timings.get(index).split(",")[0]
 55 |         duration = timings.get(index).split(",")[1]
 56 |         a = subtitle(texts.get(index), start, duration)
 57 |         subsList.append(a)
 58 |     
 59 |     return subsList
 60 | 
 61 | def genSubBlock(subs: list [subtitle] ) -> bytes:
 62 |     """
 63 |     Injects the new text to the original data, returns the bytes. 
 64 |     Also returns the index we were at when we finished. 
 65 | 
 66 |     """ 
 67 |     newBlock = b''
 68 |     for i in range(len(subs) -1):
 69 |         length = struct.pack("I", len(bytes(subs[i])) + 4)
 70 |         newBlock += length + bytes(subs[i])
 71 |     
 72 |     # Add the last one
 73 |     newBlock += bytes(4) + bytes(subs[-1])
 74 |     
 75 |     return newBlock
 76 | 
def injectSubtitles(originalBinary: bytes, newTexts: dict, frameLimit: int = 1, timings: dict = None) -> bytes:
    """
    Walk the subtitle entries of `originalBinary` and rebuild them with the
    texts from `newTexts` and the "start,duration" strings from `timings`.
    Returns the rebuilt bytes (only the bytes; no index is returned).

    `frameLimit` is accepted but never read in the body.

    NOTE(review): this function is not called from the __main__ block of this
    file (which uses genSubBlock instead) and cannot run as written:
      * `i` is never initialised (the `i = startingNum` line is commented
        out), so the `while` condition fails on an unbound local;
      * encodeNewText() declares two parameters but is called with one;
      * inside encodeNewText, int() is applied to the list returned by
        split(), and `newBytes += ...` targets a name local to the helper
        that has no value there;
      * bytes.find() returns an absolute index, yet it is added to `offset`
        when slicing the last entry;
      * int.to_bytes() without arguments needs Python 3.11+ and only holds
        values up to 255.
    The intended key format for `newTexts`/`timings` is not determinable from
    this file, so the code is left untouched pending confirmation.
    """ 

    def encodeNewText(text: str, timing: str):
        """
        Encode one dialogue line: a packed (start, duration, 0) triple
        followed by the encoded text. The padding loop is meant to bring the
        length to a multiple of 4, but it appends to `newBytes` rather than
        to the value that is returned (see the review note above).
        """
        timings = int(timing.split(','))
        start = timings[0]
        duration = timings[1]

        subtitleBytes: bytes = struct.pack("III", start, duration, 0)
        subtitleBytes += RD.encodeJapaneseHex(text)[0]
        bufferNeeded = 4 - (len(subtitleBytes) % 4)
        for j in range(bufferNeeded):
            newBytes += b'\x00'
            j += 1
        
        return subtitleBytes
    

    
    newBytes = b""
    # 16-bit little-endian value at bytes 18..19 of the input.
    firstLengthBytes = originalBinary[18:20]
    firstLength = struct.unpack('<H', firstLengthBytes)[0]
    offset = 8 + firstLength # This is our starting point for the dialogue.

    # Everything before the first dialogue entry is copied unchanged.
    newBytes += originalBinary[0: offset]

    # i = startingNum
    while i <= len(newTexts):
        start, duration = timings.get(f"{i}").split(",")
        start = int(start)
        duration = int(duration)
        if originalBinary[offset] == 0x00:
            # A zero first byte is treated as the final entry; its length is
            # measured by searching for the next null after the 16-byte header.
            origTextData = originalBinary[offset: offset + originalBinary.find(b'\x00', offset + 16)] # We can add the buffer later
            bufferNeeded = 4 - (len(origTextData) % 4)
            origTextLength = len(origTextData) + bufferNeeded
            origTextData = originalBinary[offset: offset + origTextLength]

            # Now create the new one: keep bytes 0..3 and 12..15 of the old
            # header, replace the two timing fields, then append the new text.
            newText = encodeNewText(newTexts[str(i)])
            newBytes = newBytes + origTextData[0:4] + struct.pack("<I", start) + struct.pack("<I", duration) + origTextData[12:16] + newText
            i += 1
            offset += origTextLength
            break
        else:
            # Regular entry: the first byte alone is used as the entry length.
            origLength = originalBinary[offset]
            origTextData = originalBinary[offset: offset + origLength]
            origTextLength = len(origTextData)
            # New Text: the new length byte replaces byte 0 of the old header.
            newText = encodeNewText(newTexts[str(i)])
            newLength = len(newText) + 16
            newBytes += newLength.to_bytes() + origTextData[1:4] + struct.pack("<I", start) + struct.pack("<I", duration) + origTextData[12:16] + newText
        
            i += 1
            offset += origTextLength

    return newBytes
144 | 
def getvoxDiagHeader(data: bytes) -> bytes:
    """
    Return only the header portion of a dialogue section.

    The header length is the 16-bit value stored at bytes 14..15 plus 4.
    The value is read as explicit little-endian, matching how the extractor
    reads this same field, instead of depending on the host byte order.

    Raises:
        struct.error: if `data` is shorter than 16 bytes.
    """
    (extraLength,) = struct.unpack("<H", data[14:16])
    return data[:extraLength + 4]
151 | 
152 | # if debug:
153 | #     print(f'Only injecting vox 29!')
154 | #     bin_files = ['workingFiles/jpn-d1/vox/bins/vox-0029.bin']
155 | 
if __name__ == "__main__":
    # Main logic: for every original chunk that has an entry in the JSON,
    # rebuild each subtitle area from the JSON texts and timings, then force
    # the result back to the original chunk length and write it to outputDir.
    for file in bin_files:
        filename = os.path.basename(file)
        basename = filename.split(".")[0]
        print(f"{filename}: ", end="")

        if basename not in injectTexts:
            print(f'{basename} was not in the json. Skipping...\r', end="")
            continue

        # Initialize the vox data and the dictionaries we're using to replace it.
        with open(file, 'rb') as origFile:
            origvoxData = origFile.read()
        origBlocks = len(origvoxData) // 0x800  # Checked against the result below.
        voxDict: dict = injectTexts[basename][0]
        voxTimings: dict = injectTexts[basename][1]

        subtitles = assembleTitles(voxDict, voxTimings)

        offsets = DTE.getTextAreaOffsets(origvoxData)
        if not offsets:
            # Nothing to replace; indexing offsets[0] below would raise.
            print(f'{basename} has no subtitle areas. Skipping...')
            continue
        newvoxData = origvoxData[0 : offsets[0]]  # Everything UNTIL the first header

        for Num in range(len(offsets)):
            oldHeader = getvoxDiagHeader(origvoxData[offsets[Num]:])
            oldLength = struct.unpack("<H", oldHeader[1:3])[0]
            frameStart = struct.unpack("<I", oldHeader[4:8])[0]
            frameLimit = struct.unpack("<I", oldHeader[8:12])[0]

            # Get only subtitles whose start frame falls inside this section.
            # NOTE(review): genSubBlock raises IndexError when this list is
            # empty; confirm whether a section can legitimately have none.
            subsForSection = [
                sub for sub in subtitles
                if frameStart <= sub.startFrame < frameLimit
            ]
            newSubBlock = genSubBlock(subsForSection)
            newLength = len(oldHeader) + len(newSubBlock)

            # Same layout as the old header, with the two size fields
            # (bytes 1..2 and 16..19) recomputed for the new subtitle block.
            newHeader = (
                bytes.fromhex("03")
                + struct.pack("<H", newLength)
                + bytes(1)
                + struct.pack("<II", frameStart, frameLimit)
                + oldHeader[12:16]
                + struct.pack("<I", len(oldHeader) + len(newSubBlock) - 4)
                + oldHeader[20:]
            )
            newvoxData += newHeader + newSubBlock

            # Add the rest of the data from this area to the next offset
            # OR until the end of the original vox.
            if Num < len(offsets) - 1:  # if it is NOT the last...
                newvoxData += origvoxData[offsets[Num] + oldLength: offsets[Num + 1]]
            else:
                newvoxData += origvoxData[offsets[Num] + oldLength: ]
            # if debug:
            #     print(newSubBlock.hex(sep=" ", bytes_per_sep=4))

        # Adjust length to match original file.
        if len(newvoxData) == len(origvoxData):
            print("Alignment correct!")
        elif len(newvoxData) < len(origvoxData):  # new vox shorter: pad with nulls
            newvoxData += bytes(len(origvoxData) - len(newvoxData))
            if len(newvoxData) % 0x800 == 0:
                print("Alignment correct!")
        else:
            # New vox longer: it may only be cut back if the excess beyond the
            # original length is pure null padding. (The original sliced from
            # len(new) - len(orig), which inspected almost the whole file and
            # would have kept only the first few bytes on truncation.)
            checkBytes = newvoxData[len(origvoxData):]
            if checkBytes == bytes(len(checkBytes)):
                newvoxData = newvoxData[:len(origvoxData)]
            else:
                print(f'CRITICAL ERROR! New vox cannot be truncated to original length!')
                sys.exit(2)

        newBlocks = len(newvoxData) // 0x800
        if newBlocks != origBlocks:
            print(f"{len(newvoxData)} / {len(origvoxData)}")
            print(f'BLOCK MISMATCH!\nNew data is {newBlocks} blocks, old was {origBlocks} blocks.\nTHERE COULD BE PROBLEMS IN RECOMPILE!!')

        # Finished work! Write the new file.
        with open(f'{outputDir}/{basename}.bin', 'wb') as newFile:
            newFile.write(newvoxData)
        print(f'VOX Data successfully Output to new files!')

    # Not really needed, kept just for reference:
    # for key in injectTexts:
    #     print(key)
    #     voxDict: dict = injectTexts[key]


--------------------------------------------------------------------------------
/zmovieTools/movieSplitter.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Adapted from Green Goblins scripts. Very similar to demo
 3 | only alignments are 0x920
 4 | """
 5 | 
 6 | import os, struct, re, sys, glob, json
 7 | sys.path.append(os.path.abspath('./myScripts'))
 8 | sys.path.append(os.path.abspath('.'))
 9 | import DemoTools.demoTextExtractor as DTE
10 | 
version = "usa"
filename = f"build-src/{version}-d1/MGS/ZMOVIE.STR"
outputDir = f"zMovieWorkingDir/{version}/bins"

# Filled per movie file in the __main__ block and dumped to JSON at the end.
zMovieScript = {}

# Read the whole movie container; the context manager closes the handle,
# which the original bare open() never did.
with open(filename, 'rb') as zmFile:
    zmData = zmFile.read()


offsets = []
os.makedirs(outputDir, exist_ok=True)
23 | 
def getOffsets(toc: bytes) -> list:
    """
    Read four little-endian 32-bit entries from `toc`, starting at byte 16
    and spaced 8 bytes apart, and return each multiplied by 0x920.
    """
    demoNum = 4 # If we figure out where this is we can implement it.
    firstEntry = 16
    entryStride = 8
    return [
        struct.unpack("<I", toc[position : position + 4])[0] * 0x920
        for position in range(firstEntry, firstEntry + demoNum * entryStride, entryStride)
    ]
33 | 
if __name__ == "__main__":

    # Splitting of ZMOVIE.STR into per-movie bins is currently disabled; the
    # code below expects the bins to exist in outputDir already.
    # movieOffsets = getOffsets(zmData[0:0x920])
    # movieOffsets.append(len(zmData))
    # print(movieOffsets)

    # for i in range(len(movieOffsets) - 1):
    #     # Write the output movie file
    #     with open(f'{outputDir}/{i:02}-movie.bin', 'wb') as f:
    #         start = movieOffsets[i]
    #         end = movieOffsets[i + 1]
    #         # Output movie data
    #         f.write(zmData[start : end])

    # Order the bins by the number that prefixes the file name. The basename
    # is taken with os.path so the key also works with '\\' separators.
    bin_files = glob.glob(os.path.join(outputDir, '*.bin'))
    bin_files.sort(key=lambda f: int(os.path.basename(f).split('-')[0]))

    for bin_file in bin_files:
        filename = os.path.basename(bin_file)
        # The extractor module reads its own module-level `filename`.
        DTE.filename = filename
        with open(bin_file, 'rb') as movieTest:
            movieData = movieTest.read()

        # Get text areas
        matches = re.finditer(b'\x02\x00\x00\x00......\x10\x00', movieData, re.DOTALL)

        # Trim false positives: a real area has four null bytes at +28.
        offsets = [
            match.start() for match in matches
            if movieData[match.start() + 28: match.start() + 32] == bytes(4)
        ]

        texts = []
        timings = []  # list of timings (start time, duration)
        # For now we assume they are correct.
        for offset in offsets:
            # NOTE(review): the original also read a 32-bit value at +12
            # (commented as the text-only length) but never used it; the
            # fixed window up to +0x7e0 is what is actually parsed.
            subset = movieData[offset + 16: offset + 0x7e0]
            textHexes, graphicsBytes, coords = DTE.getTextHexes(subset)
            texts.extend(DTE.getDialogue(textHexes, graphicsBytes))
            timings.extend(coords)

        basename = filename.split('.')[0]
        # Element 0 holds the numbered texts, element 1 the numbered timings.
        zMovieScript[basename] = [DTE.textToDict(texts), DTE.textToDict(timings)]
        DTE.writeTextToFile(f'{outputDir}/{basename}.txt', texts)

    # ensure_ascii=False writes the decoded characters verbatim, so the file
    # must be opened as UTF-8 rather than with the platform default encoding.
    with open(f'{outputDir}/zMovie-out.json', 'w', encoding='utf8') as f:
        json.dump(zMovieScript, f, ensure_ascii=False)


--------------------------------------------------------------------------------