├── README.md ├── bdump.js └── cpu-jtoy.py /README.md: -------------------------------------------------------------------------------- 1 | # bdump 2 | 3 | A windbg-js plugin to dump the cpu state. 4 | 5 | ## Usage 6 | 7 | Only tested with Hyper-V. You'll probably want the VM to have 1 vCPU. Sometimes 8 | kd gets confused about which CPU scripts run on. Additionally, once you hit your 9 | bp you should verify that `GSBASE` and `KERNEL_GSBASE` are different: 10 | 11 | ``` 12 | kd> rdmsr c0000101 13 | msr[c0000101] = 0x41414141 14 | kd> rdmsr c0000102 15 | msr[c0000102] = 0x42424242 16 | ``` 17 | 18 | If they are the same, you'll need to hit the bp again. I have no idea why this 19 | happens. 20 | 21 | ``` 22 | kd> .scriptload c:\path\to\bdump.js 23 | kd> !bdump "c:\\path\\to\\dump" 24 | ``` 25 | 26 | This will create two files: 27 | - `dump\mem.dmp` is a standard windows dumpfile. Note that this is _not_ a 28 | minidump! 29 | - `dump\regs.json` is a JSON object containing all the register values encoded 30 | as hex strings. E.g.: 31 | 32 | ```json 33 | {"rax": "0x41414141", "rbx": "0x42424242", ... } 34 | ``` 35 | 36 | There is also a `cpu-jtoy.py` script which converts the registers from JSON to 37 | YAML. This file is vanilla YAML and is used to allow hex encoding of integers 38 | and avoid the 53-bit limitation in some JSON implementations. Hopefully with 39 | BigInt support in more JS engines soon I can drop this... 40 | 41 | This script will also add in the AVX register state with all values zeroed. 
/*
Tail of README.md and the bdump.js file header from the surrounding repository
dump. This text shares physical lines with bdump.js and is preserved verbatim.

42 |
43 | ## Caveats
44 | - doesn't currently capture `xmm`/`ymm`/`zmm` register states
45 | - doesn't currently capture `mxcsr_mask`
46 | - capturing segments (especially `tr` and `es`) is kinda sketchy
47 |
--------------------------------------------------------------------------------
/bdump.js:
--------------------------------------------------------------------------------
*/
"use strict";

/** Write `e` to the debugger output without a trailing newline. */
const log = e => host.diagnostics.debugLog(e);

/** Write `e` to the debugger output followed by a newline. */
const logln = e => log(e + '\n');

/** Format a value as a `0x`-prefixed hex string using its `toString(16)`. */
const hex = e => '0x' + e.toString(16);

// Bit 63 clear => lower-half (user) address. Loose equality is intentional:
// the shifted value is a host 64-bit object that has to be coerced before it
// can be compared with the number 0.
const is_usermode = e => e.bitwiseShiftRight(63) == 0;

/** Print the command usage to the debugger output. */
function usage() {
    logln('[bdump] Usage: !bdump "C:\\\\path\\\\to\\\\dump"');
    logln('[bdump] Usage: !bdump_full "C:\\\\path\\\\to\\\\dump"');
    logln('[bdump] Usage: !bdump_active_kernel "C:\\\\path\\\\to\\\\dump"');
    logln('[bdump] This will create a dump directory and fill it with a memory and register files');
    logln('[bdump] NOTE: you must include the quotes and escape the backslashes!');
}

/**
 * Replace, in place, every non-boolean leaf of `o` with its hex string.
 *
 * Values that contain a `limit` key (segment / table registers) or an index 0
 * (arrays) are recursed into instead of being hexed. Booleans are left alone.
 * `null` / `undefined` entries (e.g. a segment register that could not be
 * recovered) are skipped rather than dereferenced, so one missing register no
 * longer aborts the whole dump.
 *
 * @param {Object} o - object to convert in place.
 */
function __hex_obj(o) {
    for (const k of Object.keys(o)) {
        const v = o[k];

        // typeof null === 'object', so this must be checked before the `in`
        // tests below, which throw on null.
        if (v == null) {
            continue;
        }

        if (typeof v === 'boolean') {
            continue;
        }

        if (typeof v === 'object' && ('limit' in v || 0 in v)) {
            __hex_obj(v);
            continue;
        }

        o[k] = hex(v);
    }
}

/**
 * Create the output directory by shelling out to `mkdir` from the debugger.
 *
 * @param {string} path - directory to create.
 */
function __create_dir(path) {
    const Control = host.namespace.Debugger.Utility.Control;

    Control.ExecuteCommand('.shell -i- mkdir ' + path);
}

/**
 * Read a model specific register through the debugger's `rdmsr` command.
 *
 * @param {number} msr_num - MSR index.
 * @returns the value parsed with `host.parseInt64` from the last output line.
 */
function __rdmsr(msr_num) {
    const Control = host.namespace.Debugger.Utility.Control;

    // 1: kd> rdmsr 3a
    // msr[3a] = 00000000`00000001
    const line = Control.ExecuteCommand('rdmsr ' + hex(msr_num)).Last();
    const chunks = line.split(' ');
    return host.parseInt64(chunks[chunks.length - 1].replace('`', ''), 16);
}

/**
 * Read the MSRs listed below into `regs` and patch the fs/gs bases.
 *
 * Must run after __collect_segs, because it writes into `regs.fs` / `regs.gs`.
 * A segment that could not be recovered (null) is reported and skipped
 * instead of throwing.
 *
 * @param {Object} regs - register object being filled in.
 */
function __collect_msrs(regs) {
    const msrs = {
        "tsc": 0x10,
        "apic_base": 0x1b,
        "sysenter_cs": 0x174,
        "sysenter_esp": 0x175,
        "sysenter_eip": 0x176,
        "pat": 0x277,
        "efer": 0xc0000080,
        "star": 0xc0000081,
        "lstar": 0xc0000082,
        "cstar": 0xc0000083,
        "sfmask": 0xc0000084,
        "kernel_gs_base": 0xc0000102,
        "tsc_aux": 0xc0000103,
    };

    for (const msr of Object.keys(msrs)) {
        regs[msr] = __rdmsr(msrs[msr]);
    }

    // windbg shows these are zero because it stores the values in the msrs
    const seg_base_msrs = [
        ['fs', 0xc0000100],
        ['gs', 0xc0000101],
    ];

    for (const [seg, msr_num] of seg_base_msrs) {
        if (regs[seg] == null) {
            logln('[bdump] ' + seg + ' was not recovered, not reading its base from the msr');
            continue;
        }

        regs[seg].base = __rdmsr(msr_num);
    }
}

/**
 * Collect one segment register by parsing the last line of `dg @<name>`.
 *
 * @param {string} n - register name, e.g. 'cs' or 'tr'.
 * @returns {Object|null} `{present, selector, base, limit, attr}`, or null
 *     when the debugger reports that the descriptor cannot be read.
 */
function __collect_seg(n) {
    const Control = host.namespace.Debugger.Utility.Control;

    const line = Control.ExecuteCommand('dg @' + n).Last();
    // Sel Base Limit Type l ze an es ng Flags
    // ---- ----------------- ----------------- ---------- - -- -- -- -- --------
    // 0033 00000000`00000000 00000000`00000000 Code RE Ac 3 Nb By P Lo 000002fb
    if (line.indexOf('Unable to get descriptor') !== -1) {
        logln('[bdump] could not recover ' + n + '!');
        return null;
    }

    // Columns are space padded, so drop the empty tokens a plain split leaves
    // behind.
    const chunks = line.split(' ').filter(c => c !== '');

    const r = {};

    // The Type column holds a variable number of words (e.g. 'Code RE Ac' vs
    // 'TSS32 Busy'), so a fixed index from the left is unreliable. Per the
    // header above, the last three columns are Pres, Long, Flags: count from
    // the end instead.
    r.present = chunks[chunks.length - 3] === 'P';
    r.selector = host.parseInt64(chunks[0], 16);
    r.base = host.parseInt64(chunks[1].replace('`', ''), 16);
    r.limit = host.parseInt64(chunks[2].replace('`', ''), 16);
    r.attr = host.parseInt64(chunks[chunks.length - 1].replace('`', ''), 16);

    // attr needs to be fixed to include bits 16-19 of limit
    // since the flags field from windbg removes these bits
    // bochs and windows hypervisor platform expect these bits to be correct
    //
    // See Figure 3.8 (Segment Descriptor) in section 3.4.5 (Segment Descriptors) in Volume 3A of the intel manuals.
    r.attr = r.attr.bitwiseAnd(0xFF) + r.attr.bitwiseAnd(0xF00).bitwiseShiftLeft(4) +
        r.limit.bitwiseShiftRight(8).bitwiseAnd(0xF00);

    return r;
}

/**
 * Collect all segment registers into `regs`.
 *
 * Must run before __collect_msrs, which overwrites the fs/gs bases.
 *
 * @param {Object} regs - register object being filled in.
 */
function __collect_segs(regs) {
    regs.es = __collect_seg('es');
    regs.cs = __collect_seg('cs');
    regs.ss = __collect_seg('ss');
    regs.ds = __collect_seg('ds');
    // these values below will be wrong -- reading msrs will give us the
    // correct ones
    regs.fs = __collect_seg('fs');
    regs.gs = __collect_seg('gs');

    regs.tr = __collect_seg('tr');
    // tr is always forced to present, as before; the guard only avoids a
    // TypeError when the descriptor could not be read at all.
    if (regs.tr !== null) {
        regs.tr.present = true;
    }

    regs.ldtr = __collect_seg('ldtr');
}

/**
 * Collect general purpose registers, rip, rflags and (when rip is a user
 * address) the debug registers from the thread's User register set.
 *
 * @param {Object} regs - register object being filled in.
 */
function __collect_user(regs) {
    const User = host.currentThread.Registers.User;

    const gprs = [
        'rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rip', 'rsp', 'rbp',
        'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
    ];
    for (const name of gprs) {
        regs[name] = User[name];
    }

    regs.rflags = User.efl;

    if (is_usermode(regs.rip)) {
        for (const name of ['dr0', 'dr1', 'dr2', 'dr3', 'dr6', 'dr7']) {
            regs[name] = User[name];
        }
    }
}

/**
 * Collect the x87 control/status/tag words and st0-st7.
 *
 * @param {Object} regs - register object being filled in.
 */
function __collect_fp(regs) {
    const Fprs = host.currentThread.Registers.FloatingPoint;

    regs.fpcw = Fprs.fpcw;
    regs.fpsw = Fprs.fpsw;
    regs.fptw = Fprs.fptw;

    regs.fpst = [
        Fprs.st0, Fprs.st1, Fprs.st2, Fprs.st3,
        Fprs.st4, Fprs.st5, Fprs.st6, Fprs.st7,
    ];
}

/**
 * Collect SIMD state. Only mxcsr is captured, and only when rip is a user
 * address (the kernel-mode value is taken in __collect_kern).
 *
 * @param {Object} regs - register object being filled in; `regs.rip` must be set.
 */
function __collect_simd(regs) {
    const Simd = host.currentThread.Registers.SIMD;

    // XXX TODO XXX

    if (is_usermode(regs.rip)) {
        regs.mxcsr = Simd.mxcsr;
    }
}

/**
 * Collect control registers, xcr0, gdtr/idtr and (when rip is a kernel
 * address) the kernel debug registers and mxcsr.
 *
 * @param {Object} regs - register object being filled in; `regs.rip` must be set.
 */
function __collect_kern(regs) {
    const Kern = host.currentThread.Registers.Kernel;

    regs.cr0 = Kern.cr0;
    regs.cr2 = Kern.cr2;
    regs.cr3 = Kern.cr3;
    regs.cr4 = Kern.cr4;
    regs.cr8 = Kern.cr8;

    regs.xcr0 = Kern.xcr0;

    regs.gdtr = { base: Kern.gdtr, limit: Kern.gdtl };
    regs.idtr = { base: Kern.idtr, limit: Kern.idtl };

    if (!is_usermode(regs.rip)) {
        regs.dr0 = Kern.kdr0;
        regs.dr1 = Kern.kdr1;
        regs.dr2 = Kern.kdr2;
        regs.dr3 = Kern.kdr3;
        regs.dr6 = Kern.kdr6;
        regs.dr7 = Kern.kdr7;

        regs.mxcsr = Kern.kmxcsr;
    }
}

/**
 * Patch up values the debugger reports incorrectly or not at all, then convert
 * every register to a hex string (see __hex_obj).
 *
 * Segments that could not be recovered (null) are skipped by the tr/gs fixups
 * instead of causing a TypeError.
 *
 * @param {Object} regs - register object produced by __collect_regs; modified in place.
 */
function __fixup_regs(regs) {
    // regs im not sure how to get out of windbg...
    logln("[bdump] don't know how to get mxcsr_mask or fpop, setting mxcsr_mask to 0xffbf and fpop to zero...");
    regs.mxcsr_mask = 0xffbf; // default value from linux kernel: https://elixir.bootlin.com/linux/latest/source/arch/x86/kernel/fpu/init.c#L117
    regs.fpop = 0;
    logln('[bdump]');

    logln("[bdump] don't know how to get avx registers, skipping...");
    logln('[bdump]');

    // top 32 bits of tr are incorrectly either 0 or -1
    // take the top bits from the gdtr and OR them in to get a valid tr
    //
    // NOTE(review): the loose comparisons against host.Int64 values are kept
    // exactly as in the original; confirm how the WinDbg provider compares
    // 64-bit objects before tightening them.
    if (regs.tr != null
        && (regs.tr.base.bitwiseShiftRight(32) == host.Int64(0xffffffff)
            || regs.tr.base.bitwiseShiftRight(32) == host.Int64(0))) {
        logln('[bdump] tr.base is not cannonical...');
        logln('[bdump] old tr.base: ' + hex(regs.tr.base));

        const low32 = host.Int64(0xffffffff);
        const hi32 = host.parseInt64('0xffffffff00000000', 16);
        const top_bits = regs.gdtr.base.bitwiseAnd(hi32);

        regs.tr.base = regs.tr.base.bitwiseAnd(low32).bitwiseOr(top_bits);

        logln('[bdump] new tr.base: ' + hex(regs.tr.base));
        logln('[bdump]');
    }

    // kernel/user gs can be swapped when reading from msrs
    if (regs.gs != null && is_usermode(regs.rip) !== is_usermode(regs.gs.base)) {
        logln("[bdump] rip and gs don't match kernel/user, swapping...");
        const tmp = regs.kernel_gs_base;
        regs.kernel_gs_base = regs.gs.base;
        regs.gs.base = tmp;
        logln('[bdump] rip: ' + hex(regs.rip));
        logln('[bdump] new gs.base: ' + hex(regs.gs.base));
        logln('[bdump] new kernel_gs_base: ' + hex(regs.kernel_gs_base));
        logln('[bdump]');
    }

    // `!=` on purpose: cr8 is a host 64-bit value compared against a number.
    if (is_usermode(regs.rip) && regs.cr8 != 0) {
        logln("[bdump] non-zero IRQL in usermode, resetting to zero...");
        regs.cr8 = 0;
    }

    // if es was lost to the void, copy ds
    if (regs.es === null) {
        logln("[bdump] could not recover es, copying ds...");
        regs.es = Object.assign({}, regs.ds);
        logln('[bdump]');
    }

    // turn everything into strings because javascript
    __hex_obj(regs);
}

/**
 * Gather every register group into one object.
 *
 * Order matters: user registers first (later steps read `regs.rip`), and
 * segments before MSRs (the MSR step patches fs/gs bases).
 *
 * @returns {Object} raw (not yet hexed) register object.
 */
function __collect_regs() {
    const regs = {};

    __collect_user(regs);
    __collect_segs(regs);
    __collect_msrs(regs);
    __collect_fp(regs);
    __collect_simd(regs);
    __collect_kern(regs);

    return regs;
}

/**
 * Serialize `regs` as JSON into `<path>\regs.json`.
 *
 * The file is closed even when writing throws, so the handle is not leaked.
 *
 * @param {string} path - output directory.
 * @param {Object} regs - register object (already converted to hex strings).
 */
function __save_regs(path, regs) {
    const Fs = host.namespace.Debugger.Utility.FileSystem;

    const data = JSON.stringify(regs);

    const file = Fs.CreateFile(path + '\\regs.json');
    try {
        const writer = Fs.CreateTextWriter(file);
        writer.WriteLine(data);
    } finally {
        file.Close();
    }
}

/**
 * Write `<path>\mem.dmp` with the debugger's `.dump` command and echo its output.
 *
 * @param {string} dmp_type - 'full' (`/f`) or 'active-kernel' (`/ka`).
 * @param {string} path - output directory.
 */
function __save_mem(dmp_type, path) {
    const control = host.namespace.Debugger.Utility.Control;
    const options = new Map([
        ['full', '/f'],
        ['active-kernel', '/ka']
    ]);

    const option = options.get(dmp_type);
    if (option === undefined) {
        logln(`[bdump] ${dmp_type} is an unknown type`);
        return;
    }

    for (const line of control.ExecuteCommand(`.dump ${option} ${path}\\mem.dmp`)) {
        logln(`[bdump] ${line}`);
    }
}

/**
 * Create the dump directory, then save the registers and the memory dump.
 *
 * @param {string} dmp_type - dump flavour passed to __save_mem.
 * @param {string} [path] - output directory; usage is printed when missing.
 */
function __bdump(dmp_type, path) {
    if (path == null) {
        usage();
        return;
    }

    logln('[bdump] creating dir...');
    __create_dir(path);

    logln('[bdump] saving regs...');
    const regs = __collect_regs();
    logln('[bdump] register fixups...');
    __fixup_regs(regs);
    __save_regs(path, regs);
    logln('[bdump] saving mem, get a coffee or have a smoke, this will probably take around 10-15 minutes...');
    __save_mem(dmp_type, path);

    logln('[bdump] done!');
}

/** `!bdump_full` entry point: full memory dump. */
function __bdump_full(path) {
    return __bdump('full', path);
}

/** `!bdump_active_kernel` / `!bdump` entry point: active kernel memory dump. */
function __bdump_active_kernel(path) {
    return __bdump('active-kernel', path);
}

/**
 * WinDbg script entry point: prints the usage and registers the command
 * aliases. `bdump` is an alias for the active-kernel dump.
 */
function initializeScript() {
    usage();

    return [
        new host.apiVersionSupport(1, 2),
        new host.functionAlias(__bdump_full, "bdump_full"),

        new host.functionAlias(__bdump_active_kernel, "bdump_active_kernel"),
        new host.functionAlias(__bdump_active_kernel, "bdump"),
    ];
}

/*
Head of cpu-jtoy.py from the surrounding repository dump. This text shares
physical lines with bdump.js and is preserved verbatim; the file continues on
the next line of the dump.

--------------------------------------------------------------------------------
/cpu-jtoy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import json
4 | import sys
5 | import yaml
6 |
7 | """
8 | This is an annoying hack -- Right now the windbg js dumps integer values as
9 | hex strings, e.g.:
10 | { "rax": "0x4141" }
11 |
12 | This is because js ints are 53 bit, so we can't actually store full 64b
13 | registers. Writing a decoder for this in rust kind sucks for two reasons:
14 | - We'd need to create a separate decode function for each bitsize we want
15 | to decode, e.g. one for u16, u32, and u64
16 | - We'd need to create a separate decode function for each array size we
17 | want to decide, e.g. [u64; 4 vs [u64; 6]
18 |
19 | While this is definitely doable, I'd rather just bang out a json to yaml
20 | converter and have the mofo load yaml instead.
*/
21 | """ 22 | 23 | def usage(): 24 | print('Usage: %s ' % sys.argv[0]) 25 | sys.exit(-1) 26 | 27 | # json hex string decoder class 28 | # https://stackoverflow.com/questions/45068797/how-to-convert-string-int-json-into-real-int-with-json-loads 29 | class HexStr(json.JSONDecoder): 30 | def decode(self, s): 31 | result = super(HexStr, self).decode(s) 32 | return self._decode(result) 33 | 34 | def _decode(self, o): 35 | if isinstance(o, str) or isinstance(o, unicode): 36 | try: 37 | return int(o, 0) 38 | except ValueError: 39 | try: 40 | return float(o) 41 | except ValueError: 42 | return o 43 | elif isinstance(o, dict): 44 | return {k: self._decode(v) for k, v in o.items()} 45 | elif isinstance(o, list): 46 | return [self._decode(v) for v in o] 47 | else: 48 | return o 49 | 50 | # yaml hex encoder functions 51 | # https://stackoverflow.com/questions/9100662/how-to-print-integers-as-hex-strings-using-json-dumps-in-python/9101562#9101562 52 | def hexint_presenter(dumper, data): 53 | return dumper.represent_int('%#x' % data) 54 | 55 | def unicode_representer(dumper, uni): 56 | node = yaml.ScalarNode(tag=u'tag:yaml.org,2002:str', value=uni) 57 | return node 58 | 59 | yaml.add_representer(int, hexint_presenter) 60 | yaml.add_representer(long, hexint_presenter) 61 | yaml.add_representer(unicode, unicode_representer) 62 | 63 | def main(): 64 | if len(sys.argv) != 2: usage() 65 | 66 | json_cpu_file = sys.argv[1] 67 | 68 | with open(json_cpu_file, 'rb') as h: 69 | j = json.load(h, cls=HexStr) 70 | 71 | # HACK HACK HACK 72 | # remove when I acutally lift zmm state from windbg 73 | j['zmm'] = [] 74 | for ii in range(32): 75 | j['zmm'].append({ 'q': [0 for _ in range(8)] }) 76 | 77 | y = yaml.dump(j) 78 | 79 | print(y) 80 | 81 | if __name__ == '__main__': 82 | main() 83 | --------------------------------------------------------------------------------