├── LICENSE.txt ├── README.txt └── protod /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 SYSDREAM 2 | 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is furnished 9 | to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 15 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 16 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 17 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 18 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 19 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | Author: Damien Cauquil -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | Protod - Protobuf's metadata extractor 2 | (c) 2012, Sysdream (d.cauquil@sysdream.com) 3 | 4 | WHAT IS PROTOD ? 5 | ---------------- 6 | 7 | Protod is a tool able to extract Google's protobuf metadata from any binary 8 | file. This version has been designed to cover every file format. 9 | 10 | The goal of this tool is to recover serialized protobuf's metadata inserted 11 | at compilation time inside an executable, and to make it available as .proto 12 | file, ready to compile with protoc (protobuf's compiler). 
13 | 14 | For further information on Google's protobuf library, please see: 15 | 16 | https://developers.google.com/protocol-buffers/docs/overview 17 | 18 | 19 | HOW TO USE THIS TOOL ? 20 | ---------------------- 21 | 22 | Its usage is very simple. Here is a sample: 23 | 24 | To extract every metadata file (.proto) from a given executable: 25 | 26 | $ python protod somebinary 27 | 28 | 29 | IS THIS TOOL LIMITED ? 30 | ---------------------- 31 | 32 | Current version does not support every kind of field; we are aware of this. 33 | It was developed as a proof-of-concept to demonstrate this technique, and of 34 | course you are more than welcome to contribute ! 35 | 36 | Feel free to fork this project on GitHub, and let us know about your issues 37 | and ideas ! 38 | 39 | 40 | THANKS 41 | ------ 42 | 43 | Many thanks to UNclePecos for his time, and to all of Sysdream's staff for 44 | their support. 45 | -------------------------------------------------------------------------------- /protod: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Protod, version 1.1 - Generic version 5 | 6 | Copyright (c) 2012 SYSDREAM 7 | 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is furnished 14 | to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 
###########
# helpers
###########

# Characters accepted (case-insensitively) in an embedded .proto file name.
_FILENAME_CHARSET = frozenset('abcdefghijklmnopqrstuvwxyz0123456789-_/$,.[]()')


def is_valid_filename(filename):
    '''
    Check if given filename may be valid

    The name is lower-cased and every character must belong to a small
    whitelist (letters, digits and ``-_/$,.[]()``). An empty name is
    accepted, since there is no character to reject.

    filename may be a text string or a byte string.
    Returns True when every character is whitelisted, False otherwise.
    '''
    # On Python 3 a byte string iterates as integers, which can never match
    # the character whitelist. latin-1 maps each byte to exactly one
    # character, so decoding cannot fail and cannot merge bytes. On Python 2
    # ``bytes`` is ``str`` and this branch is skipped.
    if isinstance(filename, bytes) and not isinstance(filename, str):
        filename = filename.decode('latin-1')
    return all(char in _FILENAME_CHARSET for char in filename.lower())


def decode_varint128(stream):
    '''
    Decode Varint128 from buffer

    Reads bytes from the start of stream until one has its high bit (0x80)
    clear, or until the stream is exhausted. The low 7 bits of each byte
    form one group; the first byte carries the least significant group.

    stream may be a byte string, a bytearray or a text string of 1-char items.
    Returns a tuple (value, count) where count is the number of bytes consumed.
    Raises ValueError when stream is empty.
    '''
    value = 0
    count = 0
    for item in stream:
        # Python 3 bytes/bytearray yield ints, Python 2 str yields 1-char strings.
        raw_byte = item if isinstance(item, int) else ord(item)
        # Each successive group is 7 bits more significant than the previous one.
        value |= (raw_byte & 0x7F) << (7 * count)
        count += 1
        if not raw_byte & 0x80:
            break
    if count == 0:
        # Keep the exception type the bit-string based implementation raised.
        raise ValueError('cannot decode a varint from an empty stream')
    return (value, count)


def render_type(field_type, package):
    '''
    Return the string representing a given type inside a given package

    Both names are split on '.', and the leading components they share are
    dropped from field_type. The last component of field_type is always
    kept, even when it also matches the package.

    Returns the remaining components joined with '.'.
    '''
    type_parts = field_type.split('.')
    package_parts = package.split('.')
    shared = 0
    for type_part, package_part in zip(type_parts, package_parts):
        if type_part != package_part:
            break
        shared += 1
    # Never strip the whole name: keep at least the final component.
    shared = min(shared, len(type_parts) - 1)
    return '.'.join(type_parts[shared:])
89 | ''' 90 | Homemade Protobuf fields walker 91 | 92 | This class allows Protod to walk the fields 93 | and determine the probable size of the protobuf 94 | serialized file. 95 | ''' 96 | 97 | def __init__(self, stream): 98 | self._stream = stream 99 | self._size = -1 100 | 101 | def get_size(self): 102 | return self._size 103 | 104 | def walk(self): 105 | end = False 106 | offset = 0 107 | while (not end) and (offset0: 202 | for nested in field.nested_type: 203 | buffer += self.renderField(nested, depth+1, _package, nested=True) 204 | if len(field.enum_type)>0: 205 | for enum in field.enum_type: 206 | buffer += self.renderEnum(enum, depth+1, _package) 207 | if len(field.field)>0: 208 | for field in field.field: 209 | buffer += self.renderField(field, depth+1, _package) 210 | buffer += '%s}' % (' '*depth) 211 | buffer += '\n\n' 212 | return buffer 213 | 214 | 215 | def render(self, filename=None): 216 | print '[+] Processing %s' % self.desc.name 217 | buffer = '' 218 | buffer += 'package %s;\n\n' % self.desc.package 219 | 220 | # add dependencies 221 | if len(self.desc.dependency)>0: 222 | for dependency in self.desc.dependency: 223 | buffer += 'import "%s";\n' % dependency 224 | buffer += '\n' 225 | 226 | if len(self.desc.enum_type)>0: 227 | for enum in self.desc.enum_type: 228 | buffer += self.renderEnum(enum, package=self.desc.package) 229 | if len(self.desc.message_type)>0: 230 | for message in self.desc.message_type: 231 | buffer += self.renderField(message, package=self.desc.package) 232 | if filename: 233 | _dir = os.path.dirname(filename) 234 | if _dir != '' and not os.path.exists(_dir): 235 | os.makedirs(_dir) 236 | open(filename,'w').write(buffer) 237 | else: 238 | _dir = os.path.dirname(self.desc.name) 239 | if _dir != '' and not os.path.exists(_dir): 240 | os.makedirs(_dir) 241 | open(self.desc.name,'w').write(buffer) 242 | 243 | ############################# 244 | # Main code 245 | ############################# 246 | 247 | class ProtobufExtractor: 248 
class ProtobufExtractor:
    '''
    Protobuf metadata extractor

    Scans a binary file for serialized FileDescriptorProto blobs and writes,
    for each distinct descriptor found, a raw dump (<name>.protoc) and the
    file produced by FileDescriptorDisassembler.render().
    '''

    # Suffix every embedded descriptor name is expected to end with.
    _MARKER = b'.proto'
    # How many bytes before the marker are tried as the name length byte.
    _MAX_LOOKBEHIND = 64
    # Extra bytes tried beyond the walker's estimate, which is not exact.
    _SIZE_DELTA = 1024
    # A 64-bit varint never spans more than 10 bytes, so decoding a short
    # window avoids copying the whole remaining file for every attempt.
    _VARINT_WINDOW = 10

    def __init__(self, filename=None):
        '''
        filename is the path of the binary file to scan.
        '''
        self.filename = filename

    def extract(self):
        '''
        Read the file, locate the embedded descriptors and write them out.

        A file that cannot be read is reported on stdout and nothing else
        happens. Returns None.
        '''
        try:
            with open(self.filename, 'rb') as source:
                content = source.read()
        except IOError:
            # Report the path this instance was given, not the command line.
            print('[!] Unable to read %s' % self.filename)
            return
        self._write_outputs(self._find_descriptors(content))

    def _find_descriptors(self, content):
        '''
        Return the serialized descriptors found around each '.proto' marker.
        '''
        protos = []
        base = 0
        while True:
            marker_pos = content.find(self._MARKER, base)
            if marker_pos < 0:
                break
            try:
                blob = self._carve_descriptor(content, base, marker_pos)
            except ValueError:
                # Same outcome as before the rewrite: a ValueError raised
                # while examining a marker ends the scan, keeping what was
                # already found.
                break
            if blob is not None:
                protos.append(blob)
            # Resume after this marker; look-behind never crosses this point.
            base = marker_pos + len(self._MARKER)
        return protos

    def _carve_descriptor(self, content, base, marker_pos):
        '''
        Try to cut out the descriptor whose name ends at the given marker.

        The expected layout is one tag byte, one length byte, then the file
        name ending in '.proto'. Returns the serialized bytes, or None when
        no candidate parses.
        '''
        name_end = marker_pos + len(self._MARKER)
        # Stop before the tag byte would fall in front of `base`: a negative
        # slice index would silently wrap around to the end of the data.
        for j in range(min(self._MAX_LOOKBEHIND, marker_pos - base)):
            length_pos = marker_pos - j
            start = length_pos - 1
            try:
                window = content[length_pos:length_pos + self._VARINT_WINDOW]
                if decode_varint128(window)[0] != j + 5:
                    continue
                # latin-1 cannot fail and gives the validator a text string.
                name = content[length_pos + 1:name_end].decode('latin-1')
                if not is_valid_filename(name):
                    continue

                # Walk the fields and get a probable size
                walker = ProtobufFieldsWalker(content[start:])
                walker.walk()
                probable_size = walker.get_size()

                # Probable size approach is not perfect, so a delta is added
                # to be sure not to miss something. Lengths beyond the end of
                # the data would only repeat the same slice, so clamp them.
                longest = min(probable_size + self._SIZE_DELTA,
                              len(content) - start)
                for k in range(longest, 0, -1):
                    candidate = content[start:start + k]
                    try:
                        fds = FileDescriptorProto()
                        fds.ParseFromString(candidate)
                    except (DecodeError, UnicodeDecodeError):
                        continue
                    print('[i] Found protofile %s (%d bytes)' % (name, k))
                    return candidate
                # Only the first plausible length byte is ever examined.
                return None
            except IndexError:
                pass
        return None

    def _write_outputs(self, protos):
        '''
        Load successively each binary proto file and rebuild it from scratch.
        '''
        seen = set()
        for blob in protos:
            try:
                # Load the prototype
                fds = FileDescriptorProto()
                fds.ParseFromString(blob)
                res = FileDescriptorDisassembler(fds)
            except DecodeError:
                continue
            name = res.desc.name
            if not name or name in seen:
                continue
            # NOTE(review): `name` comes from the scanned binary and is used
            # as an output path as-is; absolute or '..' paths are not rejected.
            try:
                # The raw dump is written before render() runs, so its
                # directory has to exist already.
                target_dir = os.path.dirname(name)
                if target_dir and not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                with open(name + '.protoc', 'wb') as raw_dump:
                    raw_dump.write(blob)
                res.render()
                seen.add(name)
            except DecodeError:
                pass
            except (IOError, OSError) as error:
                # One unwritable output must not be reported as a read
                # failure, nor stop the remaining descriptors.
                print('[!] Unable to write %s: %s' % (name, error))


if __name__ == '__main__':
    if len(sys.argv) >= 2:
        print("[i] Extracting from %s ..." % sys.argv[1])
        extractor = ProtobufExtractor(sys.argv[1])
        extractor.extract()
        print("[i] Done")
    else:
        print("[ Protod (Protobuf metadata extractor) (c) 2012 Sysdream ]")
        print('')
        print('[i] Usage: %s [executable]' % sys.argv[0])