├── README
├── vcg_GraphLink.py
├── vcg_GraphNode.py
├── vcg_parse.py
├── vcg_Graph.py
├── rtti.parser.py
└── x86_RE_lib.py


/README:
--------------------------------------------------------------------------------
1 | zynamics RTTI Helper Scripts have moved to Google Code
2 | ======================================================
3 | 
4 | This repository has moved to Google Code:
5 | http://code.google.com/p/zynamics/source/checkout?repo=rtti-helper


--------------------------------------------------------------------------------
/vcg_GraphLink.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | class vcgGraphLink:
 3 |   """A class that represents the internal Graphlink used for dealing with .vcg's
 4 |   
 5 |   Some longer documentation will be put here in the future
 6 |   
 7 |   """      
 8 |   def __init__(self, source, target, attributes = { 'color':'black' }):
 9 |     self.sourcename = source
10 |     self.targetname = target
11 |     self.attributes = {}
12 |     for k, v in attributes.items():
13 |       self.attributes[ k ] = v
14 |   
15 |   def get_sourcename( self ):
16 |     return self.sourcename
17 |   
18 |   def get_targetname( self ):
19 |     return self.targetname
20 |   
21 |   def set_attribute( self, attrib_name, attrib_string ):
22 |     self.attributes[ attrib_name ] = attrib_string
23 |     
24 |   def get_attribute( self, attrib_name ):
25 |     return self.attributes[ attrib_name ]
26 |     
27 |   def add_attributes( self, attributes ):
28 |     for k, v in attributes.items():
29 |       self.attributes[ k ] = v
30 |     
31 |   def make_vcg_output( self ):
32 |     output = 'edge: { sourcename: "' + self.sourcename + '" targetname: "' + self.targetname +'" \n\t'
33 |     for k, v in self.attributes.items():
34 |       output = output + k + ": " + v + "\n\t"
35 |     output = output + '}\n'
36 |     return output
37 |     
38 |   def parse_vcg_output( self, vcg_string ):
39 |     print "Not implemented yet !"
40 | 
41 |   def make_daVinci_output( self ):
42 |     Unique_ID = self.sourcename+"_"+self.targetname
43 |     davinci_string = 'l("edge_' + Unique_ID + '", e("",[], r("node_'+ self.targetname +'")))'
44 |     return davinci_string
45 |     
46 |   def make_GML_output( self ):
47 |     GMLstring = '\tedge\n\t[\n\t\tsource "' + self.sourcename + '"\n'
48 |     GMLstring += '\t\ttarget "' + self.targetname + '"\n'
49 |     colorstring = '#000000'
50 |     for k, v in self.attributes.items():
51 |       if k == "label":
52 |         GMLstring += '\t\tlabel '+ self.attributes[ "label" ] + '\n'
53 |       if k == "color" and v == "red":
54 |         colorstring = '#CC0000'
55 |       if k == "color" and v == "green":
56 |         colorstring = '#00CC00'
57 |       if k == "color" and v == "blue":
58 |         colorstring = '#0000CC'
59 |     GMLstring += '\t\tgraphics\n\t\t[\n\t\t\tfill "'+ colorstring + '"\n\t\t\ttargetArrow "standard"\n\t\t]\n\t]\n'
60 |     return GMLstring


--------------------------------------------------------------------------------
/vcg_GraphNode.py:
--------------------------------------------------------------------------------
 1 | """A class that represents the internal Graphnode used for dealing with .vcg's
 2 | 
 3 | Some longer documentation will be put here in the future
 4 | """
 5 | import string, vcg_parse
 6 | class vcgGraphNode:
 7 |   """A class that represents the internal Graphnode used for dealing with .vcg's
 8 |   
 9 |   Some longer documentation will be put here in the future
10 |   
11 |   """  
12 |   def __init__(self, name_A, attributes_A = { 'color':'white' }):
13 |     self.name = name_A
14 |     self.attributes = {}
15 |     self.visited = 0
16 |     for k, v in attributes_A.items():
17 |       self.attributes[ k ] = v
18 |     
19 |   def get_name( self ):
20 |     return self.name
21 |     
22 |   def set_attribute( self, attrib_name, attrib_string ):
23 |     self.attributes[ attrib_name ] = attrib_string
24 |     
25 |   def get_attribute( self, attrib_name ):
26 |     return self.attributes[ attrib_name ]
27 |     
28 |   def add_attributes( self, attributes ):
29 |     for k, v in attributes.items():
30 |       self.attributes[ k ] = v
31 |   
32 |   def make_vcg_output( self ):
33 |     output = 'node:\t{\n\ttitle: "' + self.name + '"\n\t'
34 |     for k, v in self.attributes.items():
35 |       output = output + k + ": " + v + "\n"
36 |     output = output + '}\n'
37 |     return output
38 |     
39 |   def parse_vcg_output( self, vcg_string ):
40 | #    print "--->"+ vcg_string 
41 | # first off, retrieve the label and remove it (only multi-line attribute there is
42 | #    self.attributes[ "label" ] = vcg_parse.vcg_get_enclosed_attribute( vcg_string, "label" )
43 | #    print self.attributes[ "label" ]
44 | #    if self.attributes[ "label" ] == 0:
45 | #     self.attributes[ "label" ] = '" "'
46 | #      print "NO label found, weird !"
47 | #    else:
48 | #      vcg_string = string.replace( vcg_string, 'label: '+ self.attributes[ "label" ], "" )
49 |     tokens = string.split( vcg_string )
50 |     for k in tokens:
51 |       if k[-1] == ':':
52 |         # an attribute -- find the value
53 |         i = 0;
54 |         while i != len( tokens ):
55 |           if (tokens[ i ] == k) and (tokens[ i ] != "title:" ) and (tokens[ i ] != "node:"):# and (tokens[ i ] != "label:"):
56 | #            print "Setting attribute "+k+" to value "+ tokens[i + 1]+"\n"
57 |             self.attributes[ k[0:-1] ] = tokens[ i + 1 ]
58 |           i = 1+i
59 | 
60 |   def make_daVinci_output( self, graph ):
61 |     davinci_string = 'l("node_'+self.name+'", n("", [ a("OBJECT","'+ self.attributes[ "label" ][1:-1] + '")], [ '
62 |     # now add all the links
63 |     linkset = graph.Get_Downlinks_From( self.name )
64 |     if linkset != 0:
65 |       for k in linkset:
66 |         davinci_string += k.make_daVinci_output()
67 |         davinci_string += ','
68 |     davinci_string2 = davinci_string[:-1] + ']))\n'
69 |     return davinci_string2
70 |     
71 |   def make_GML_output( self, graph ):
72 |     gml_output = '\tnode\n\t[\n\t\tid "' + self.name + '"\n'
73 |     gml_output += '\t\tlabel '+ self.attributes[ "label" ] + '\n'
74 |     gml_output += '\t\tgraphics\n\t\t[\n\t\t\ttype "rectangle"\n\t\t\t'
75 |     if self.attributes["color"] == "red":
76 |         gml_output += 'fill "#FF2020"'
77 |     else:
78 |         gml_output += 'fill "#CCCCFF"'
79 |     gml_output += '\n\t\t\toutline "#000000"\n\t\t]\n\t]\n'
80 |     return gml_output


--------------------------------------------------------------------------------
/vcg_parse.py:
--------------------------------------------------------------------------------
  1 | import string
  2 | import vcg_Graph
  3 | 
  4 | def vcg_get_item( input_string, itemtype ):
  5 |   """
  6 |   Gets an item
  7 |   """
  8 |   index = string.find( input_string, itemtype+":" )
  9 |   if index == -1:
 10 |     return 0
 11 |   count = 1
 12 |   idx = index
 13 |   while input_string[ idx ] != "{":
 14 |     idx = idx + 1
 15 |     if idx > len( input_string ):
 16 |       return 0
 17 |   idx = idx + 1
 18 |   while count != 0:
 19 |     if input_string[ idx ] == "{":
 20 |       count = count + 1
 21 |     if input_string[ idx ] == "}":
 22 |       count = count - 1
 23 |     if idx > len( input_string ):
 24 |       idx = -1
 25 |       break
 26 |     idx = idx + 1
 27 |   if idx == -1:
 28 |     return 0
 29 |   return input_string[ index:idx ]
 30 | 
 31 | def get_enclosed( input_string, enclosechar ):
 32 |   """
 33 |   gets a string enclosed in enclosechar
 34 |   """
 35 |   index = 0
 36 |   while index <= len( input_string ):
 37 |     if input_string[ index ] == enclosechar[ 0 ]:
 38 |       break
 39 |     index = index + 1
 40 |   
 41 |   index2 = index + 1
 42 |   while index2 <= len( input_string ):
 43 |     if input_string[ index2 ] == enclosechar[ 0 ]:
 44 |       break
 45 |     index2 = index2 + 1
 46 |   
 47 |   return input_string[ index : index2+1 ]
 48 | 
 49 | def vcg_get_enclosed_attribute( input_string, attrib_name ):
 50 | #  print "Searching for " + attrib_name
 51 | #  print "Searching in " + input_string
 52 |   index = input_string.find(attrib_name + ":")
 53 |   if index == -1:
 54 |     return 0
 55 |   string = get_enclosed( input_string[index :], '"' )
 56 |   return string
 57 | 
 58 | def vcg_get_attribute( input_string, attrib_name ):
 59 |   index = input_string.find(attrib_name + ":")
 60 |   if index == -1:
 61 |     return 0
 62 |   string = input_string[ index + len(attrib_name) + 2 : ]
 63 |   string = string[ : string.find(" ")]
 64 |   return string
 65 | 
 66 | def vcg_node_attributes():
 67 |   attriblist = [ "color" ]
 68 |   return
 69 | 
 70 | def vcg_graph_attributes():
 71 |   fp = file( "graph_attributes.txt", "rt" )
 72 |   return fp.readlines()
 73 | 
 74 | def vcg_link_attributes():
 75 |   fp = file( "edge_attributes.txt", "rt" )
 76 |   return fp.readlines()
 77 |   
 78 | def vcg_node_attributes():
 79 |   fp = file( "node_attributes.txt", "rt" )
 80 |   return fp.readlines()
 81 | 
 82 | def vcg_string_to_graphs( input_string ):
 83 |   """
 84 |   Parses a VCG File to a list of graphs
 85 |   """
 86 |   graphlist = []
 87 |   graphstring = vcg_get_item( input_string, "graph" )   # get the first graph as a string
 88 |   
 89 |   while graphstring != 0:
 90 |     input_string.replace( graphstring, "" )     # remove the graph string from input
 91 |                                                 # get the title of the graph
 92 |     newgraph = vcg_Graph.vcgGraph( vcg_get_enclosed_attribute( graphstring, "title" ))   
 93 |                                                 # get the first node of the graph
 94 |     nodestring = vcg_get_item( graphstring, "node" )    
 95 |                                                 # loop over nodes...
 96 |     while nodestring != 0:
 97 |       graphstring.replace( nodestring, "" )     # remove node string from input
 98 |                                                 # get the name of the node
 99 |       newnode = newgraph.Add_Node( vcg_get_enclosed_attribute( nodestring, "title" ))
100 |                                                 # Iterate over all other attributes
101 |       
102 |       
103 |       
104 |     linkstring = vcg_get_item( graphstring, "link" )
105 |     while linkstring != 0:
106 |       graphstring.replace( linkstring, "" )
107 |     graphlist.append( newgraph )
108 |     graphstring = vcg_get_item( input_string, "graph" )
109 |     


--------------------------------------------------------------------------------
/vcg_Graph.py:
--------------------------------------------------------------------------------
  1 | """A generic file to manipulate graph description files (GDL)
  2 | 
  3 | (Put some longer documentation here in the future)
  4 | """
  5 | import vcg_GraphNode
  6 | import vcg_GraphLink
  7 | import vcg_parse
  8 | import string
  9 | 
 10 | 
 11 | class vcgGraph:
 12 | 	"""A class for manipulating .vcg Graph files (as used by Wingraph32 or AiSee)
 13 | 	
 14 | 	Some longer documentation will be put here in the future
 15 | 	
 16 | 	"""	
 17 | 	def __init__(self, attributes = {"title":'"Graph"', "manhattan_edges":"no", "layoutalgorithm":"maxdepth"}):
 18 | 		self.nodes = {}
 19 | 		self.links = []
 20 | 		self.attributes = {}
 21 | 		for k, v in attributes.items():
 22 | 			self.attributes[ k ] = v
 23 | 	
 24 | 	def add_attributes( self, attributes ):
 25 | 		for k, v in self.attributes.items():
 26 | 			self.attributes[ k ] = v
 27 | 
 28 | 	def has_node( self, name ):
 29 | 		if self.nodes.has_key( name ):
 30 | 			return 1
 31 | 		else:
 32 | 			return 0
 33 | 
 34 | 	def get_nodes( self ):
 35 | 		return self.nodes
 36 | 		
 37 | 	def get_links( self ):
 38 | 		return self.links
 39 | 
 40 | 	def set_attribute( self, attrib_name, attrib_string ):
 41 | 		self.attributes[ attrib_name ] = attrib_string
 42 | 		
 43 | 	def get_attribute( self, attrib_name ):
 44 | 		return self.attributes[ attrib_name ]
 45 | 
 46 | 	def gen_VCG_string(self):
 47 | 		output = "graph: { \n\t"
 48 | 		for k, v in self.attributes.items():
 49 | 			output = output + k + ": " + v + "\n\t"
 50 | 		for k, v in self.nodes.items():
 51 | 			output = output + v.make_vcg_output()
 52 | 		for k in self.links:
 53 | 			output = output + k.make_vcg_output()
 54 | 		output = output + "}\n"
 55 | 		return output
 56 | 		
 57 | 	def write_VCG_File(self, filename):
 58 | 		f = open( filename, 'w' )
 59 | 		s = self.gen_VCG_string()
 60 | 		f.write( s )
 61 | 		f.close()
 62 | 		
 63 | 	def load_VCG_File(self, filename):
 64 | 		file = open( filename, "r" )
 65 | 		completestring = file.read()				# read the entire file
 66 | 		# now start iterating through the file, first getting all nodes
 67 | 		in_node = vcg_parse.vcg_get_item( completestring, "node" )
 68 | 		while in_node != 0:
 69 | 			nodetitle = vcg_parse.vcg_get_enclosed_attribute( in_node, "title" )
 70 | 			print "Adding node with title: "+nodetitle[1:-1]
 71 | 			newnode = self.Add_Node( nodetitle[1:-1] ) 
 72 | 			newnode.parse_vcg_output( in_node )
 73 | 			# cut the node away ... 
 74 | 			chopstring = string.replace( completestring, in_node, "" )
 75 | 			completestring = chopstring
 76 | 			# get next one
 77 | 			in_node = vcg_parse.vcg_get_item( completestring, "node" )
 78 | 		in_link = vcg_parse.vcg_get_item( completestring, "edge" )		
 79 | 		while in_link != 0:
 80 | 			linksource = vcg_parse.vcg_get_enclosed_attribute( in_link, "sourcename" )
 81 | 			linktarget = vcg_parse.vcg_get_enclosed_attribute( in_link, "targetname" )
 82 | 			newlink = self.Add_Link( linksource[1:-1], linktarget[1:-1] )
 83 | 			# cut the link away
 84 | 			chopstring = string.replace( completestring, in_link, "" )
 85 | 			srchidx = string.find( chopstring, in_link )
 86 | 			completestring = chopstring
 87 | 			# get next one
 88 | 			in_link = vcg_parse.vcg_get_item( completestring, "edge" )
 89 | 			
 90 | 	def Add_Node(self, nodename):
 91 | 		self.nodes[ nodename ] = vcg_GraphNode.vcgGraphNode( nodename )
 92 | 		return self.nodes[ nodename ]
 93 | 	
 94 | 	def Add_Link(self, sourcename, targetname):
 95 | 		link = vcg_GraphLink.vcgGraphLink( sourcename, targetname )
 96 | 		self.links.append( link )
 97 | 		return link
 98 | 		
 99 | 	def Del_Node(self, nodename):
100 | 		# Create a temporary copy of the list
101 | 		removelinks = []
102 | 		for x in self.links:												# remove the links to/from 
103 | 			if x.targetname == nodename:
104 | 				removelinks.append( x )
105 | 			if x.sourcename == nodename:							# this node
106 | 				removelinks.append( x )
107 | 		for x in removelinks:
108 | 			if x in self.links:
109 | 				self.links.remove( x )
110 | 		del self.nodes[nodename]
111 | 		return
112 | 			
113 | 	def Get_Links( self, sourcename, targetname ):
114 | 		linkset = []
115 | 		for x in self.links:												# remove the relevant link
116 | 			if x.sourcename == sourcename and x.targetname == targetname:
117 | 				linkset.append( x )
118 | 		return linkset
119 | 		
120 | 	def Del_Link2(self, sourcename, targetname):
121 | 		for x in self.links:												# remove the relevant link
122 | 			if x.sourcename == sourcename and x.targetname == targetname:
123 | 				self.links.remove( x )
124 | 	
125 | 	def Del_Link( self, link ):
126 | 		self.links.remove( link )
127 | 	
128 | 	def Get_Downlinks_To( self, nodename ):
129 | 		linkslist = []
130 | 		for x in self.links:
131 | 			if x.targetname == nodename:
132 | 				linkslist.append( x )
133 | 		return linkslist
134 | 		
135 | 	def Get_Downlinks_From( self, nodename ):
136 | 		linkslist = []
137 | 		for x in self.links:
138 | 			if x.sourcename == nodename:
139 | 				linkslist.append( x )
140 | 		return linkslist
141 | 
142 | 	def Get_Parents( self, nodename ):
143 | 		parentnames = []
144 | 		linkslist = self.Get_Downlinks_To( nodename )
145 | 		for x in linkslist:
146 | 			parentnames.append( x.sourcename )
147 | 		return parentnames
148 | 		
149 | 	def Get_Children( self, nodename ):
150 | 		childnames = []
151 | 		linkslist = self.Get_Downlinks_From( nodename )
152 | 		for x in linkslist:
153 | 			childnames.append( x.targetname )
154 | 		return childnames
155 | 		
156 | 	def Get_Node( self, nodename ):
157 | 		is_valid_name = 0
158 | 		for x in self.nodes.keys():
159 | 			if x == nodename:
160 | 				is_valid_name = 1
161 | 		if is_valid_name:
162 | 			return self.nodes[ nodename ]
163 | 		return 0
164 | 
165 | 	def Get_Top_Nodes( self ):
166 | 		topnodes = []
167 | 		for x in self.nodes.keys():
168 | 			bLinkedTo = 0
169 | 			for y in self.links:
170 | 				if( x == y.targetname ):
171 | 					bLinkedTo = 1
172 | 					break
173 | 			if( bLinkedTo == 0 ):
174 | 				topnodes.append( x )
175 | 		return topnodes
176 | 		
177 | 	def Get_Bottom_Nodes( self ):
178 | 		bottom_nodes = []
179 | 		for x in self.nodes.keys():
180 | 			bLinkedFrom = 0
181 | 			for y in self.links:
182 | 				if( x == y.sourcename ):
183 | 					bLinkedFrom = 1
184 | 					break
185 | 			if( bLinkedFrom == 0 ):
186 | 				bottom_nodes.append( x )
187 | 		return bottom_nodes
188 | 	
189 | 	def Get_Nodes_Before( self, node ):
190 | 		for x in self.nodes.keys():
191 | 			self.nodes[ x ].visited = 0
192 | 		beforeset = []
193 | 		workset = []
194 | 		workset.append( node )
195 | 		print "Len(workset) is %d" % len( workset )
196 | 		while len( workset ) != 0:
197 | 			print beforeset
198 | 			nextnode = workset[ 0 ]
199 | 			workset = workset[1:]
200 | 			parentset = self.Get_Parents( nextnode )
201 | 			for x in parentset:
202 | 				if self.nodes[ x ].visited == 0:
203 | 					self.nodes[ x ].visited = 1
204 | 					beforeset.append( x )
205 | 					workset.append( x )
206 | 		print beforeset
207 | 		return beforeset
208 | 
209 | 	def get_subgraph_to( self, node ):
210 | 		tmpgrph = vcgGraph()
211 | 		tmpgrph.Add_Node( node )
212 | 		worklist = [ node ]
213 | 		while len( worklist ) > 0:
214 | 			currnode = worklist.pop( 0 )
215 | 			children = self.Get_Parents( currnode )
216 | 			for parent in parents:
217 | 				tmpgrph.Add_Link( parent, currnode )
218 | 				if not tmpgrph.has_node( parent ):
219 | 					tmpgrph.Add_Node( parent )
220 | 					worklist.append( parent )
221 | 		return tmpgrph
222 | 
223 | 	def get_subgraph_from( self, node ):
224 | 		tmpgrph = vcgGraph()
225 | 		tmpgrph.Add_Node( node )
226 | 		worklist = [ node ]
227 | 		while len( worklist ) > 0:
228 | 			currnode = worklist.pop( 0 )
229 | 			children = self.Get_Children( currnode )
230 | 			for child in children:
231 | 				tmpgrph.Add_Link( currnode, child )
232 | 				if not tmpgrph.has_node( child ):
233 | 					worklist.append( child )
234 | 					tmpgrph.Add_Node( child )
235 | 				
236 | 				
237 | 		return tmpgrph					
238 | 
239 | 	def Get_Nodes_After( self, node ):
240 | 		for x in self.nodes.keys():
241 | 			self.nodes[ x ].visited = 0
242 | 		beforeset = []
243 | 		workset = []
244 | 		workset.append( node )
245 | 		#		print "Len(workset) is %d" % len( workset )
246 | 		while len( workset ) != 0:
247 | 			#			print "Beforeset: "
248 | 			#			print beforeset
249 | 			nextnode = workset[ 0 ]
250 | 			workset = workset[1:]
251 | 			#			print "Getting children of %s" % nextnode
252 | 			parentset = self.Get_Children( nextnode )
253 | 			#			print "Childrenset is:"
254 | 			#			print parentset
255 | 			for x in parentset:
256 | 				if self.nodes[ x ].visited == 0:
257 | 					self.nodes[ x ].visited = 1
258 | 					beforeset.append( x )
259 | 					workset.append( x )
260 | 		#		print beforeset
261 | 		return beforeset
262 | 		
263 | 	def Get_Path_From_To( self, nodeBegin, nodeEnd ):
264 | 		preset = self.Get_Nodes_Before( nodeEnd )
265 | 		postset = self.Get_Nodes_After( nodeBegin )
266 | 		#		print "Preset is: " 
267 | 		#		print preset
268 | 		#		print "Postset is: " 
269 | 		#		print postset
270 | 		intersect = []
271 | 		for x in preset:
272 | 			for y in postset:
273 | 				if x == y:
274 | 					intersect.append( x )
275 | 		return intersect
276 | 		
277 | 	def make_daVinci_output( self ):
278 | 		davinci_output = '['
279 | 		for key, node in self.nodes.items():
280 | 			davinci_output += node.make_daVinci_output( self ) 
281 | 			davinci_output += ','
282 | 		return davinci_output[:-1] + ']'
283 | 
284 | 	def make_GML_output( self ):
285 | 		output = 'Creator: "vcgGraph.py"\nVersion 2.2\ngraph\n['
286 | 		output += "\thierarchic 1\n"
287 | 		# output += "\tlabel "+self.attributes[ "label" ]+"\n"
288 | 		for key, node in self.nodes.items():
289 | 			output += node.make_GML_output( self )
290 | 		for link in self.links:
291 | 			output += link.make_GML_output()
292 | 		output += "]\n"
293 | 		return output


--------------------------------------------------------------------------------
/rtti.parser.py:
--------------------------------------------------------------------------------
  1 | from x86_RE_lib import *
  2 | import sets
  3 | 
  4 | class TypeDescriptor:
  5 |     def __init__( self, RTTI, address ):
  6 |         self.address = address
  7 |         self.name = get_string( address + 8 )
  8 |     def __repr__( self ):
  9 |         return "TypeDescriptor(%lx,%s)" % (self.address, self.name)
 10 | 
 11 | class CompleteObjectLocator:
 12 |     def __init__( self, RTTI, address ):
 13 |         print "[!] Dealing with CompleteObjectLocator at %lx" % address 
 14 |         self.address = address
 15 |         self.signature = Dword(address)
 16 |         self.offset = Dword(address+4)
 17 |         self.cdoffset = Dword(address+8)
 18 |         self.type = TypeDescriptor(RTTI, Dword(address+12))
 19 |         self.hierarchy = RTTI.load_hierarchy_descriptor( Dword(address+16))
 20 | 
 21 | class HierarchyDescriptor:
 22 |     def __init__( self, RTTI, address ):
 23 |         self.address = address
 24 |         self.signature = Dword( address )
 25 |         self.attributes = Dword( address + 4 )
 26 |         self.number_of_bases = Dword( address + 8 )
 27 |         if self.number_of_bases > 20000:
 28 |           raise "Attempting to parse %lx as Hierarchy Descriptor, but it isn't!" % self.address
 29 |     def __load_full__( self, RTTI ):
 30 |         print "[!] dealing with the hierarchy descriptor at %lx" % self.address
 31 |         print "[!] Number of bases is %d" % self.number_of_bases
 32 |         self.bases = [ RTTI.load_base_class_descriptor( Dword( Dword( self.address+12 ) + i*4 )) for i in range(0, self.number_of_bases) ]
 33 |         #print self.bases
 34 |         
 35 |     def get_derivation_poset( self, name ):
 36 |         result = {}
 37 |         result[ name ] = sets.Set()
 38 |         result[ name ].update( [ base.type.name for base in self.bases ] )
 39 |         for baseclass in self.bases:
 40 |             if baseclass.type.name != name:
 41 |                 tempdict = baseclass.hierarchy_descriptor.get_derivation_poset( baseclass.type.name )
 42 |                 # Now join the sets
 43 |                 for classname in tempdict.keys():
 44 |                     if result.has_key( classname ):
 45 |                         result[ classname ].update( tempdict[classname] )
 46 |                     else:
 47 |                         result[ classname ] = tempdict[classname]
 48 |         return result
 49 |     def __repr__(self):
 50 |         return "HierarchyDescriptor(%lx)" % self.address
 51 | 
 52 | class BaseClassDescriptor:
 53 |     def __init__( self, RTTI, address ):
 54 |         self.address = address
 55 |         self.type = TypeDescriptor( RTTI, Dword(address))
 56 |         self.num_contained_bases = Dword( address + 4 )
 57 |         self.mdisp = Dword( address + 8 ) 
 58 |         self.pdisp = Dword( address + 12 )
 59 |         self.vdisp = Dword( address + 16 )
 60 |         self.attributes = Dword( address + 20 )
 61 |         
 62 |     def __load_full__( self, RTTI ):
 63 |         print "[!] Loading hierarchy descriptor for base class descriptor at %lx" % self.address
 64 |         self.hierarchy_descriptor = RTTI.load_hierarchy_descriptor( Dword( self.address+24))
 65 |         
 66 |     def __repr__( self ):
 67 |         s =     "BaseClassDescriptor(0x%lx)[%s]" % (self.address, self.type.name)
 68 |         return s
 69 | 
 70 | class VFTable:
 71 |     def __init__( self, RTTI, address ):
 72 |         self.address = address
 73 |         self.complete_object_locator = CompleteObjectLocator( RTTI, Dword( address - 4 ))
 74 |         # now scan forwards until no more code is found
 75 |         self.methods = []
 76 |         while isCode( getFlags( Dword(address) ) ):
 77 |             self.methods.append( Dword( address ))
 78 |             address = address + 4
 79 |         self.end_address = address
 80 |     
 81 |     def get_methods( self ):
 82 |       return self.methods
 83 |       
 84 |     def get_parent_class_and_offset( self, derivation_hasse_diagram, derivedclass ):
 85 |       """
 86 |         Returns the name of the parent class this vtable "comes from", if there are any
 87 |       """
 88 |       lastbase = ""
 89 |       vtable_offset = vtable.complete_object_locator.offset 
 90 |       parents = [ (base.mdisp, base) for base in vtable.complete_object_locator.hierarchy.bases \
 91 |         if base.type.name in self.derivation_hasse_diagram[ derivedclass ]]
 92 |       right_offset = max( [ x[0] for x in parents if vtable_offset > x[0]] )
 93 |       right_parent = [ x for x in parents if x[0] == right_offset ][0]
 94 |       vtable_in_parent = vtable_offset-x[0]
 95 |       return (vtable_in_parent, right_parent[1])
 96 |       
 97 |     def __repr__( self ):
 98 |         s =   "vtable at %lx with %d methods" % (self.address, len(self.methods))
 99 |         return s
100 |     
101 | 
102 | class RTTI:
103 |     def __init__( self, typeinfo_vtable ):
104 |         self.base_class_descriptors = {}            # A dictionary mapping address->base class descriptors
105 |         self.hierarchy_descriptors_by_address = {}  # A dictionary mapping address->hierarchy descriptors
106 |         print "[!] scanning for vtables..."
107 |         self.vtables_by_name = self.__scan_for_vtables_from_typeinfo( typeinfo_vtable ) # A dictionary mapping name->list of vtables
108 |         print "[!] creating inheritance-poset..."
109 |         self.derivation_poset = self.__create_derivation_poset( self.vtables_by_name )
110 |         print "[!] creating inheritance hasse diagram..."
111 |         self.derivation_hasse_diagram = self.__create_hasse_diagram( self.derivation_poset )
112 |         print "[!] creating UML diagram %s.gml" % get_root_filename()
113 |         self.create_UML_style_diagram( "c:\\%s.gml" % get_root_filename() )
114 |         print "[!] Renaming vtables ... " 
115 |         self.__rename_vtables( self.vtables_by_name )
116 |         print "[!] Renaming class methods ..." 
117 |         self.__rename_class_methods( self.vtables_by_name, self.derivation_hasse_diagram )
118 |         print "[!] Done"
119 |         
120 |         
121 |     def load_base_class_descriptor( self, address ):
122 |         if self.base_class_descriptors.has_key( address ):
123 |             return self.base_class_descriptors[ address ]
124 |         else:
125 |             base = BaseClassDescriptor( self, address )
126 |             self.base_class_descriptors[ address ] = base
127 |             base.__load_full__( self )
128 |             return base
129 |     
130 |     def load_hierarchy_descriptor( self, address ):
131 |         if self.hierarchy_descriptors_by_address.has_key( address ):
132 |             return self.hierarchy_descriptors_by_address[ address ]
133 |         else:
134 |             hierarchy = HierarchyDescriptor( self, address )
135 |             self.hierarchy_descriptors_by_address[ address ] = hierarchy
136 |             hierarchy.__load_full__( self )
137 |             return hierarchy
138 | 
139 |     def __create_derivation_poset( self, name_vtable_dict ):
140 |         """
141 |             The derivation poset is simply a dictionary mapping class names to sets of class names
142 |             
143 |         """
144 |         derivation_poset = {}
145 |         for classname in name_vtable_dict.keys():
146 |             for vtable in name_vtable_dict[ classname ]:
147 |                 temp_dict = vtable.complete_object_locator.hierarchy.get_derivation_poset( classname )
148 |                 for name in temp_dict.keys():
149 |                     if derivation_poset.has_key( name ):
150 |                         derivation_poset[ name ].update( temp_dict[ name ])
151 |                     else:
152 |                         derivation_poset[ name ] = temp_dict[ name ] 
153 |         return derivation_poset                    
154 |       
155 |     def __invert_edges( self, mapping ):
156 |         edges = [ (k,v) for k in mapping.keys() for v in mapping[k] ]
157 |         result_dictionary = {}
158 |         for v, k in edges:
159 |             if result_dictionary.has_key(k):
160 |                 result_dictionary.add( v )
161 |             else:
162 |                 result_dictionary[k] = set([v])
163 |         return result_dictionary
164 |     
165 |     def __assign_levels( self, hierarchy_diagram ):
166 |         roots = [ n for n in hierarchy_diagram.get_nodes().keys() if len(hierarchy_diagram.Get_Parents(n)) == 0 ]
167 |         name_to_hierarchy_index = {}
168 |         index_counter = 0
169 |         worklist = roots
170 |         while len(worklist) > 0:
171 |             for n in worklist:
172 |                 name_to_hierarchy_index[n] = index_counter
173 |             index_counter = index_counter + 1
174 |             new_worklists = [ hierarchy_diagram.Get_Children(n) for n in worklist ]
175 |             new_worklist = set()
176 |             for wk in new_worklists:
177 |                 new_worklist.update( wk )
178 |             worklist = new_worklist
179 |         return name_to_hierarchy_index
180 |     
181 |     def __create_function_to_class_map( self, name_to_vtables ):
182 |         result = {}
183 |         # Create a dictionary mapping each method to the set of submethods
184 |         methods_to_process = set()
185 |         full_method_count = 0
186 |         for vtables in name_to_vtables.values():
187 |             for vtable in vtables:
188 |                 methods_to_process.update( vtable.get_methods())
189 |                 full_method_count = full_method_count + len( vtable.get_methods())
190 |         total_methods = len(methods_to_process)
191 |         
192 |         print "[!] Calculating submethods for %d methods..." % total_methods 
193 |         methods_to_submethods = {}
194 |         count = 0
195 |         for m in methods_to_process:
196 |             methods_to_submethods[ m ] = set(get_subfuncs_with_same_thisptr_rec(m))
197 |             count = count + 1
198 |             print "[!] Done with %d/%d" % (count, total_methods)
199 |         
200 |         method_count = 0
201 |         for name, vtables in name_to_vtables.items():
202 |             for vtable in vtables:
203 |                 for method in vtable.get_methods():
204 |                     print "[!] Processing method %d out of %d" % (method_count, full_method_count)
205 |                     method_count = method_count+1
206 |                     if result.has_key( method ):
207 |                         result[method].add( name )
208 |                     else:
209 |                         result[method] = set( [name] )
210 |                     print "%lx: tracking into function" % method
211 |                     extra_subfuncs = methods_to_submethods[ method ]
212 |                     print "Got %d subfuncs..." % len(extra_subfuncs)
213 |                     for m in extra_subfuncs:
214 |                         if result.has_key( m ):
215 |                             result[m].add(name)
216 |                         else:
217 |                             result[m] = set([name])
218 |         return result
219 | 
220 |     def __rename_class_methods( self, name_to_vtables, name_to_parent_map ):
221 |         hierarchy_diagram = self.create_UML_style_diagram()
222 |         levels = self.__assign_levels( hierarchy_diagram )
223 |         function_to_classes = self.__create_function_to_class_map( name_to_vtables )
224 |         for function, classes in function_to_classes.items():
225 |             commentstring = ""
226 |             levels_for_classes = [ (levels[c], c) for c in classes ]
227 |             min_level = min( [levels[c] for c in classes ] )
228 |             minimum_classes = [ c for c in classes if levels[c] == min_level ]
229 |             if len( minimum_classes ) > 1:
230 |                 for classname in classes:
231 |                     commentstring = commentstring + "%d - %s\n" % (levels[classname], classname)
232 |                 MakeComm( function, commentstring )
233 |                 print "%lx: Warning -- ambiguous function assignment" % function
234 |             else:
235 |                 new_name = minimum_classes[0] + "::" + Name(function)
236 |                 MakeName( function, new_name )
237 |                 print "Calling create_struct_from_ea for %lx, ecx, %s" % (function, minimum_classes[0])
238 |                 create_struct_from_ea( function, "ecx", minimum_classes[0])
239 |       
240 |     def __rename_vtables( self, name_to_vtables ):
241 |         for name, vtables in name_to_vtables.items():
242 |             for vtable in vtables:
243 |                 MakeName( vtable.address, "%s_vftable_%d" % (name, vtable.complete_object_locator.offset ))
244 |     
245 |     def __create_hasse_diagram( self, poset ):
246 |         # Begin by duplicating the poset, but remove that X is derived from X
247 |         tempdict = {}
248 |         for key, derived_set in poset.items():
249 |             new_set = sets.Set()
250 |             new_set.update( derived_set )
251 |             new_set.remove( key )
252 |             tempdict[ key ] = new_set
253 |         # Ok. Now iterate over all items and remove their parents
254 |         for key in tempdict.keys():
255 |             remove_set = sets.Set()
256 |             for base_class in tempdict[ key ]:
257 |                 remove_set.update( tempdict[ base_class ] )
258 |             tempdict[ key ] = tempdict[ key ].difference( remove_set )
259 |         return tempdict
260 |         
261 |     def __scan_for_vtables_from_typeinfo( self, typeinfo_vtable ):
262 |         """    Attempts to find all vtable's in an RTTI-enabled executable by walking
263 |             backwards from the typeinfo vtable.
264 | 
265 |             Returns a dictionary mapping names to lists of vtables
266 |         """
267 |         result_dict = {}
268 |         vtable_starts = []
269 |         # 
270 |         #   What follows is rather ugly hackish code that attempts
271 |         #   to enumerate all vtables through references to the typeinfo
272 |         #   vtable
273 |         print typeinfo_vtable
274 |         print typeinfo_vtable.__class__
275 |         for reference in get_drefs_to( typeinfo_vtable ):
276 |             name = get_string( reference + 8 )
277 |             #name = self.__win32_demangle_CPP_symbol_name( name )
278 |             if len( name ) <= 4:
279 |                 continue
280 |             for reference2 in get_drefs_to( reference ):
281 |                 # We want only non-reffed ones
282 |                 if len( get_drefs_to( reference2 )) != 0:
283 |                     continue
284 |                 estimated_locator = reference2-12
285 |                 last_refs = get_drefs_to( estimated_locator ) 
286 |                 for ref in last_refs:
287 |                     # A vtable needs to be referenced, itself !
288 |                     if len( get_drefs_to( ref+4 )) == 0:
289 |                         continue
290 |                     if len( get_drefs_from( ref + 4 )) == 0:
291 |                         continue
292 |                     if isCode( getFlags( get_drefs_from( ref+4)[0])):
293 |                         vtable_starts.append( (name, ref+4) )
294 |         for name, start in vtable_starts:
295 |             #print "%lx: Adding vtable for %s" % (start, name)
296 |             if result_dict.has_key( name ):
297 |                 result_dict[ name ].append( VFTable(  self, start ))
298 |             else:
299 |                 result_dict[ name ] = [ VFTable( self, start) ]
300 |         return result_dict
301 |     
302 |     def create_UML_style_diagram( self, filename="" ):
303 |         diagram = vcg_Graph.vcgGraph()
304 |         complete_set = sets.Set()
305 |         hasse = self.derivation_hasse_diagram
306 |         complete_set.update( hasse.keys())
307 |         for values in hasse.values():
308 |             complete_set.update( values )
309 |         # Ok, we have all nodes in the graph
310 |         for name in complete_set:
311 |             node = diagram.Add_Node( name )
312 |             #
313 |             #   Now we have to produce a useful label
314 |             #
315 |             label = self.__create_UML_style_label( name, hasse[ name ], self.vtables_by_name )
316 |             node.set_attribute( 'label', '"'+label+'"' )
317 |         for edges in hasse.items():
318 |             for target in edges[1]:
319 |                 diagram.Add_Link( target, edges[0] )
320 |         #outfile = file(filename, "wt")
321 |         #outfile.write( diagram.gen_VCG_string() )
322 |         #outfile.close()
323 |         if filename != "":
324 |           outfile = file(filename + ".gml", "wt" )
325 |           outfile.write( diagram.make_GML_output() )
326 |           outfile.close()
327 |         return diagram
328 |     
329 |     def __create_UML_style_label( self, derivedclass, baseclasses, type_to_vtables ):
330 |         label = ""
331 |         
332 |         label = label+ "%s" % self.__win32_demangle_CPP_symbol_name( derivedclass )
333 |         if not type_to_vtables.has_key( derivedclass ):
334 |             return label
335 |         vtables = type_to_vtables[ derivedclass ]
336 |         label = label+ "\n    %d vtables, %d base classes" % (len(vtables), len(self.derivation_hasse_diagram[derivedclass]))
337 |         # Now construct the rest of the label:
338 |         label_list = []
339 |         
340 |         for vtable in vtables:
341 |             label_list.append( (vtable.complete_object_locator.offset, 1, "\n        +%lx " % vtable.complete_object_locator.offset + vtable.__repr__() ))
342 |         for base in vtable.complete_object_locator.hierarchy.bases:
343 |             if base.type.name in self.derivation_hasse_diagram[ derivedclass ]:
344 |                 label_list.append( (base.mdisp, 0, "\n    +%lx Base class %s" % (base.mdisp, self.__win32_demangle_CPP_symbol_name( base.type.name ))))
345 |         label_list.sort()
346 |         for l in label_list:
347 |             label = label+l[2]
348 |         return label
349 |     
350 |     def __win32_demangle_CPP_symbol_name( self, symbolname ):
351 |         """
352 |             Uses the DBHGHLP.DLL to demangle a symbol name
353 |         
354 |             We use ctypes to call the following API function
355 |             DWORD WINAPI UnDecorateSymbolName(
356 |             __in   PCTSTR DecoratedName,
357 |             __out  PTSTR UnDecoratedName,
358 |             __in   DWORD UndecoratedLength,
359 |             __in   DWORD Flags);
360 |         
361 |             The WinAPI CANNOT decode classnames (idiots!), it
362 |             seems to only work for method names. Highly annoying.
363 |         """
364 |         import ctypes
365 |         symbolname = symbolname[1:]
366 |         dbghelp = ctypes.cdll.LoadLibrary("dbghelp.dll")
367 |         demangledname = ctypes.c_char_p( ' ' * 10000 )
368 |         prototype= ctypes.WINFUNCTYPE( ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int, ctypes.c_int )
369 |         UnDecorateSymbolName = prototype( ( "UnDecorateSymbolName", dbghelp ) )
370 |         res = UnDecorateSymbolName( ctypes.c_char_p( symbolname ),
371 |             demangledname ,
372 |             ctypes.c_int( 10000 ),
373 |             ctypes.c_int( 0xFFFF ))  # flags
374 |         print "%d" % res
375 |         return demangledname.value.replace('&', '&amp;')
376 | 
377 | 
378 | #__win32_demangle_CPP_symbol_name( ".?AVVThreadedHostnameResolver@@")
379 | 
380 | #rtti = RTTI( 0x004F4314 )
381 | 
# Script entry point: look for data references to the named type_info symbol,
# trying the "_2" suffixed name first and the plain name as a fallback.
dr = get_drefs_to( LocByName( "??_Etype_info@@UAEPAXI@Z_2" ))#"??_Etype_info@@UAEPAXI@Z"))
if len(dr) == 0:
	dr = get_drefs_to( LocByName( "??_Etype_info@@UAEPAXI@Z" ))#"??_Etype_info@@UAEPAXI@Z"))
	
# The first referencing address is handed to RTTI(); without any reference
# only a message is printed.
if len(dr) > 0:
  addr = dr[0]
  print "[!] Parsing RTTI for %s" % get_root_filename()
  rtti = RTTI( addr )
else:
  print "[!] No RTTI info for %s found" % get_root_filename()
#Exit(0)
#for i in rtti.derivation_poset.items():
#    print "%s derives from %s" % (i[0], i[1])
#hierarchy = create_hierarchy( res )
#generate_UML_style_diagram( hierarchy, res, "c:\\awhlogon.gml" )
#generate_dot_diagram( hierarchy, "" )

# NOTE(review): this runs unconditionally with a hard-coded address, even
# when the lookup above already created an RTTI object or found nothing.
# It looks like a leftover from a debugging session -- confirm before use.
RTTI( 0x10023f24)


--------------------------------------------------------------------------------
/x86_RE_lib.py:
--------------------------------------------------------------------------------
   1 | import vcg_Graph, vcg_GraphLink, vcg_GraphNode, string, copy, sets
   2 | from idaapi import *
   3 | import idc
   4 | 
   5 | """
   6 | 	This file consists of a collection of utility functions that were written during various
   7 | 	reverse engineering projects to facilitate the process
   8 | """
   9 | 
  10 | #
  11 | #   A list of mnemonics that do not overwrite the first operand:
  12 | #
  13 | 
# Mnemonics that slice_node treats as not overwriting their first operand
neutral_mnem = [ "cmp", "test", "push" ]
# Plain assignment mnemonics (not referenced in the visible part of this file)
assign_mnem = [ "mov", "movzx", "movsx" ]
# Register names that get_target_reg()/get_target_reg_bwd() recognise as
# registers to continue tracking
x86_registers = [ "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp"]
  17 | 
  18 | #
  19 | #   Utility function
  20 | #
  21 | 
  22 | def idaline_to_string( idaline ):
  23 | 	"""
  24 | 		Takes an IDA Pro disassembly line and removes all the formatting info
  25 | 		from it to make it a "regular" string.
  26 | 	"""
  27 | 	i = 0
  28 | 	new = ""
  29 | 	while i < len(idaline):
  30 | 		if idaline[i] == '\x01' or idaline[i] == '\x02':
  31 | 			i = i + 1
  32 | 		else:
  33 | 			new += idaline[i]
  34 | 		i = i + 1
  35 | 	return new
  36 | 
  37 | #
#   A function to get the start address of the basic block a particular ea is in
  39 | #
  40 | def get_basic_block_begin( ea ):
  41 | 	return get_basic_block_begin_from_ea( ea )
  42 | 
  43 | def get_basic_block_begin_from_ea( ea ):
  44 | 	"""" Get basic block upper bound
  45 | 	
  46 | 	While the current instruction is not referenced from anywhere and the preceding instruction is not
  47 | 	referencing anywhere else, step backwards. Return the first address at which the above conditions
  48 | 	are no longer true.
  49 | 	"""
  50 | 	oldea = 0
  51 | 	while get_first_fcref_to( ea ) == BADADDR and get_first_fcref_from( get_first_cref_to( ea ) ) == BADADDR and ea != BADADDR:
  52 | 		oldea = ea
  53 | 		ea = get_first_cref_to( ea )
  54 | 	if ea == BADADDR:
  55 | 		return oldea
  56 | 	return ea
  57 | 
  58 | 
  59 | def get_basic_block_end( ea ):
  60 | 	return get_basic_block_end_from_ea( ea )
  61 | 
  62 | #
#   A function to get the end address of the basic block a particular ea is in
  64 | #
  65 | 
  66 | def get_basic_block_end_from_ea( ea ):
  67 | 	""" Get basic block lower bound
  68 | 	
  69 | 	The same as get_basic_block_begin_from_ea(), just forwards.
  70 | 	"""   
  71 | 	lastea = ea
  72 | 	while get_first_fcref_from( ea ) == BADADDR and ea != BADADDR and \
  73 | 		get_first_fcref_to( get_first_cref_from(ea) ) == BADADDR:
  74 | 		lastea = ea
  75 | 		ea = get_first_cref_from( ea )
  76 | 	if ea == BADADDR:
  77 | 		return lastea
  78 | 	return ea
  79 | 
  80 | #
  81 | #
  82 | #
  83 | 
# Colour numbers 0..31 used in the VCG output. They are written into label
# text as two-digit numbers after a '\x0C' character (see
# basic_block_to_pretty_vcg) and are the values stored in colormap.
VCG_COLOR_WHITE = 0
VCG_COLOR_BLUE = 1
VCG_COLOR_RED = 2
VCG_COLOR_GREEN = 3
VCG_COLOR_YELLOW = 4
VCG_COLOR_MAGENTA = 5
VCG_COLOR_CYAN = 6
VCG_COLOR_DARKGREY = 7
VCG_COLOR_DARKBLUE = 8
VCG_COLOR_DARKRED = 9
VCG_COLOR_DARKGREEN = 10
VCG_COLOR_DARKYELLOW = 11
VCG_COLOR_DARKMAGENTA = 12
VCG_COLOR_DARKCYAN = 13
VCG_COLOR_GOLD = 14
VCG_COLOR_LIGHTGREY = 15
VCG_COLOR_LIGHTBLUE = 16
VCG_COLOR_LIGHTRED = 17
VCG_COLOR_LIGHTGREEN = 18
VCG_COLOR_LIGHTYELLOW = 19
VCG_COLOR_LIGHTMAGENTA = 20
VCG_COLOR_LIGHTCYAN = 21
VCG_COLOR_LILAC = 22
VCG_COLOR_TURQUOISE = 23
VCG_COLOR_AQUAMARINE = 24
VCG_COLOR_KHAKI = 25
VCG_COLOR_PURPLE = 26
VCG_COLOR_YELLOWGREEN = 27
VCG_COLOR_PINK = 28
VCG_COLOR_ORANGE = 29
VCG_COLOR_ORCHID = 30
VCG_COLOR_BLACK = 31
 116 | 
# Translation table from an IDA colour tag to a VCG colour number. It is
# indexed with the ordinal of the character that follows COLOR_ON in a
# disassembly line (see basic_block_to_pretty_vcg); index 0 is a placeholder.
# NOTE(review): basic_block_to_pretty_vcg only translates tags below 28, so
# the entries from index 28 on are not reached from there.
colormap = [
	VCG_COLOR_WHITE,		#	IGNORE ! Just there to make array addressing nicer !
	VCG_COLOR_BLACK,		#	Default
	VCG_COLOR_RED,			# Regular comment
	VCG_COLOR_LIGHTBLUE,	# Repeatable comment (comment defined somewhere else)
	VCG_COLOR_LIGHTBLUE, 	# Automatic comment
	VCG_COLOR_DARKBLUE,	# Instruction
	VCG_COLOR_DARKGREEN,	# Dummy Data Name
	VCG_COLOR_DARKGREEN,	# Regular Data Name
	VCG_COLOR_MAGENTA,		# Demangled Name
	VCG_COLOR_BLUE,		# Punctuation
	VCG_COLOR_DARKCYAN,	# Char constant in instruction
	VCG_COLOR_DARKCYAN,	# String constant in instruction
	VCG_COLOR_DARKCYAN,	# Numeric constant in instruction
	VCG_COLOR_RED,			# Void operand
	VCG_COLOR_DARKGREY,	# Code reference
	VCG_COLOR_DARKGREY,	# Data reference
	VCG_COLOR_RED,			# Code reference to tail byte
	VCG_COLOR_RED,			# Data reference to tail byte
	VCG_COLOR_RED,			# Error or problem
	VCG_COLOR_DARKGREY,	# Line prefix
	VCG_COLOR_DARKGREY,	# Binary line prefix bytes
	VCG_COLOR_DARKGREY,	# Extra line
	VCG_COLOR_PINK,		# Alternative operand
	VCG_COLOR_PINK,		# Hidden name
	VCG_COLOR_MAGENTA,		# Library function name
	VCG_COLOR_GREEN,		# Local variable name
	VCG_COLOR_DARKGREY,	# Dummy code name
	VCG_COLOR_DARKBLUE,	# Assembler directive
	VCG_COLOR_DARKGREY,	# Macro
	VCG_COLOR_DARKCYAN,	# String constant in data directive
	VCG_COLOR_DARKCYAN,	# Char constant in data directive
	VCG_COLOR_DARKCYAN,	# Numeric constant in data directive
	VCG_COLOR_DARKBLUE,	# Keywords
	VCG_COLOR_LIGHTBLUE,	# Register name
	VCG_COLOR_MAGENTA,		# Imported name
	VCG_COLOR_DARKGREY,	# Segment name
	VCG_COLOR_DARKGREY,	# Dummy unknown name
	VCG_COLOR_DARKGREY,	# Regular code name
	VCG_COLOR_DARKGREY,	# Regular unknown name
	VCG_COLOR_DARKGREY,	# Collapsed line
	VCG_COLOR_LIGHTGREY	# hidden address marks
]
 160 | 
def basic_block_to_pretty_vcg( blk ):
	"""
		Renders a basic block as coloured text for a VCG node label.

		blk is a list of lines whose first element is the instruction
		address. Each line is regenerated with generate_disasm_line() and its
		IDA colour tags are replaced by '\\x0C' followed by a two-digit VCG
		colour number taken from colormap.

		Returns the concatenated text: a header containing the address of
		the first line, then one "\\r\\n"-terminated line per instruction.
	"""
	# NOTE(review): "y1"/"y2" and the per-line print look like debug output
	print "y1"
	allblk = "\x0C22%lx:\r\n\x0Cb" % blk[0][0]
	print "y2"
	for line in blk:
		print line
		# Colour tags opened so far on this line
		colorstack = []
		idaline = generate_disasm_line( line[0] )
		newline = ""
		# Number of following characters to skip (the argument of a tag)
		ignorenext = 0
		# NOTE(review): the range stops one short of the end, so the last
		# character of idaline is never copied -- confirm this is intended
		for i in range( len(idaline)-1):
			if ignorenext:
				ignorenext = ignorenext - 1
				continue
			# Colour-on tag with a colour below 28 that colormap knows
			if idaline[i] == COLOR_ON and ord(idaline[i+1]) < len( colormap ) and ord(idaline[i+1]) < 28:
				colorstack.append( idaline[i+1] )
				newline = newline + "\x0C%.02d" % colormap[ ord(idaline[i+1]) ]
				ignorenext = 1
			elif idaline[i] == COLOR_OFF:
				if len( colorstack ) == 0:
					newline = newline + "\x0C%.02d" %  VCG_COLOR_BLACK
				else:
					# NOTE(review): this emits the colour of the tag being
					# closed, not of the one below it on the stack -- confirm
					newline = newline + "\x0C%.02d" % colormap[ ord( colorstack.pop())]
				ignorenext = 1
			elif idaline[i] == '\x01':
				# Presumably a COLOR_ON tag rejected above; drop it together
				# with its argument -- TODO confirm COLOR_ON == '\x01'
				ignorenext = 1
				continue
			else:
				if idaline[i] != '"' and idaline[i] != '\\':
					newline = newline + idaline[i]
				elif idaline[i] == '"':
					# A double quote is written as '\x0C' plus its
					# three-digit character code
					newline = newline + "\x0C%.03d" % ord(idaline[i])
				elif idaline[i] == '\\':
					# A backslash is doubled
					newline = newline + "\\\\"
		# Every line ends in black followed by a line break
		newline = newline + "\x0C%.02d\r\n" % VCG_COLOR_BLACK
		allblk = allblk + newline
	return allblk
 198 | 
 199 | #
 200 | #   Retrieves a list of xrefs from a particular location
 201 | #
 202 | 
 203 | def get_drefs_to( ea ):
 204 | 	"""
 205 | 		Retrieves a list of locations that are referring ea (data only)
 206 | 	"""
 207 | 	ret = []
 208 | 	xrf = get_first_dref_to( ea )
 209 | 	if xrf != BADADDR:
 210 | 		ret.append( xrf )
 211 | 	xrf = get_next_dref_to( ea, xrf )
 212 | 	while xrf != BADADDR:
 213 | 		ret.append( xrf )
 214 | 		xrf = get_next_dref_to( ea, xrf )
 215 | 	return ret
 216 | 
 217 | def get_drefs_from( ea ):
 218 | 	"""
 219 | 		Retrieves a list of locations that are referred to from ea (data only)
 220 | 	"""
 221 | 	ret = []
 222 | 	xrf = get_first_dref_from( ea )
 223 | 	if xrf != BADADDR:
 224 | 		ret.append( xrf )
 225 | 	xrf = get_next_dref_from( ea, xrf )
 226 | 	while xrf != BADADDR:
 227 | 		ret.append( xrf )
 228 | 		xrf = get_next_dref_from( ea, xrf )
 229 | 	return ret
 230 | 
 231 | 
 232 | def get_short_crefs_from( ea ):
 233 | 	"""
 234 | 		Retrieves a list of locations that 
 235 | 	"""
 236 | 	ret = []
 237 | 	xrf = get_first_cref_from( ea )
 238 | 	xrf2 = get_first_fcref_from( ea )
 239 | 	if xrf != BADADDR and xrf != xrf2:
 240 | 		ret.append( xrf )
 241 | 	xrf = get_next_cref_from( ea, xrf )
 242 | 	while xrf != BADADDR and xrf != xrf2:
 243 | 		ret.append( xrf )
 244 | 		xrf = get_next_cref_from( ea, xrf )
 245 | 	return ret
 246 | 
 247 | def get_noncall_crefs_to( ea ):
 248 | 	"""
 249 | 		Retrieve a list of locations that branch to ea
 250 | 	"""
 251 | 	ret = []
 252 | 	xrf = get_first_cref_to( ea )
 253 | 	if xrf != BADADDR:
 254 | 		if ua_mnem( xrf ) != "call":
 255 | 			ret.append( xrf )
 256 | 	else:
 257 | 		if ea not in get_far_crefs_from( xrf ):
 258 | 			ret.append( xrf )
 259 | 	xrf = get_next_cref_to( ea, xrf )
 260 | 	while xrf != BADADDR:
 261 | 		if ua_mnem( xrf ) != "call":
 262 | 			ret.append( xrf )
 263 | 		xrf = get_next_cref_to( ea, xrf )
 264 | 	return ret        
 265 | 
 266 | def get_short_crefs_to( ea ):
 267 | 	"""
 268 | 		Retrieve a list of locations that refer to ea using a non-call
 269 | 	"""
 270 | 	ret = []
 271 | 	xrf = get_first_cref_to( ea )
 272 | 	xrf2 = get_first_fcref_to( ea )
 273 | 	if xrf != BADADDR and xrf != xrf2:
 274 | 		ret.append( xrf )
 275 | 	xrf = get_next_cref_to( ea, xrf )
 276 | 	while xrf != BADADDR and xrf != xrf2:
 277 | 		ret.append( xrf )
 278 | 		xrf = get_next_cref_to( ea, xrf )
 279 | 	return ret
 280 | 
 281 | def get_crefs_from( ea ):
 282 | 	"""
 283 | 		Retrieve a list of locations that ea branches to
 284 | 	"""
 285 | 	ret = []
 286 | 	xrf = get_first_cref_from( ea )
 287 | 	if xrf != BADADDR:
 288 | 		ret.append( xrf )
 289 | 	xrf = get_next_cref_from( ea, xrf )
 290 | 	while xrf != BADADDR:
 291 | 		ret.append( xrf )
 292 | 		xrf = get_next_cref_from( ea, xrf )
 293 | 	return ret
 294 | 	
 295 | def get_crefs_to( ea ):
 296 | 	"""
 297 | 		Retrieve a list of locations that branch to ea
 298 | 	"""
 299 | 	ret = []
 300 | 	xrf = get_first_cref_to( ea )
 301 | 	if xrf != BADADDR:
 302 | 		ret.append( xrf )
 303 | 	xrf = get_next_cref_to( ea, xrf )
 304 | 	while xrf != BADADDR:
 305 | 		ret.append( xrf )
 306 | 		xrf = get_next_cref_to( ea, xrf )
 307 | 	return ret        
 308 | 
 309 | def get_far_crefs_from( ea ):
 310 | 	"""
 311 | 		Retrieve list of locations that ea branches to 
 312 | 	"""
 313 | 	ret = []
 314 | 	xrf = get_first_fcref_from( ea )
 315 | 	if xrf != BADADDR:
 316 | 		ret.append( xrf )
 317 | 	xrf = get_next_fcref_from( ea, xrf )
 318 | 	while xrf != BADADDR:
 319 | 		ret.append( xrf )
 320 | 		xrf = get_next_fcref_from( ea, xrf )
 321 | 	return ret
 322 | 	
 323 | def get_far_crefs_to( ea ):
 324 | 	ret = []
 325 | 	xrf = get_first_fcref_to( ea )
 326 | 	if xrf != BADADDR:
 327 | 		ret.append( xrf )
 328 | 	xrf = get_next_fcref_to( ea, xrf )
 329 | 	while xrf != BADADDR:
 330 | 		ret.append( xrf )
 331 | 		xrf = get_next_fcref_to( ea, xrf )
 332 | 	return ret        
 333 | 
 334 | 
 335 | #
 336 | #   Retrieves a line of disassembled code
 337 | #
 338 | 
 339 | def get_disasm_line( ea ):
 340 | 	""" Returns a list [ int address, string mnem, string op1, string op2, string op3 ]
 341 | 	
 342 | 	"""
 343 | 	op1 = ua_outop2( ea, 0, 0 )	
 344 | 	op2 = ua_outop2( ea, 1, 0 )
 345 | 	op3 = ua_outop2( ea, 2, 0 )
 346 | 	if op1 == None:
 347 | 		op1 = ""
 348 | 	else:
 349 | 		op1 = idaline_to_string( op1 )
 350 | 	if op2 == None:
 351 | 		op2 = ""
 352 | 	else:
 353 | 		op2 = idaline_to_string( op2 )
 354 | 	if op3 == None:
 355 | 		op3 = ""
 356 | 	else:
 357 | 		op3 = idaline_to_string( op3 )
 358 | 	ret = [ ea, ua_mnem( ea ), op1, op2, op3 ]
 359 | 	return ret
 360 | 
 361 | #
 362 | #  Retrieves a string from the IDB
 363 | #
 364 | 
 365 | def get_string( ea ):
 366 | 	str = ""
 367 | 	while get_byte( ea ) != 0:
 368 | 		str = str + "%c" % get_byte( ea )
 369 | 		ea = ea+1
 370 | 	return str
 371 | 
 372 | 
 373 | #
 374 | #   Returns a string for a disasm line
 375 | #
 376 | 
 377 | def disasm_line_to_string( baseblock ):
 378 | 	str = "%lx:   %s " % (baseblock[0], baseblock[1])
 379 | 	if baseblock[2] != "":
 380 | 		str = str + baseblock[2]
 381 | 	if baseblock[3] != "":
 382 | 		str = str + ", %s" % baseblock[3]
 383 | 	if baseblock[4] != "":
 384 | 		str = str + ", %s" % baseblock[4]
 385 | 	return str
 386 | 
 387 | 
 388 | #
 389 | #   Returns all the instructions in a basic block
 390 | #
 391 | 
 392 | def get_basic_block( ea ):
 393 | 	"""
 394 | 		A basic block will be a list of lists that contain all the instructions
 395 | 		in this particular basic block.
 396 | 		[ 
 397 | 			[ firstaddress, mnem, op1, op2, op3 ]
 398 | 			...
 399 | 			[ lastaddress, mnem, op1, op2, op3 ]
 400 | 		]
 401 | 	"""
 402 | 	begin = get_basic_block_begin_from_ea( ea )
 403 | 	realbegin = begin
 404 | 	end = get_basic_block_end_from_ea( ea )
 405 | 	ret = []
 406 | 	while begin <= end and begin >= realbegin:
 407 | 		ret.append( get_disasm_line( begin ) )
 408 | 		if get_first_cref_from( begin ) <= begin:
 409 | 			break
 410 | 		begin = get_first_cref_from( begin )
 411 | 	return ret
 412 | 
 413 | def get_basic_block_from( ea ):
 414 | 	x = get_basic_block( ea )
 415 | 	blk = []
 416 | 	for line in x:
 417 | 		if line[0] >= ea:
 418 | 			blk.append( line )
 419 | 	return blk
 420 | 	"""begin = ea
 421 | 	end = get_basic_block_end_from_ea( ea )
 422 | 	ret = []
 423 | 	#print "%lx: (end)" % end
 424 | 	while begin <= end and begin != BADADDR:
 425 | 		ret.append( get_disasm_line( begin ) )
 426 | 		begin = get_first_cref_from( begin )
 427 | 		if get_first_fcref_to( begin ) != BADADDR:
 428 | 			break
 429 | 		if begin == get_first_fcref_from( begin ):
 430 | 			break
 431 | 	return ret"""
 432 | 
 433 | def get_basic_block_to( ea ):
 434 | 	x = get_basic_block( ea )
 435 | 	blk = []
 436 | 	for line in x:
 437 | 		if line[0] <= ea:
 438 | 			blk.append( line )
 439 | 	return blk
 440 | 	"""
 441 | 	end = ea
 442 | 	begin = get_basic_block_begin_from_ea( ea )
 443 | 	ret = []
 444 | 	while begin <= end and begin != BADADDR:
 445 | 		ret.append( get_disasm_line( begin ) )
 446 | 		begin = get_first_cref_from( begin )
 447 | 		if get_first_fcref_to( begin ) != BADADDR:
 448 | 			break
 449 | 		if begin == get_first_fcref_from( begin ):
 450 | 			break
 451 | 	return ret"""
 452 | 
 453 | 
 454 | 
 455 | def might_be_immediate( str ):
 456 | 	if str == "":
 457 | 		return 0
 458 | 	if str == None:
 459 | 		return 0
 460 | 	try:
 461 | 		if str[-1] == 'h':
 462 | 			string.atol( str[:-1], 16 )
 463 | 		else:
 464 | 			string.atol( str, 10 )
 465 | 		return 1
 466 | 	except ValueError:
 467 | 		return 0
 468 | 
 469 | def print_basic_block( baseblock ):
 470 | 	#print baseblock
 471 | 	for line in baseblock:
 472 | 		print disasm_line_to_string( line )
 473 | 
 474 | def basic_block_to_string( baseblock ):
 475 | 	r = ""
 476 | 	for line in baseblock:
 477 | 		r = r + disasm_line_to_string(line) + "\n"
 478 | 	return r
 479 | 
 480 | def slice_basic_block_for_reg( baseblock, reg ):
 481 | 	retblk = []
 482 | 	for line in baseblock:
 483 | 		if reg == "eax" and line[1] == "call":
 484 | 			retblk.append( line )            
 485 | 		elif line[2].find( reg ) != -1 or line[3].find( reg ) != -1 or \
 486 | 			line[4].find( reg ) != -1:
 487 | 			retblk.append( line )
 488 | 	return retblk 
 489 | 
 490 | class slice_node:
 491 | 	def __init__( self, startea, endea, reg ):
 492 | 		self.startea = startea
 493 | 		self.endea = endea
 494 | 		self.reg = reg
 495 | 		#print "find_end!"
 496 | 		if( startea == 0 ):
 497 | 			self.find_begin()
 498 | 		if( endea == 0 ):
 499 | 			self.find_end()
 500 | 	def to_name( self ):
 501 | 		return "%lx-%lx-%s" % ( self.startea, self.endea, self.reg )
 502 | 	def find_end( self ):
 503 | 		bb = get_basic_block_from( self.startea )
 504 | 		self.endea = bb[-1][0]
 505 | 		bb2 = slice_basic_block_for_reg( bb, self.reg )
 506 | 		bb3 = []
 507 | 		for line in bb2:
 508 | 			bb3.append( line )
 509 | 			if self.reg == "eax" and line[1] == "call":
 510 | 				self.endea = line[0]
 511 | 				break
 512 | 			if line[1] not in neutral_mnem and (line[2] == self.reg or line[3] == self.reg):
 513 | 				self.endea = line[0]
 514 | 				break
 515 | 		self.lines = bb3
 516 | 		return self.endea
 517 | 	def find_begin( self ):
 518 | 		bb = get_basic_block_to( self.endea )
 519 | 		self.startea = bb[0][0]
 520 | 		bb2 = slice_basic_block_for_reg( bb, self.reg )
 521 | 		bb3 = []
 522 | 		for i in range( len(bb2)-1, -1, -1):
 523 | 			line = bb2[i]
 524 | 			bb3.insert( 0, line )
 525 | 			if self.reg == "eax" and line[1] == "call":
 526 | 				self.startea = line[0]
 527 | 				break
 528 | 			if line[1] not in neutral_mnem and (line[2] == self.reg or line[3] == self.reg):
 529 | 				self.startea = line[0]
 530 | 				break
 531 | 		self.lines = bb3
 532 | 		return self.startea
 533 | 	def get_target_reg_bwd( self ):
 534 | 		"""		if len( self.lines ) > 0:
 535 | 			if self.reg == "eax" and self.lines[0][1] == "call":
 536 | 				# call is overwriting eax
 537 | 				return ["END",0]
 538 | 			if self.lines[0][1] == "xor" and self.lines[0][2] == self.reg and self.lines[0][3] == self.reg:
 539 | 				return ["END",0]
 540 | 			if self.lines[0][1] == "or" and self.lines[0][3] == "0FFFFFFFFh":
 541 | 				return ["END", 0]
 542 | 			if self.lines[0][1] == "or" and self.lines[0][3] == "-1":
 543 | 				return ["END", 0]
 544 | 			if self.lines[0][1] == "and" and self.lines[0][2] == self.reg and self.lines[0][3] == "0":
 545 | 				return ["END",0]
 546 | 			if self.lines[0][2] == self.reg and self.lines[0][1] not in neutral_mnem:
 547 | 				if self.lines[0][3] in x86_registers and self.lines[0][1] == "mov":
 548 | 					return [self.lines[0][3], 0 ]
 549 | 			if self.lines[0][3] in x86_registers and self.lines[0][1] != "mov":
 550 | 				return [ self.lines[0][3], 1]
 551 | 			if might_be_immediate( self.lines[0][3]) and self.lines[0][1] != "mov":
 552 | 				return [ self.lines[0][2], 0]
 553 | 			else:
 554 | 				return ["END",0]
 555 | 		return ["",0]
 556 | 		"""
 557 | 		if len( self.lines ) > 0:
 558 | 		    if self.reg == "eax" and self.lines[0][1] == "call":
 559 | 			# call is overwriting eax
 560 | 			return ["END",0]
 561 | 		    if self.lines[0][1] == "xor" and self.lines[0][2] == self.reg and self.lines[0][3] == self.reg:
 562 | 			return ["END",0]
 563 | 		    if self.lines[0][1] == "or" and self.lines[0][3] == "0FFFFFFFFh":
 564 | 			return ["END", 0]
 565 | 		    if self.lines[0][1] == "or" and self.lines[0][3] == "-1":
 566 | 			return ["END", 0]
 567 | 		    if self.lines[0][1] == "and" and self.lines[0][2] == self.reg and self.lines[0][3] == "0":
 568 | 			return ["END",0]
 569 | 		    if self.lines[0][2] == self.reg and self.lines[0][1] not in neutral_mnem:
 570 | 			if self.lines[0][3] in x86_registers and self.lines[0][1] == "mov":
 571 | 			   return [self.lines[0][3], 0 ]
 572 | 			if self.lines[0][3] in x86_registers and self.lines[0][1] != "mov":
 573 | 			   return [ self.lines[0][3], 1]
 574 | 			if might_be_immediate( self.lines[0][3]) and self.lines[0][1] != "mov":
 575 | 			   return [ self.lines[0][2], 0]
 576 | 			else:
 577 | 			   return ["END",0]
 578 | 		return ["",0]
 579 | 
 580 | 	def get_target_reg( self ):
 581 | 		"""	Returns either "END", "", or the new register to track at the end of this block
 582 | 	
 583 | 		This code returns eiter "END" if the register is fatally overwritten, "" if the register is dereferenced
 584 | 		or the new register in other cases
 585 | 		"""
 586 | 		if len( self.lines ) > 0:
 587 | 			if self.reg == "eax" and self.lines[-1][1] == "call":
 588 | 		# We have a call that overwrites EAX
 589 | 				return "END"
 590 | 			if self.lines[-1][2] == self.reg and self.lines[-1][1] not in neutral_mnem:
 591 | 		# We have a non-neutral instruction that writes to the register we're tracking
 592 | 				return "END"
 593 | 			elif self.lines[-1][2].find( self.reg ) != -1:
 594 | 		# We have memory access to the location this register is pointing to or an operation on itself
 595 | 				return ""
 596 | 			else:
 597 | 		# If the target is a register, return this register
 598 | 				if self.lines[-1][2] in x86_registers:
 599 | 					return self.lines[-1][2]
 600 | 				else:
 601 | 					return ""
 602 | 		else:
 603 | 			return ""
 604 | 	def get_lines( self ):
 605 | 		return self.lines
 606 | 	def self_to_string( self ):
 607 | 		str = "StartEA: %lx\nEndEA: %lx\nReg: %s\n" % (self.startea, self.endea\
 608 | 			,self.reg)
 609 | 		for line in self.lines:
 610 | 			str = str + disasm_line_to_string( line ) + "\n"
 611 | 		return str
 612 | 	def print_self( self ):
 613 | 		print self.self_to_string()
 614 | 
 615 | def add_data_to_slice_graph( graph, bib ):
 616 | 	for name in bib.keys():
 617 | 		node = graph.Get_Node( name )
 618 | 		node.set_attribute( "label", '"'+bib[name].self_to_string()+'"')
 619 | 	return
 620 | 
 621 | def slice_graph_bwd( endea, reg ):
 622 | 	"""
 623 | 		Creates a slice graph for this register from an EA (no recursion)
 624 | 	""" 
 625 | 	graph = vcg_Graph.vcgGraph({"title":'"Slice for %s"' % reg, \
 626 | 		"manhattan_edges":"no", "layoutalgorithm":"maxdepth"})
 627 | 	#
 628 | 	#   Retrieve the name of the current basic block
 629 | 	#    
 630 | 	worklist = []
 631 | 	data_bib = {}
 632 | 	
 633 | 	startnode = slice_node( 0, endea, reg )		# start at the end of the slice node
 634 | 	rootnode = graph.Add_Node( startnode.to_name() )
 635 | 	data_bib[ startnode.to_name() ] = startnode
 636 | 	worklist.insert( 0, rootnode )
 637 | 	while len( worklist ) > 0:
 638 | 		currnode = worklist.pop()
 639 | 		currslice = data_bib[ currnode.get_name() ]
 640 | 		[tgt_reg, split] = currslice.get_target_reg_bwd()
 641 | 		print tgt_reg
 642 | 		print split
 643 | 		if tgt_reg == "END":
 644 | 			# Do not process this node any further
 645 | 			pass
 646 | 		elif tgt_reg == "" or (( len( currslice.get_lines()) > 0) and \
 647 | 			currslice.startea != currslice.get_lines()[0][0]):
 648 | 			# Do process this node further, nothing really going on 
 649 | 			print "ZEZ"
 650 | 			xrefs = get_crefs_to( currslice.startea )
 651 | 			for ref in xrefs:
 652 | 				newslice = slice_node(  0,ref, currslice.reg )
 653 | 				if graph.Get_Node( newslice.to_name() ) == 0:
 654 | 					newnode = graph.Add_Node( newslice.to_name() )
 655 | 					worklist.insert( 0, newnode )
 656 | 					data_bib[ newslice.to_name() ] = newslice
 657 | 				graph.Add_Link( newslice.to_name(), currnode.get_name() )
 658 | 		else:
 659 | 			xrefs = get_crefs_to( currslice.startea )
 660 | 			for ref in xrefs:
 661 | 				newslice = slice_node( 0,ref, tgt_reg )
 662 | 				if graph.Get_Node( newslice.to_name() ) == 0:
 663 | 					newnode = graph.Add_Node( newslice.to_name() )
 664 | 					worklist.insert( 0, newnode )
 665 | 					data_bib[ newslice.to_name() ] = newslice
 666 | 				graph.Add_Link( newslice.to_name(), currnode.get_name())
 667 | 			xrefs = get_crefs_to( currslice.startea )
 668 | 			if split:
 669 | 				for ref in xrefs:
 670 | 					newslice = slice_node( 0,ref, currslice.reg )
 671 | 					if graph.Get_Node( newslice.to_name() ) == 0:
 672 | 						newnode = graph.Add_Node( newslice.to_name() )
 673 | 						worklist.insert( 0, newnode )
 674 | 						data_bib[ newslice.to_name() ] = newslice
 675 | 					graph.Add_Link( newslice.to_name(), currnode.get_name())
 676 | 	return [ graph, data_bib ]
 677 | 
 678 | def slice_graph_fwd( startea, reg ):
 679 | 	"""
 680 | 		Creates a slice graph for this register from an EA (no recursion)
 681 | 	""" 
 682 | 	graph = vcg_Graph.vcgGraph({"title":'"Slice for %s"' % reg, \
 683 | 		"manhattan_edges":"no", "layoutalgorithm":"maxdepth"})
 684 | 	#
 685 | 	#   Retrieve the name of the current basic block
 686 | 	#    
 687 | 	worklist = []
 688 | 	data_bib = {}
 689 | 	startnode = slice_node( startea, 0, reg )
 690 | 	rootnode = graph.Add_Node( startnode.to_name() )
 691 | 	data_bib[ startnode.to_name() ] = startnode
 692 | 	worklist.insert( 0, rootnode )
 693 | 	while len( worklist ) > 0:
 694 | 		currnode = worklist.pop()
 695 | 		currslice = data_bib[ currnode.get_name() ]
 696 | 		tgt_reg = currslice.get_target_reg()
 697 | 		if tgt_reg == "END":
 698 | 		# Do not process this node any further
 699 | 			pass
 700 | 		elif tgt_reg == "" or (( len( currslice.get_lines()) > 0) and \
 701 | 			currslice.endea != currslice.get_lines()[-1][0]):
 702 | 			# Nothing much happening here, just proceed to parent bocks
 703 | 			if ua_mnem( currslice.endea ) == "call":
 704 | 				xrefs = get_short_crefs_from( currslice.endea )
 705 | 			else:
 706 | 				xrefs = get_crefs_from( currslice.endea )
 707 | 			for ref in xrefs:
 708 | 				newslice = slice_node( ref, 0, currslice.reg )
 709 | 				if graph.Get_Node( newslice.to_name() ) == 0:
 710 | 					newnode = graph.Add_Node( newslice.to_name() )
 711 | 					worklist.insert( 0, newnode )
 712 | 					data_bib[ newslice.to_name() ] = newslice
 713 | 				graph.Add_Link( currnode.get_name(), newslice.to_name())
 714 | 		else:
 715 | 			# Register was modified, use new register
 716 | 			xrefs = get_crefs_from( currslice.endea )
 717 | 			for ref in xrefs:
 718 | 				newslice = slice_node( ref, 0, tgt_reg )
 719 | 				if graph.Get_Node( newslice.to_name() ) == 0:
 720 | 					newnode = graph.Add_Node( newslice.to_name() )
 721 | 					worklist.insert( 0, newnode )
 722 | 					data_bib[ newslice.to_name() ] = newslice
 723 | 				graph.Add_Link( currnode.get_name(), newslice.to_name())
 724 | 			xrefs = get_crefs_from( currslice.endea )
 725 | 			for ref in xrefs:
 726 | 				newslice = slice_node( ref, 0, currslice.reg )
 727 | 				if graph.Get_Node( newslice.to_name() ) == 0:
 728 | 					newnode = graph.Add_Node( newslice.to_name() )
 729 | 					worklist.insert( 0, newnode )
 730 | 					data_bib[ newslice.to_name() ] = newslice
 731 | 				graph.Add_Link( currnode.get_name(), newslice.to_name())
 732 | 	return [ graph, data_bib ]
 733 | 
 734 | def write_slice_graph( intuple, fname ):
 735 | 	newgraph = copy.deepcopy( intuple[0] )
 736 | 	add_data_to_slice_graph( newgraph, intuple[1] )
 737 | 	newgraph.write_VCG_File( fname )
 738 | 
 739 | def get_resolvable_calls( ea_func ):
 740 | 	[graph, bib] = slice_graph_fwd( ea_func, "ecx" )
 741 | 	# search for a node containing "[ecx]" in it's line
 742 | 	vtable_loads = []
 743 | 	calls = []
 744 | 	for name in bib.keys():
 745 | 		lines = bib[name].get_lines()
 746 | 		for line in lines:
 747 | 			if line[3] == "["+ bib[name].reg +"]":
 748 | 				vtable_loads.append( [line[0], line[2]] )
 749 | 	for load in vtable_loads:
 750 | 		[graph, bib] = slice_graph_fwd( load[0] + get_item_size( load[0]) \
 751 | 			, load[1] )
 752 | 		for name in bib.keys():
 753 | 			lines = bib[name].get_lines()
 754 | 			for line in lines:
 755 | 				if line[1] == "call":
 756 | 					calls.append( [line[0], line[2]] )
 757 | 	#for x in calls:
 758 | 	#    print "%lx:" % x[0]
 759 | 	return calls
 760 | 
 761 | def get_subfuncs_with_same_thisptr( ea_func ):
 762 | 	[graph, bib] = slice_graph_fwd( ea_func, "ecx" )
 763 | 	funcs = []
 764 | 	#
 765 | 	#   Now get all slice blocks which have "ecx" on them and look for subfunction
 766 | 	#   calls in them
 767 | 	#
 768 | 	for slicename in bib.keys():
 769 | 		slice = bib[ slicename ]
 770 | 		if slice.reg == "ecx":
 771 | 			begin = slice.startea
 772 | 			while begin <= slice.endea:
 773 | 				if ua_mnem( begin ) == "call":
 774 | 					tgt = get_first_fcref_from( begin )
 775 | 					if tgt != BADADDR:
 776 | 						funcs.append( tgt )
 777 | 				begin = begin + get_item_size( begin )
 778 | 	return funcs
 779 | 
 780 | def get_subfuncs_with_same_thisptr_rec( ea_func ):
 781 | 	funcdict = {}
 782 | 	worklist = []
 783 | 	worklist.append( ea_func )
 784 | 	funcdict[ ea_func ] = 1
 785 | 	while len( worklist ) > 0:
 786 | 		ea = worklist.pop()
 787 | 		funcs = get_subfuncs_with_same_thisptr( ea )
 788 | 		for func in funcs:
 789 | 			if not funcdict.has_key( func ):
 790 | 				funcdict[ func ] = 1
 791 | 				worklist.append( func )
 792 | 	funcs = []
 793 | 	for x in funcdict.keys():
 794 | 		funcs.append( x )
 795 | 	return funcs
 796 | 			
 797 | def resolve_indirect_calls_in_vtable_recursive( vtable_begin, vtable_end ):
 798 | 	targetdict = {}
 799 | 	current = vtable_begin
 800 | 	changed = 1
 801 | 	newlist = []
 802 | 	while changed:
 803 | 		changed = 0
 804 | 		current = vtable_begin
 805 | 		while current <= vtable_end:
 806 | 			tgts = get_subfuncs_with_same_thisptr_rec( get_first_dref_from( current ))
 807 | 			for tgt in tgts:
 808 | 				if targetdict.has_key( tgt ):
 809 | 					pass
 810 | 				else:
 811 | 					targetdict[ tgt ] = tgt
 812 | 					changed = 1
 813 | 					newlist.append( tgt )
 814 | 			current = current + 4
 815 | 		# iterated over vtable once, now resolve one step
 816 | 		if changed == 1:
 817 | 			while len( newlist ) > 0:
 818 | 				f = newlist.pop()
 819 | 				#print "%lx" % f
 820 | 				calls = get_resolvable_calls( f )
 821 | 				for call in calls:
 822 | 					#print "%lx: %s" % ( call[0], call[1])
 823 | 					resolve_call( call, vtable_begin )
 824 | #
 825 | #   Excuse the erratic indentation
 826 | #
 827 | def resolve_call( call, vtable_begin ):
 828 | 	if call[1].find( "dword" ) != -1:
 829 | 		newcall = "0x" + call[1][ call[1].find('[')+5:-2]
 830 | 		if newcall == "0x":
 831 | 			newcall = "0"
 832 | 		offset = string.atol( newcall, 16 )
 833 | 		target = get_first_dref_from( vtable_begin + offset )
 834 | 		if target == BADADDR:
 835 | 			print "%lx: BADADDR as target from vtable at %lx, offset %lx\n" \
 836 | 				% (call[0], vtable_begin, offset)
 837 | 		else:
 838 | 			xrefs = get_far_crefs_from( call[0] )
 839 | 			if target not in xrefs:
 840 | 				if get_cmt( call[0], 0 ) != None:
 841 | 					newcmt = get_cmt( call[0], 0 ) + "target: 0x%lx\n" % target
 842 | 				else:
 843 | 					newcmt = "target: 0x%lx\n" % target
 844 | 				set_cmt( call[0], newcmt, 0 )
 845 | 				add_cref( call[0], target, fl_CN )
 846 | 			print "%lx: --> %lx" % ( call[0], target )
 847 | 
 848 | def resolve_indirect_calls_in_vtable( vtable_begin, vtable_end):
 849 | 	current = vtable_begin
 850 | 	while current <= vtable_end:
 851 | 		#print "%lx: getting graph..." % get_first_dref_from( current )
 852 | 		calls = get_resolvable_calls( get_first_dref_from( current ) )
 853 | 		for call in calls:
 854 | 			#
 855 | 			#   strip stuff from call
 856 | 			#
 857 | 			resolve_call( call, vtable_begin )
 858 | 		current = current + 4
 859 | 
 860 | def find_vtables_aggressive( firstaddr = 0, lastaddr = 0x7FFFFFFF ):
 861 | 	"""
 862 | 		Returns list of begin/end tuples for vtables found in the executable
 863 | 		A table is considered a vtable if:
 864 | 			it consists of at least 1 pointers to functions
 865 | 			it's offset is written to a register in the form [reg]
 866 | 	"""
 867 | 	valid_reg_strings = [ "[eax", "[ebx", "[ecx", "[edx", "[esi", "[edi",\
 868 | 		"[ebp" ]
 869 | 	if firstaddr == 0:
 870 | 		startaddr = nextaddr( firstaddr)
 871 | 	else:
 872 | 		startaddr = firstaddr
 873 | 	vtables = []
 874 | 	while startaddr != BADADDR:
 875 | 		#
 876 | 		#   Check if the offset is written 
 877 | 		#
 878 | 		xrefs = get_drefs_to( startaddr )
 879 | 		is_written_to_beginning = 0
 880 | 		for xref in xrefs:
 881 | 			line = get_disasm_line( xref )
 882 | 			if len( line ) >= 3:
 883 | 				for reg in valid_reg_strings:
 884 | 					if line[2].find( reg ) != -1:
 885 | 						is_written_to_beginning = 1
 886 | 		#
 887 | 		#   Check if 
 888 | 		#
 889 | 		i = 0
 890 | 		if is_written_to_beginning == 1:
 891 | 			while get_first_dref_from( startaddr + (4 * (i+1))) != BADADDR:
 892 | 				ea = get_first_dref_from( startaddr + (4*i))
 893 | 				func = get_func( ea )
 894 | 				try:
 895 | 					if func.startEA != ea:
 896 | 						break
 897 | 				except( AttributeError ):
 898 | 					break;
 899 | 				i = i + 1
 900 | 				if len( get_drefs_to( startaddr + ( 4 * (i)))) != 0:
 901 | 					break;
 902 | 		if i > 0:
 903 | 			vtables.append( [ startaddr, startaddr + (4*i) ] )
 904 | 		if i > 0:
 905 | 			startaddr = startaddr + i*4
 906 | 		elif get_item_size( startaddr ) != 0:
 907 | 			startaddr = startaddr + get_item_size( startaddr )
 908 | 		else:
 909 | 			startaddr = startaddr + 1
 910 | 		if nextaddr( startaddr ) == BADADDR:
 911 | 			break
 912 | 		if startaddr >= lastaddr:
 913 | 			break
 914 | 	return vtables
 915 | 
 916 | def find_vtables( firstaddr = 0, lastaddr = 0x7FFFFFFF ):
 917 | 	"""
 918 | 		Returns list of begin/end tuples for vtables found in the executable
 919 | 		A table is considered a vtable if:
 920 | 			it consists of at least 2 pointers to functions
 921 | 			it's offset is written to a register in the form [reg]
 922 | 	"""
 923 | 	valid_reg_strings = [ "[eax]", "[ebx]", "[ecx]", "[edx]", "[esi]", "[edi]",\
 924 | 		"[ebp]" ]
 925 | 	if firstaddr == 0:
 926 | 		startaddr = nextaddr( firstaddr)
 927 | 	else:
 928 | 		startaddr = firstaddr
 929 | 	vtables = []
 930 | 	while startaddr != BADADDR:
 931 | 		#
 932 | 		#   Check if the offset is written 
 933 | 		#
 934 | 		xrefs = get_drefs_to( startaddr )
 935 | 		is_written_to_beginning = 0
 936 | 		for xref in xrefs:
 937 | 			line = get_disasm_line( xref )
 938 | 			if len( line ) >= 3:
 939 | 				for reg in valid_reg_strings:
 940 | 					if line[2].find( reg ) != -1:
 941 | 						is_written_to_beginning = 1
 942 | 		#
 943 | 		#   Check if 
 944 | 		#
 945 | 		i = 0
 946 | 		if is_written_to_beginning == 1:
 947 | 			while get_first_dref_from( startaddr + (4 * (i+1))) != BADADDR:
 948 | 				ea = get_first_dref_from( startaddr + (4*i))
 949 | 				func = get_func( ea )
 950 | 				try:
 951 | 					if func.startEA != ea:
 952 | 						break
 953 | 				except( AttributeError ):
 954 | 					break;
 955 | 				i = i + 1
 956 | 		if i > 2:
 957 | 			vtables.append( [ startaddr, startaddr + (4*i) ] )
 958 | 		if i > 0:
 959 | 			startaddr = startaddr + i*4
 960 | 		elif get_item_size( startaddr ) != 0:
 961 | 			startaddr = startaddr + get_item_size( startaddr )
 962 | 		else:
 963 | 			startaddr = startaddr + 1
 964 | 		if nextaddr( startaddr ) == BADADDR:
 965 | 			break
 966 | 		if startaddr >= lastaddr:
 967 | 			break
 968 | 	return vtables
 969 | 
 970 | def create_class_from_constructor( constr_addr, strucname ):
 971 | 	liste = get_addr_ofs_list_from_func( constr_addr, 'ecx' )
 972 | 	addr_ofs_list_to_IDC( liste, strucname, "c:\\makestruc.idc" )
 973 | 
 974 | def create_struct_from_ea( ea, reg, strucname):
 975 | 	[graph, bib] = slice_graph_fwd( ea, reg )
 976 | 	addr_ofs_list = []
 977 | 	for key in bib.keys():
 978 | 		slice = bib[ key ]
 979 | 		for line in slice.get_lines():
 980 | 			#   check if the register is in Op1
 981 | 			if line[2].find( slice.reg ) != -1:
 982 | 				op_parts = line[2].split()
 983 | 				op = op_parts[-1]
 984 | 				if op[-1] == ']':
 985 | 					opoffset = op[4:-1]
 986 | 					if opoffset == "":
 987 | 						offset = 0
 988 | 					else:
 989 | #                        print "%s" % opoffset
 990 | 						if opoffset[-1] == 'h':
 991 | 							try:
 992 | 								offset = string.atol( opoffset[1:-1], 16 )
 993 | 							except ValueError:
 994 | 								op2 = opoffset[1:-1].split('+')[-1]
 995 | 								try:
 996 | 									offset = string.atol( op2, 16 )
 997 | 								except ValueError:
 998 | 									print op2
 999 | 									offset = 0
1000 | 						else:
1001 | 							try:
1002 | 								offset = string.atol( opoffset[1:], 16 )
1003 | 							except ValueError:
1004 | 								print opoffset[1:]
1005 | 								offset = 0
1006 | 					addr_ofs_list.append( (line[0], offset, 0) )
1007 | 				# Work on operand 1
1008 | 			if line[3].find( slice.reg ) != -1:
1009 | 				# Work on operand 2
1010 | 				op_parts = line[3].split()
1011 | 				op = op_parts[-1]
1012 | 				if op[-1] == ']':
1013 | 					opoffset = op[4:-1]
1014 | 					opoffset = op[4:-1]
1015 | 					if opoffset == "":
1016 | 						offset = 0
1017 | 					else:
1018 | #                        print "%s" % opoffset
1019 | 						if opoffset[-1] == 'h':
1020 | 							if opoffset[1:-1].find("+") != -1:
1021 | 								print opoffset
1022 | 								opoffset = opoffset[1:-1].split("+")[-1]
1023 | 							offset = string.atol( opoffset[1:-1], 16 )
1024 | 						else:
1025 | 							try:
1026 | 								offset = string.atol( opoffset[1:], 16 )
1027 | 							except ValueError:
1028 | 								print opoffset[1:]
1029 | 								offset = 0
1030 | 					addr_ofs_list.append( (line[0], offset, 1) )
1031 | 	#addr_ofs_list_to_IDC( addr_ofs_list, strucname, "c:\\makestruc.idc" )
1032 | 	assign_structure_members( addr_ofs_list, strucname )
1033 | 
def assign_structure_members( results, structure_name ):
	"""
		Apply a list of ( ea, offset, operand index ) entries to a structure.

		The structure is looked up by name and added when the lookup yields
		0xFFFFFFFF. For every entry a one-byte member "mem_<offset in hex>"
		is requested at the offset, and the operand at ea is turned into an
		offset into the structure.
	"""
	strucid = idc.GetStrucIdByName( structure_name )
	if strucid == 0xFFFFFFFF:
		print( "Adding structure %s" % structure_name )
		strucid = idc.AddStrucEx( -1, structure_name, 0 )
	for ref in results:
		member_offset = ref[1]
		idc.AddStrucMember( strucid, "mem_%lx" % member_offset,
			member_offset, FF_BYTE, -1, 1 )
		idc.OpStroffEx( ref[0], ref[2], strucid, 0 )
1043 | 
def track_register( address, register ):
	"""
		Alias for get_addr_ofs_list_from_addr( address, register ).
	"""
	accesses = get_addr_ofs_list_from_addr( address, register )
	return accesses
1046 | 
def get_addr_ofs_list_from_func( funcea, register ):
	"""
		Alias for get_addr_ofs_list_from_addr( funcea, register ).
	"""
	accesses = get_addr_ofs_list_from_addr( funcea, register )
	return accesses
1049 | 
def get_addr_ofs_list_from_addr( funcea, register ):
	"""
		Collect the offsets accessed through a register, tracked forwards.

		register is sliced forwards from funcea. In every line of every
		slice, the operands at index 2 and index 3 are inspected: when an
		operand mentions the slice's register and its last whitespace
		separated part ends in "]", the displacement inside the brackets is
		parsed as a hexadecimal offset.

		Returns a list of ( ea, offset, operand index ) tuples, operand
		index being 0 for the index-2 operand and 1 for the index-3 one.
		Displacements that cannot be parsed are printed and counted as
		offset 0, for both operands alike.
	"""
	def member_offset( op ):
		# op looks like "[reg]", "[reg+8]" or "[reg+10h]"; "[" plus a
		# three-letter register and the closing bracket are cut away
		text = op[4:-1]
		if text == "":
			return 0
		# Drop the sign character and the hex suffix, then keep the last
		# "+" term so index expressions like "+eax*4+10h" still give 0x10
		text = text[1:]
		if text[-1:] == 'h':
			text = text[:-1]
		text = text.split( '+' )[-1]
		try:
			return int( text, 16 )
		except ValueError:
			print( text )
			return 0

	[graph, bib] = slice_graph_fwd( funcea, register )
	addr_ofs_list = []
	for key in bib.keys():
		piece = bib[ key ]
		for line in piece.get_lines():
			for opindex in ( 0, 1 ):
				operand = line[ 2 + opindex ]
				if operand.find( piece.reg ) == -1:
					continue
				op = operand.split()[-1]
				if op[-1] == ']':
					addr_ofs_list.append(
						( line[0], member_offset( op ), opindex ) )
	return addr_ofs_list
1116 | 
def track_register_back( funcea, register ):
	"""
		Collect the offsets accessed through a register, tracked backwards.

		register is sliced backwards from funcea. In every line of every
		slice, the operands at index 2 and index 3 are inspected: when an
		operand mentions the slice's register and its last whitespace
		separated part ends in "]", the displacement inside the brackets is
		parsed as a hexadecimal offset.

		Returns a list of ( ea, offset, operand index ) tuples, operand
		index being 0 for the index-2 operand and 1 for the index-3 one.
		Displacements that cannot be parsed are printed and counted as
		offset 0 instead of aborting the whole walk.
	"""
	def member_offset( op ):
		# op looks like "[reg]", "[reg+8]" or "[reg+10h]"; "[" plus a
		# three-letter register and the closing bracket are cut away
		text = op[4:-1]
		if text == "":
			return 0
		# Drop the sign character and the hex suffix exactly once, then
		# keep the last "+" term ("+eax*4+10h" gives 0x10)
		text = text[1:]
		if text[-1:] == 'h':
			text = text[:-1]
		text = text.split( '+' )[-1]
		try:
			return int( text, 16 )
		except ValueError:
			print( text )
			return 0

	[graph, bib] = slice_graph_bwd( funcea, register )
	addr_ofs_list = []
	for key in bib.keys():
		piece = bib[ key ]
		for line in piece.get_lines():
			for opindex in ( 0, 1 ):
				operand = line[ 2 + opindex ]
				if operand.find( piece.reg ) == -1:
					continue
				op = operand.split()[-1]
				if op[-1] == ']':
					addr_ofs_list.append(
						( line[0], member_offset( op ), opindex ) )
	return addr_ofs_list
1173 | 
def reconstruct_class_from_vtable( classname, vtable_begin, vtable_end ):
	"""
		Write an IDC script that builds a structure from a whole vtable.

		The table is walked in 4-byte steps from vtable_begin to vtable_end
		(inclusive). For every data reference of a slot, the offsets
		accessed through "ecx" are collected; the combined list is handed
		to addr_ofs_list_to_IDC, which writes c:\\makestruc.idc.
	"""
	collected = []
	slot = vtable_begin
	while slot <= vtable_end:
		for method_ea in get_drefs_from( slot ):
			for access in get_addr_ofs_list_from_func( method_ea, "ecx" ):
				collected.append( access )
		slot = slot + 4
	addr_ofs_list_to_IDC( collected, classname, "c:\\makestruc.idc" )
1185 | 
def addr_ofs_list_to_IDC( addr_ofs_list, strucname, idcname = "makestruc.idc" ):
	"""
		Write an IDC script that creates a structure and applies it.

		addr_ofs_list holds ( ea, offset, operand index ) entries. The
		script looks the structure strucname up (adding it when missing),
		adds one byte-sized member "mem_<offset in hex>" the first time an
		offset occurs, and emits one OpStroffEx call per entry.

		The script is written to idcname; the file is closed even when
		writing fails part-way.
	"""
	idc_intro = ( '#include <idc.idc>\n'
		'static main(){\n'
		'\tauto strucid;\n'
		'\tstrucid = GetStrucIdByName("%s");\n'
		'\tif( strucid == -1 )\n'
		'\t\tstrucid = AddStruc( 0, "%s" );\n' ) % ( strucname, strucname )
	offset_dict = {}
	outidc = open( idcname, "wt" )
	try:
		outidc.write( idc_intro )
		for add_ofs in addr_ofs_list:
			if add_ofs[1] not in offset_dict:
				# First access to this offset: declare the member once
				offset_dict[ add_ofs[1] ] = add_ofs[0]
				outidc.write( '\tAddStrucMember( strucid, "mem_%lx", %d, FF_BYTE, -1, 1 );\n' % ( add_ofs[1], add_ofs[1] ) )
			outidc.write( "\tOpStroffEx( 0x%lx, %d, strucid, 0 );\n" % ( add_ofs[0], add_ofs[2] ) )
		outidc.write( "}" )
	finally:
		outidc.close()
1206 | 	
def count_indirect_calls():
	"""
		Unfinished placeholder: queries nextaddr( 0 ) and returns None.
	"""
	nextaddr( 0 )
1210 | 
def oop_indirect_call_resolver():
	"""
		Detect vtables with find_vtables() and resolve their indirect calls.

		Every detected [ begin, end ] pair is passed to
		resolve_indirect_calls_in_vtable_recursive. Progress banners are
		printed along the way; the announced counting steps only print
		their banner.
	"""
	for banner in ( "============================================",
		" SABRE OOP IDAPython Scripts       (c) 2005 ",
		" [!] Counting unresolved indirect calls ... " ):
		print( banner )

	print( " [!] Detecting vtables ... " )
	vtbls = find_vtables()
	print( " [!] Resolving indirect calls ... " )
	for table in vtbls:
		resolve_indirect_calls_in_vtable_recursive( table[0], table[1] )
	print( " [!] Counting unresolved calls again ... " )
1222 | 
1223 | 
def create_cluster_graph():
	"""
		Build a graph linking functions to callees sharing their "ecx".

		For every function of the database, the targets reported by
		get_subfuncs_with_same_thisptr become nodes, and an edge is added
		from the function to each of them. Nodes are named after the
		database name of the address, or its hex value when there is none.

		The graph is written to c:\\cluster.vcg and c:\\cluster.gml and
		also returned.
	"""
	cluster_graph = vcg_Graph.vcgGraph()

	def display_name( ea ):
		# Fall back to the hex address when the database has no name
		name = get_name( BADADDR, ea )
		if name is None:
			name = "%lx" % ea
		return name

	def ensure_node( name ):
		# Add the node, labelled with its own name, unless it exists
		if cluster_graph.Get_Node( name ) == 0:
			node = cluster_graph.Add_Node( name )
			node.set_attribute( "label", '"' + name + '"' )

	i = 0
	while i < get_func_qty():
		current_func = getn_func( i )
		print( "Processing function at %lx" % current_func.startEA )
		start_ea = current_func.startEA
		subfuncs = get_subfuncs_with_same_thisptr( start_ea )
		if len( subfuncs ) > 0:
			sourcename = display_name( start_ea )
			ensure_node( sourcename )
			for targetea in subfuncs:
				targetname = display_name( targetea )
				ensure_node( targetname )
				# One edge per callee; this used to run once after the
				# loop, linking only the last callee
				cluster_graph.Add_Link( sourcename, targetname )
		i = i + 1
	cluster_graph.write_VCG_File( "c:\\cluster.vcg" )
	foo = cluster_graph.make_GML_output()
	outfile = open( "c:\\cluster.gml", "wt" )
	try:
		outfile.write( foo )
	finally:
		outfile.close()
	return cluster_graph
1256 | 
def merge_flowgraphs_no_link( graphlist ):
	"""
		Merge several flowgraphs into one new graph.

		All node names of all graphs are added first, then every link is
		re-created from its source and target name. No edges are added
		between the individual graphs.
	"""
	merged = vcg_Graph.vcgGraph()
	# Nodes first, so that every link finds both of its ends
	for member in graphlist:
		for nodename in member.get_nodes().keys():
			merged.Add_Node( nodename )
	for member in graphlist:
		for link in member.get_links():
			source = link.get_sourcename()
			merged.Add_Link( source, link.get_targetname() )
	return merged
1269 | 	
def merge_flowgraphs( graphlist ):
	"""
		Merge a chain of flowgraphs and link each graph to its successor.

		All nodes and links of the graphs are copied into a new graph, which
		is written to c:\\merged.vcg. Then, going from the last graph to the
		second, each graph is treated as a callee of the graph before it:
		for every call in the parent function that has a far code reference
		to the child's top node, the child's top node is coloured red and a
		link is added from the calling basic block to it.

		Node names are expected to be hex addresses. An empty or
		single-element graphlist yields the plain merge without extra links.
	"""
	merged_graph = vcg_Graph.vcgGraph()
	for graph in graphlist:
		for nodename in graph.get_nodes().keys():
			merged_graph.Add_Node( nodename )
	# Done adding all nodes. Now add all edges
	for graph in graphlist:
		for link in graph.get_links():
			merged_graph.Add_Link( link.get_sourcename(), link.get_targetname() )
	merged_graph.write_VCG_File( "c:\\merged.vcg" )
	# Now add edges to link the graphs together; the range is empty for
	# fewer than two graphs, where the old "!= 0" countdown ran off the list
	for i in range( len( graphlist ) - 1, 0, -1 ):
		parentg = graphlist[ i - 1 ]
		childg = graphlist[ i ]
		child_topnode = childg.Get_Top_Nodes()[0]
		parent_topnode = parentg.Get_Top_Nodes()[0]
		parent_calls = get_calls_in_function( string.atol( parent_topnode, 16 ) )
		print( "processing parent %s, child %s" % ( parent_topnode, child_topnode ) )
		for call in parent_calls:
			# check and link
			for target in get_far_crefs_from( call[0] ):
				if "%x" % target == child_topnode:
					block = get_basic_block( call[0] )
					print( "Adding link from %x to %s" % ( block[0][0], child_topnode ) )
					merged_graph.Get_Node( "%x" % target ).set_attribute( "color", "red" )
					merged_graph.Add_Link( "%x" % block[0][0], child_topnode )
	return merged_graph
1302 | 
def inline_subfuncs_special( flowgraph ):
	"""
		Inline the flowgraphs of called functions into a flowgraph.

		For every basic block of flowgraph that ends in a call, each far
		code reference target that is not already a node gets its own
		flowgraph built. In the merged graph the calling block is linked to
		the top node of the callee, the callee's bottom nodes are linked to
		the first child of the calling block, and the direct edge from the
		calling block to that child is removed. Nodes of the original graph
		are coloured "lightblue".

		Node names are expected to be hex addresses. Returns the merged
		graph; flowgraph itself is only modified (labelled and dumped to
		c:\\foo.vcg) when a calling block has no children.
	"""
	newedgelist = []
	mergelist = [ flowgraph ]
	for address in flowgraph.get_nodes().keys():
		block = get_basic_block( string.atol( address, 16 ) )
		if block[-1][1] != "call":
			continue
		print( "%lx: %s" % ( block[-1][0], block[-1][1] ) )
		xrefs_from = get_far_crefs_from( block[-1][0] )
		print( xrefs_from )
		for target in xrefs_from:
			if flowgraph.Get_Node( "%x" % target ) != 0:
				continue
			reroute_edge_source = "%x" % block[0][0]
			callchild = flowgraph.Get_Children( "%x" % block[0][0] )
			if len( callchild ) > 0:
				reroute_edge_target = callchild[ 0 ]
			else:
				print( "Could not find children of '%x'!\n" % block[0][0] )
				add_disasm_lines_to_flowgraph( flowgraph )
				flowgraph.write_VCG_File( "c:\\foo.vcg" )
				reroute_edge_target = ""
			# Get the flowgraph and merge it in
			newgraph = create_flowgraph_from( target )
			mergelist.append( newgraph )
			print( "inlining %lx between nodes %s and %s" % ( target, reroute_edge_source, reroute_edge_target ) )
			newedgelist.append( ( reroute_edge_source, reroute_edge_target, newgraph ) )
	# allright, now merge stuff
	print( mergelist )
	mergedgraph = merge_flowgraphs_no_link( mergelist )
	for triplet in newedgelist:
		edge_source = triplet[0]
		edge_target = triplet[1]
		topnode = triplet[2].Get_Top_Nodes()[0]
		bottomnodes = triplet[2].Get_Bottom_Nodes()
		mergedgraph.Add_Link( edge_source, topnode )
		if edge_target != "":
			for node in bottomnodes:
				mergedgraph.Add_Link( node, edge_target )
			# Remove the edge of THIS call site; the source used to be
			# whatever the collection loop above had assigned last
			mergedgraph.Del_Link2( edge_source, edge_target )
	for address in flowgraph.get_nodes().keys():
		mergedgraph.Get_Node( address ).set_attribute( "color", "lightblue" )
	return mergedgraph
1345 | 
def inline_subfuncs_into( flowgraph ):
	"""
		Inline the flowgraphs of called functions into a flowgraph.

		For every basic block of flowgraph that ends in a call, each far
		code reference target that is not already a node gets its own
		flowgraph built. Blocks whose first child is itself among the far
		references of the call are left alone. In the merged graph the
		calling block is linked to the top node of the callee, the callee's
		bottom nodes are linked to the first child of the calling block,
		and the direct edge from the calling block to that child is
		removed. Nodes of the original graph are coloured "lightblue".

		Node names are expected to be hex addresses. Returns the merged
		graph; flowgraph itself is only modified (labelled and dumped to
		c:\\foo.vcg) when a calling block has no children.
	"""
	newedgelist = []
	mergelist = [ flowgraph ]
	for address in flowgraph.get_nodes().keys():
		block = get_basic_block( string.atol( address, 16 ) )
		if block[-1][1] != "call":
			continue
		print( "%lx: %s" % ( block[-1][0], block[-1][1] ) )
		xrefs_from = get_far_crefs_from( block[-1][0] )
		callchild = flowgraph.Get_Children( "%x" % block[0][0] )
		if len( callchild ) > 0:
			childaddr = string.atol( callchild[0], 16 )
			if childaddr in xrefs_from:
				continue
		for target in xrefs_from:
			if flowgraph.Get_Node( "%x" % target ) != 0:
				continue
			reroute_edge_source = "%x" % block[0][0]
			callchild = flowgraph.Get_Children( "%x" % block[0][0] )
			if len( callchild ) > 0:
				reroute_edge_target = callchild[ 0 ]
			else:
				print( "Could not find children of '%x'!\n" % block[0][0] )
				add_disasm_lines_to_flowgraph( flowgraph )
				flowgraph.write_VCG_File( "c:\\foo.vcg" )
				reroute_edge_target = ""
			# Get the flowgraph and merge it in
			newgraph = create_flowgraph_from( target )
			mergelist.append( newgraph )
			newedgelist.append( ( reroute_edge_source, reroute_edge_target, newgraph ) )
	# allright, now merge stuff
	print( mergelist )
	mergedgraph = merge_flowgraphs_no_link( mergelist )
	for triplet in newedgelist:
		edge_source = triplet[0]
		edge_target = triplet[1]
		topnode = triplet[2].Get_Top_Nodes()[0]
		bottomnodes = triplet[2].Get_Bottom_Nodes()
		mergedgraph.Add_Link( edge_source, topnode )
		if edge_target != "":
			for node in bottomnodes:
				mergedgraph.Add_Link( node, edge_target )
			# Remove the edge of THIS call site; the source used to be
			# whatever the collection loop above had assigned last
			mergedgraph.Del_Link2( edge_source, edge_target )
	for address in flowgraph.get_nodes().keys():
		mergedgraph.Get_Node( address ).set_attribute( "color", "lightblue" )
	return mergedgraph
1391 | 	
def create_flowgraph_from( address ):
	"""
		Build a flowgraph by following code references forwards.

		Starts with the basic block containing address and visits blocks
		first-in first-out. Nodes are named after the hex address of the
		first line of their block. A block ending in a call is continued
		through get_short_crefs_from, any other block through
		get_crefs_from.
	"""
	flowgraph = vcg_Graph.vcgGraph()
	entry = get_basic_block( address )
	pending = [ entry ]
	flowgraph.Add_Node( "%x" % entry[0][0] )
	while len( pending ) != 0:
		current_block = pending.pop( 0 )
		tail = current_block[-1]
		if tail[1] == "call":
			successors = get_short_crefs_from( tail[0] )
		else:
			successors = get_crefs_from( tail[0] )
		for blockaddr in successors:
			block = get_basic_block( blockaddr )
			if not flowgraph.has_node( "%x" % block[0][0] ):
				flowgraph.Add_Node( "%x" % block[0][0] )
				pending.append( block )
			flowgraph.Add_Link( "%x" % current_block[0][0], "%x" % block[0][0] )
	return flowgraph
1412 | 
def remove_nodes_below( flowgraph, list_of_eas ):
	"""
		Prunes `flowgraph` in place: only the blocks that get_basic_block
		returns for the addresses in `list_of_eas`, plus everything reachable
		from them by walking Get_Parents, are kept. All other nodes are
		deleted with Del_Node.

		Returns the (modified) flowgraph.
	"""
	# Nothing is preserved until the upward walk reaches it
	preserved = dict.fromkeys( flowgraph.get_nodes().keys(), False )
	pending = [ "%x" % get_basic_block( ea )[0][0] for ea in list_of_eas ]
	while pending:
		name = pending.pop( 0 )
		if preserved[ name ]:
			continue
		preserved[ name ] = True
		pending.extend( flowgraph.Get_Parents( name ) )
	for name, keep in preserved.items():
		if not keep:
			flowgraph.Del_Node( name )
	return flowgraph
1433 | 
def write_flowgraph( flowgraph, fname ):
	"""
		Writes `flowgraph` to the VCG file `fname` with disassembly labels.

		The labels are added to a deep copy, so the graph passed in is not
		modified.
	"""
	labelled = copy.deepcopy( flowgraph )
	add_disasm_lines_to_flowgraph( labelled )
	labelled.write_VCG_File( fname )
1438 | 
1439 | #def write_flowgraph_syntax_highlighted( flowgraph, fname ):
1440 | #	newgraph = copy.deepcopy( flowgraph )
1441 | #	add_disasm_lines_to_flowgraph( flowgraph )
1442 | #	flowgraph.write_VCG_File( fname )
1443 | 
def add_disasm_lines_to_flowgraph( flowgraph ):
	"""
		Sets the "label" attribute of every node of `flowgraph` to the
		double-quoted disassembly text of the node's block.

		The node name is parsed as a hex address and handed to
		get_basic_block_from; each instruction contributes one
		"address: mnemonic operand operand" line.
	"""
	for node in flowgraph.nodes.values():
		block_ea = string.atol( node.get_name(), 16 )
		text = ""
		for insn in get_basic_block_from( block_ea ):
			line = "%x: %s %s %s\n" % ( insn[0], insn[1], insn[2], insn[3] )
			# idaline_to_string is re-applied to the whole accumulated text
			# after every appended line
			text = idaline_to_string( text + line )
		node.set_attribute( "label", '"' + text + '"' )
1454 | 
def get_calls_in_function( ea ):
	"""
		Returns a list of the "call" instructions found in the flowgraph
		built by create_flowgraph_from( ea ).

		Each entry is the instruction as delivered by get_basic_block
		(element 1 is the mnemonic).
	"""
	calls = []
	for node_name in create_flowgraph_from( ea ).nodes.keys():
		# Node names are the hex start addresses of the basic blocks
		for insn in get_basic_block( string.atol( node_name, 16 ) ):
			if insn[ 1 ] == "call":
				calls.append( insn )
	return calls
1468 | 
def get_calls_in_function_ext( ea ):
	"""
		Same as get_calls_in_function, except that `ea` (the function address
		that was passed in) is inserted in front of every returned
		instruction, shifting the original fields up by one index.

		The instruction lists are modified in place.
	"""
	tagged_calls = get_calls_in_function( ea )
	for insn in tagged_calls:
		insn[0:0] = [ ea ]
	return tagged_calls
1477 | 
def create_reachgraph_from_delta_graph( deltagraph, distance ):
	"""
		Returns a graph of "reachable's with a given stack delta"

		Starting at the first top node of `deltagraph`, the delta graph is
		unrolled with a FIFO worklist into a new vcgGraph whose nodes are
		named "<delta graph node name>::<accumulated delta>".

		deltagraph -- graph whose node names parse as hex addresses and whose
		              edges carry a "label" attribute holding a quoted decimal
		              delta (as produced by
		              create_stack_delta_graph_from_function)
		distance   -- threshold the accumulated deltas are compared against;
		              a new node with target_delta - framesize <= distance is
		              marked "fin_" and not expanded any further

		Returns the new reach graph. Raises IndexError if `deltagraph` has no
		top nodes.
	"""
	reachgraph = vcg_Graph.vcgGraph()
	reachgraph.set_attribute("manhattan_edges", "no" )
	print "Creating Reachgraph"
	# Both dicts are keyed by reach graph node name:
	#   original_node_dict -> name of the corresponding delta graph node
	#   delta_node_dict    -> delta accumulated on the way to that node
	original_node_dict = {}
	delta_node_dict = {}
	
	#	The root is the first top node of the delta graph, with delta 0
	rootlist = deltagraph.Get_Top_Nodes()
	new_node_str = rootlist[0] + "::0"
	original_node_dict[ new_node_str ] = rootlist[0]
	delta_node_dict[ new_node_str ] = 0
	
	worklist = [ reachgraph.Add_Node( new_node_str ) ]
	#	Label layout: "<name>(<delta>)(<frame size>)-<hex address>"
	worklist[0].set_attribute( "label", '"%s(%d)(%d)-%s"' % (get_name( 0, string.atol( rootlist[0] ,16 )),0,get_real_frame_size(string.atol(rootlist[0],16)), rootlist[0]))
	while len( worklist ) != 0:
		current_node = worklist.pop(0)
		#	Retrieve the delta and original node of this node
		orig_node = original_node_dict[ current_node.get_name() ]
		curr_delta = delta_node_dict[ current_node.get_name() ]
		#	Get the outgoing edges of the original node
		down_links = deltagraph.Get_Downlinks_From( orig_node )
		for link in down_links:
			#	Get the delta associated with this edge ([1:-1] strips the quotes)
			link_delta_str = link.get_attribute("label")[1:-1]
			link_delta = string.atol( link_delta_str, 10 )
			#	Calculate the target's new delta
			target_delta = curr_delta + link_delta - 4	# - 4 is for the size of EIP on the stack
			#	Construct the name for the new node to be added
			new_node_str = link.get_targetname() + "::" + "%d" % target_delta 
			newaddr = string.atol( link.get_targetname(), 16)
			framesize = get_real_frame_size(  newaddr )
				
			original_node_dict[ new_node_str ] = link.get_targetname()
			delta_node_dict[ new_node_str ] = target_delta
			
			#	A (target, delta) pair is created and considered for expansion only once
			if not reachgraph.has_node( new_node_str ):
				new_node = reachgraph.Add_Node( new_node_str )
				new_node.set_attribute( "target_delta", "%d" % target_delta )
				new_node.set_attribute( "function_framesize", "%d" % framesize )
				#	Same label layout as the root, but with the absolute value of the delta
				newlabel = '"%s(%d)' % ( get_name( 0, newaddr), abs(target_delta))
				newlabel = newlabel + '(%d)' % framesize
				newlabel = newlabel + '-%s"' % link.get_targetname()
				
				print newlabel
				
				new_node.set_attribute( "label", newlabel )
				if target_delta - framesize <= distance:
					#	Threshold reached: mark the node by inserting "fin_" right
					#	after the opening quote (unless the label already contains
					#	"fin_") and do not expand it any further
					currlabel = new_node.get_attribute( "label" )
					if currlabel.find( "fin_" ) == -1:
						newlabel = currlabel [0] + "fin_" + currlabel[1:]
					else:
						newlabel = currlabel
					new_node.set_attribute("label", newlabel )
				else:
					worklist.append( new_node )
			#	The edge is added even when the target node already existed
			new_link = reachgraph.Add_Link( current_node.get_name(), new_node_str )
			#	Only edges whose target delta is within the threshold get a label
			if target_delta <= distance:
				new_link.set_attribute( "label", "%d" % target_delta )
	#reachgraph.write_VCG_File("c:\\reach.vcg")
	return reachgraph
1541 | 	
def create_stack_delta_graph_from_function( ea, recursion_depth ):
	"""
		Builds a call graph rooted at the function `ea` whose edges are
		labelled with the (quoted, decimal) get_spd value at the call site.

		Calls are explored level by level; `recursion_depth` bounds the
		number of levels. Node names are hex addresses, the root is labelled
		"START-<ea>". Edges pointing back at the root are not added, and each
		(source, target, delta) combination is linked only once.

		The graph is also written to c:\\test.vcg before it is returned.
	"""
	graph = vcg_Graph.vcgGraph()
	graph.set_attribute( "manhattan_edges", "no" )
	root = graph.Add_Node( "%x" % ea )
	root.set_attribute( "label", '"%s-%x"' % ( "START", ea ) )
	seen_edges = {}

	# Entries are [ function ea, call address, mnemonic, operand, ... ]
	current_calls = get_calls_in_function_ext( ea )
	while current_calls and recursion_depth != 0:
		discovered = []
		for call in current_calls:
			caller_ea, call_ea = call[0], call[1]
			# Candidate targets: far code references plus, if it resolves,
			# the address named by the first operand
			candidates = get_far_crefs_from( call_ea )
			named_ea = get_name_ea( call_ea, call[ 3 ] )
			if named_ea != BADADDR:
				candidates.append( named_ea )
			for callee_ea in candidates:
				callee_name = "%x" % callee_ea
				if not graph.has_node( callee_name ):
					callee = graph.Add_Node( callee_name )
					callee.set_attribute( "label", '"%s-%x"' % ( get_name( 0, callee_ea ), callee_ea ) )
					# Calls of newly discovered functions form the next level
					discovered.extend( get_calls_in_function_ext( callee_ea ) )
				caller_name = "%x" % caller_ea
				delta = "%d" % get_spd( get_func( call_ea ), call_ea )
				edge_key = ( caller_name, callee_name, delta )
				if edge_key in seen_edges:
					continue
				if callee_name == root.get_name():
					continue
				link = graph.Add_Link( caller_name, callee_name )
				link.set_attribute( "label", '"' + delta + '"' )
				seen_edges[ edge_key ] = 1
		current_calls = discovered
		recursion_depth = recursion_depth - 1
	graph.write_VCG_File( "c:\\test.vcg" )
	return graph
1582 | 
1583 | 	
def retrieve_all_fpo_funcs( ):
	"""
		Prints the trailing instructions of interesting functions whose
		`flags` field is 0 (judging by the name, this is meant to spot
		functions without a frame pointer -- not verified here).

		For each such function up to three instructions are collected by
		walking backwards from endEA - 1 along the first code reference to
		each address. The collected list is in address order (oldest first):
		  * if its second entry is an "add", that entry is printed
		  * if its first two entries are both "pop", all collected entries
		    are printed

		Nothing is returned; the results are only printed.
	"""
	insn_format = "%x: %s %s %s"
	i = 0
	while i < get_func_qty():
		func = getn_func( i )
		i = i + 1
		if func.flags != 0:
			continue
		end = func.endEA - 1
		lastinsns = []
		for k in range( 0, 3 ):
			lastinsns.insert( 0, get_disasm_line( end ) )
			end = get_first_cref_to( end )
			if end == BADADDR:
				break
		# The walk can stop early, so fewer than three instructions may have
		# been collected; index 1 and 2 must not be touched blindly
		if len( lastinsns ) < 2:
			continue
		if lastinsns[1][1] == "add":
			insn = lastinsns[1]
			print( insn_format % ( insn[0], insn[1], insn[2], insn[3] ) )
		if lastinsns[0][1] == "pop" and lastinsns[1][1] == "pop":
			for insn in lastinsns:
				print( insn_format % ( insn[0], insn[1], insn[2], insn[3] ) )
1605 | 
1606 | #deltagraph = create_stack_delta_graph_from_function( 0x432870 , 25)
1607 | #reachgraph = create_reachgraph_from_delta_graph( deltagraph, -200 )
1608 | #sicken = deltagraph.make_GML_output()
1609 | #f = file("c:\\sicken.gml", "wt")
1610 | #f.write(sicken)
1611 | #f.close()
1612 | #sicken = reachgraph.make_GML_output()
1613 | #f = file("c:\\sickreach.gml", "wt")
1614 | #f.write(sicken)
1615 | #f.close()
1616 | 
def build_reachgraph_from_path( path_delta_list, bottom_func_delta ):
	"""
		Builds one delta graph and one reach graph per entry of the chain
		`path_delta_list`, from the last entry down to entry 1, and dumps
		both as GML files under c:\\.

		path_delta_list   -- sequence of entries whose element 0 is a function
		                     address and element 1 a delta
		bottom_func_delta -- not used by the current implementation

		Returns nothing.
	"""
	idx = len( path_delta_list ) - 1
	while idx != 0:
		func_ea = path_delta_list[ idx ][0]
		# Delta graph of the function at this position in the chain
		deltagraph = create_stack_delta_graph_from_function( func_ea, 25 )
		gml = deltagraph.make_GML_output()
		out = file( "c:\\deltagraph_%lx.gml" % func_ea, "wt" )
		out.write( gml )
		out.close()
		# Total delta of the chain below this entry: every entry from idx
		# down to 1 contributes its delta plus 4, entry 0 only its delta
		delta = 0
		for k in range( idx, 0, -1 ):
			delta = delta - path_delta_list[ k ][1]
			delta = delta - 4
		delta = delta - path_delta_list[ 0 ][1]
		print( "Calling create_reachgraph from %lx with delta %d" % ( func_ea, delta ) )
		reachgraph = create_reachgraph_from_delta_graph( deltagraph, delta )
		gml = reachgraph.make_GML_output()
		out = file( "c:\\reach_%lx_%d.gml" % ( func_ea, delta ), "wt" )
		out.write( gml )
		out.close()
		idx = idx - 1
1645 | 
def get_real_frame_size( address ):
	"""
		Returns the frame size of the function containing `address`, derived
		from the frame member named " r" (presumably the saved return
		address).

		Prints a message and returns 0 when that member cannot be found.
	"""
	frame = get_frame( get_func( address ) )
	retaddr = get_member_by_name( frame, " r" )
	if retaddr != None:
		max_offset = get_max_offset( frame )
		# max - (max - soff): algebraically the start offset of " r"
		return max_offset - ( max_offset - retaddr.get_soff() )
	print( "Could not find retaddr at %lx\n" % address )
	return 0
1657 | 
def strip_below_calls( firstaddress, flowgraph, targetaddress ):
	"""
		Prunes `flowgraph` (in place, via remove_nodes_below) down to the
		call sites in the function at `firstaddress` that have
		`targetaddress` among their far code references, plus the blocks
		above them.

		Returns nothing.
	"""
	call_sites = [ insn[0]
		for insn in get_calls_in_function( firstaddress )
		if targetaddress in get_far_crefs_from( insn[0] ) ]
	remove_nodes_below( flowgraph, call_sites )
1667 | 		
1668 | 
def build_ibb_graph_from( ea_source, sourcenode, reachgraph ):
	"""
		Meant to walk a reachgraph upwards, inlining every function on the
		path. Planned algorithm (not implemented yet):

		1. Retrieve first flowgraph and node
		2. Remove all that is not before node
		3. Scan upwards. Notice stack access in each basic block
			3a.	If you run into a call, and the target of the call is in
				the reachgraph, inline it
			3b.	If you run into the beginning of the function, add the
				return nodes of all parents in the reachgraph to the graph

		NOTE: the current body ignores all three arguments. It only builds
		the flowgraph of the hard-coded address 0x4423D0, labels it with
		disassembly and writes it to C:\\test.vcg. Returns nothing.
	"""
	debug_graph = create_flowgraph_from( 0x4423D0 )
	add_disasm_lines_to_flowgraph( debug_graph )
	debug_graph.write_VCG_File( "C:\\test.vcg" )
1686 | 
def build_flowgraph_from_path( path, target_addr ):
	"""
		Builds one flowgraph per entry of `path` (element 0 of an entry is a
		function address), walking from the last entry down to entry 1, and
		returns the result of merge_flowgraphs over all of them.

		Each of those graphs is stripped to the calls that lead to the next
		lower entry; the graph of entry 0 is stripped to what lies above
		`target_addr`. Intermediate graphs are written to c:\\<index>.vcg and
		c:\\<index>_strip.vcg.
	"""
	graphs = []
	idx = len( path ) - 1
	while idx != 0:
		print( "Trying to generate flowgraph from %d" % idx )
		func_ea = path[ idx ][0]
		graph = create_flowgraph_from( func_ea )
		graph.write_VCG_File( "c:\\%d.vcg" % idx )
		callee_ea = path[ idx - 1 ][0]
		print( "Trying to strip %lx calls from %lx graph" % ( callee_ea, func_ea ) )
		strip_below_calls( func_ea, graph, callee_ea )
		graph.write_VCG_File( "c:\\%d_strip.vcg" % idx )
		graphs.append( graph )
		idx = idx - 1
	# Entry 0 holds the target itself: keep only what leads up to it
	innermost = create_flowgraph_from( path[0][0] )
	remove_nodes_below( innermost, [ target_addr ] )
	graphs.append( innermost )
	return merge_flowgraphs( graphs )
1703 | 
def get_function_end_addresses( address ):
	"""
		Returns the addresses of the last instruction of every bottom node
		(Get_Bottom_Nodes) of the flowgraph built from `address`.
	"""
	bottom_nodes = create_flowgraph_from( address ).Get_Bottom_Nodes()
	# Node names are hex block addresses; [-1][0] is the last instruction's address
	return [ get_basic_block( string.atol( name, 16 ) )[-1][0] for name in bottom_nodes ]
1712 | 	
1713 | 		
def get_basic_block_stack_delta( address ):
	"""
		Returns the difference between ESP upon entry and end of the basic
		block that get_basic_block returns for `address`.

		A single-instruction block yields get_sp_delta of that instruction.
		Otherwise the result is get_spd at the first instruction minus
		get_spd at the last one, plus 4 if the block ends in "call" or minus
		4 if it ends in "retn".
	"""
	block = get_basic_block( address )
	first_ea = block[0][0]
	last_ea = block[-1][0]
	last_mnemonic = block[-1][1]
	func = get_func( first_ea )
	if first_ea == last_ea:
		return get_sp_delta( func, first_ea )
	adjustment = { "call": 4, "retn": -4 }.get( last_mnemonic, 0 )
	return get_spd( func, first_ea ) - get_spd( func, last_ea ) + adjustment
1731 | 
1732 | 
def create_reachgraph_from_pathgraph( pathgraph, address ):
	"""
		Returns a graph of "reachable's with a given stack delta"

		Starting at the node of `pathgraph` named after `address`, the path
		graph is unrolled upwards (along Get_Parents) with a FIFO worklist
		into a new vcgGraph whose nodes are named
		"<path graph node name>::<accumulated delta>".

		pathgraph -- graph whose node names parse as hex addresses
		address   -- address whose hex form names the start node

		Nodes whose accumulated delta is negative are highlighted and not
		expanded any further. After more than 1100 worklist iterations the
		partial graph is written to c:\\pathreach_<counter>.vcg and returned;
		otherwise the finished graph is written to c:\\pathreach.vcg and
		returned.
	"""
	reachgraph = vcg_Graph.vcgGraph()
	reachgraph.set_attribute("manhattan_edges", "no" )
	print "Creating Path Reachgraph"
	# Both dicts are keyed by reach graph node name:
	#   original_node_dict -> corresponding path graph node object
	#   delta_node_dict    -> delta accumulated on the way to that node
	original_node_dict = {}
	delta_node_dict = {}
	
	rootnode = pathgraph.Get_Node("%lx" % address)
	new_node_str = rootnode.get_name() + "::0"
	# Debug output: the root's name and its parents in the path graph
	print new_node_str
	print "parents" 
	print pathgraph.Get_Parents( rootnode.get_name() )
	original_node_dict[ new_node_str ] = rootnode
	delta_node_dict[ new_node_str ] = 0
	
	worklist = [ reachgraph.Add_Node( new_node_str ) ]
	worklist[0].set_attribute( "label", '"%s"' % new_node_str )
	counter = 0
	while len( worklist ) != 0:
		counter = counter + 1
		#	Hard iteration cap: dump what exists so far and bail out
		if counter > 1100:
			reachgraph.write_VCG_File("c:\\pathreach_%d.vcg" % counter )
			return reachgraph
		current_node = worklist.pop(0)
		#	Retrieve the delta and original node of this node
		orig_node = original_node_dict[ current_node.get_name() ]
		curr_delta = delta_node_dict[ current_node.get_name() ]
		#	Get the incoming edges of the original node
		parents = pathgraph.Get_Parents( orig_node.get_name() )
		print parents
		for parent in parents:
			#	Parent names are hex block addresses; add that block's stack delta
			parent_delta = get_basic_block_stack_delta( string.atol ( parent, 16 ))
			target_delta = curr_delta + parent_delta
			#	Construct the name for the new node to be added
			new_node_str = parent + "::" + "%d" % target_delta 
			original_node_dict[ new_node_str ] = pathgraph.Get_Node( parent )
			delta_node_dict[ new_node_str ] = target_delta
			#	A (parent, delta) pair is created and considered for expansion only once
			if not reachgraph.has_node( new_node_str ):
				new_node = reachgraph.Add_Node( new_node_str )
				newlabel = '"%s"' % new_node_str
				new_node.set_attribute( "label", newlabel )
				if target_delta < 0:	# delta dipped below zero !
					#	Highlight the node and stop walking upwards from it
					new_node.set_attribute( "color", "lightblue" )
					new_node.set_attribute( "bordercolor", "red" )
					new_node.set_attribute( "borderwidth", "10" )
				else:
					worklist.append( new_node )
			#	The edge points from the parent to the current node and is
			#	added even when the parent node already existed
			new_link = reachgraph.Add_Link( new_node_str, current_node.get_name() )
	reachgraph.write_VCG_File("c:\\pathreach.vcg")
	return reachgraph
1786 | 
def build_flowgraph_from_to( ea_source, ea_target ):
	"""
		Unfinished: only looks up the functions containing `ea_source` and
		`ea_target`; nothing is built yet and nothing is returned.
	"""
	# Start out by getting the functions the two addresses are in
	func_of_source = get_func_ea_from_ea( ea_source )
	func_of_target = get_func_ea_from_ea( ea_target )
	# TODO: next planned step was to construct a stack delta graph
	# ("because it's easier") -- not implemented
1796 | 
def get_return_value_summary( target_function ):
	""" Returns a (possibly sound) set of return values -- these can be:
		1) Concrete values (operand text of an immediate, "0" or "-1")
		2) Function names (if the return value of that function is returned)
		3) The term "THIS" if it returns ECX
		4) The term "UNKN" for anything else
		
		For every bottom node of the function's flowgraph, "eax" is sliced
		backwards from the last instruction of the block and the first line
		of each top node of the slice graph is classified. Every slice graph
		is also written to c:\\garbage\\<node>-<index>.vcg. Lines that match
		no rule are only printed, nothing is added to the set for them.
	"""
	retvalset = sets.Set()
	flowgraph = create_flowgraph_from( target_function )
	endnodes = flowgraph.Get_Bottom_Nodes()
	# i only numbers the slice graph dump files
	i = 0
	for node in endnodes:
		blk = get_basic_block( string.atol( node, 16))
		# grph is the slice graph; bib maps its node names to slice node
		# objects (with .lines and .reg)
		[grph, bib] = slice_graph_bwd( blk[-1][0], "eax" )
		write_slice_graph( [grph,bib], "c:\\garbage\\%s-%d.vcg" % (node, i))
		i = i+1
		topnodes = grph.Get_Top_Nodes()
		for topnode in topnodes:
			# A top node without lines: the tracked register comes from outside
			if len( bib[topnode].lines ) == 0:
				if bib[topnode].reg == "ecx":
					retvalset.add( "THIS" )
				else:
					print "%s: Look at this, reg passed in ? !" % topnode
				continue
			# Classify by the first line of the top node; fields are
			# ( address, mnemonic, operand, operand )
			line = bib[topnode].lines[0]
			addr = line[0]
			if line[ 1 ] == "call":
				# Name of the first far code reference target of the call
				target = get_name(0, get_first_fcref_from( line[0] ))
				if target != None:
					#if target == "@__security_check_cookie@4":
					# HANDLE sec check !	
					retvalset.add( target )
				else:
					retvalset.add( "UNKN")
			elif line[ 1 ] == "mov":
				if might_be_immediate( line[3] ):
					retvalset.add( line[3] )
				else:
					retvalset.add("UNKN")
			# xor with two identical operands
			elif line[ 1 ] == "xor" and line[ 2] == line[3]:
				retvalset.add("0")
			# and with 0
			elif line[ 1 ] == "and" and line[ 3] == "0":
				retvalset.add("0")
			# or with all bits set, in either spelling
			elif line[ 1 ] == "or" and line[ 3 ] == "0FFFFFFFFh":
				retvalset.add("-1")
			elif line[ 1 ] == "or" and line[ 3 ] == "-1":
				retvalset.add("-1")
			else:
				print "Can't yet handle:"
				print line
	#print retvalset
	return retvalset
1850 | 
def get_method_calls_in_method( funcea ):
	"""
		Returns the set of far code reference targets of all "call"
		instructions that end a forward slice of "ecx" starting at `funcea`.

		Only slice nodes that still track "ecx" are considered.
	"""
	# Element 1 of the slice result maps to the slice node objects
	slice_nodes = slice_graph_fwd( funcea, "ecx" )[1]
	callees = set()
	for entry in slice_nodes.values():
		if entry.reg != "ecx":
			continue
		if get_disasm_line( entry.endea )[1] != "call":
			continue
		callees.update( get_far_crefs_from( entry.endea ) )
	return callees
1863 | 		
1864 | 
def get_retval_summaries():
	"""
		Returns a dict that maps the start address of every function found
		by repeatedly calling get_next_func (starting from address 0) to its
		get_return_value_summary result.

		A dict is returned on every path, including the defensive case where
		get_next_func makes no forward progress, so callers can always
		iterate the result.
	"""
	summaries = {}
	ea = 0
	while ea != BADADDR:
		func = get_next_func( ea )
		if not func:
			break
		next_ea = func.startEA
		# Guard against looping forever on the same function; keep what has
		# been collected so far instead of returning None
		if next_ea == ea:
			break
		summaries[ next_ea ] = get_return_value_summary( next_ea )
		ea = next_ea
	return summaries
1879 | 
def get_retval_summaries_transitive():
	"""
		Prints every entry of get_retval_summaries() as "<hex address>:"
		followed by its summary. Returns nothing.

		The transitive resolution suggested by the name is not implemented;
		only an unfinished sketch exists (kept as a comment below).
	"""
	summaries = get_retval_summaries()
	# Unfinished sketch of the transitive step, never executed:
	#	for item in summary:
	#		if dict.hasKey( get_name_ea( item )):
	#			if not transitive_dict.hasKey( ea ):
	#				transitive_dict[ea] = sets.Set()
	#			transitive_dict[ ea ] = transitive_dict[ea].union( dict
	for ea, summary in summaries.items():
		print( "%lx:" % ea )
		print( summary )
1892 | 
def get_pushes_before_call( callea, n ):
	"""
		Placeholder, not implemented: the body consists of this docstring
		only, so both arguments are ignored and None is returned.
	"""
1896 | 	
def get_push_before_call( callea, n ):
	"""
		Searches the basic block of `callea` backwards for the first
		instruction whose get_spd value equals get_spd at the call plus
		n * 4.

		Returns that instruction, or None if no instruction matches.
	"""
	func = get_func( callea )
	wanted_spd = get_spd( func, callea ) + ( n * 4 )
	block = get_basic_block( callea )
	# Walk from the end of the block towards its start
	block.reverse()
	for insn in block:
		if get_spd( func, insn[0] ) == wanted_spd:
			return insn
	return None
1905 | 	
1906 | 
class vtable:
	"""
		Describes one table of method pointers in the address range
		[begin, end).

		Attributes:
		  begin, end   -- bounds of the table as passed in
		  name         -- name given to the table
		  methods      -- for every 4-byte slot, the first data reference
		                  leaving that slot
		  constructors -- start addresses of the functions that hold a data
		                  reference to `begin` (presumably the constructors
		                  that install the table)
	"""
	def __init__( self, begin, end, name ):
		self.begin = begin
		self.end = end
		self.name = name
		self.methods = []
		for addr in range( begin, end, 4 ):
			ref = get_drefs_from( addr )[ 0 ]
			self.methods.append( ref )
		self.constructors = []
		for ref in get_drefs_to( begin ):
			# A reference that is not inside any function (e.g. one coming
			# from data) has no start address to record; skip it instead of
			# failing on a missing function object
			func = get_func( ref )
			if not func:
				continue
			self.constructors.append( func.startEA )
	def __str__( self ):
		return "vtable"
	def __repr__( self ):
		return "vtable"
1925 | 
def vtables_to_relations_graph( vtablelist ):
	"""
		Builds a vcgGraph relating vtables, their methods and their
		constructors: every vtable links to each of its methods, and every
		constructor links to its vtable. Nodes are named after the vtable
		name or the get_name of the address.

		The graph is written to c:\\output.vcg and returned.
	"""
	relations = vcg_Graph.vcgGraph()
	for table in vtablelist:
		relations.Add_Node( table.name )
		for method_ea in table.methods:
			method_name = get_name( 0, method_ea )
			relations.Add_Node( method_name )
			relations.Add_Link( table.name, method_name )
		for ctor_ea in table.constructors:
			ctor_name = get_name( 0, ctor_ea )
			relations.Add_Node( ctor_name )
			relations.Add_Link( ctor_name, table.name )
	relations.write_VCG_File( "c:\\output.vcg" )
	return relations
1938 | 		
def cpp_code_primer( begin, end ):
	"""
		Names the vtables found by find_vtables_aggressive( begin, end ) and
		the methods that belong to exactly one of them.

		Each table found is named "class_<n>_vtable" and wrapped in a vtable
		object. A method whose node has exactly one parent in the relations
		graph is renamed to "<parent>_method_<slot index>".

		Returns the list of vtable objects.
	"""
	vtables = []
	for index, bounds in enumerate( find_vtables_aggressive( begin, end ) ):
		table_name = "class_%d_vtable" % index
		set_name( bounds[0], table_name, 0 )
		vtables.append( vtable( bounds[0], bounds[1], table_name ) )
	graph = vtables_to_relations_graph( vtables )

	for table in vtables:
		for slot, method in enumerate( table.methods ):
			name = get_name( 0, method )
			parents = graph.Get_Parents( name )
			print( "Node: " + name )
			print( "Parents: " + repr( parents ) )
			# Only methods with exactly one parent are renamed
			if len( parents ) == 1:
				set_name( method, "%s_method_%d" % ( parents[0], slot ), 0 )
				print( "Setting name of %lx to %s_method_%d" % ( method, parents[0], slot ) )

	return vtables
1966 | 		
1967 | 
1968 | #get_retval_summaries_transitive()
1969 | #get_return_value_summary( get_screen_ea())
1970 | 
1971 | 
1972 | 


--------------------------------------------------------------------------------