├── release_notes.txt ├── README.md └── msdnGrab.py /release_notes.txt: -------------------------------------------------------------------------------- 1 | msdnGrab Release Notes 2 | ---------------------- 3 | 4 | msdnGrab 1.2.1 5 | . Improved grabbing of information from MSDN page when there are multiple 6 | possible places to grab from 7 | . Handles more cases of descriptions to ignore (not the right description) 8 | . Handles case where the MSDN page does not have a code listing 9 | . Added dbgPrint() functionality for debugging purposes 10 | (Set _MSDN_DEBUG = True to turn it on) 11 | . Bugfixes 12 | 13 | msdnGrab 1.2 14 | . Added open MSDN page in browser (Ctrl-Shift-F3) 15 | . Bugfixes 16 | 17 | msdnGrab 1.1 18 | . Implemented querying of C/C++ functions, from MSDN as well (Ctrl-F3) 19 | . Fixed parsing of newlines between UNIX and Windows formats 20 | (was affecting comments) 21 | 22 | msdnGrab 1.0 23 | . Initial implementation 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | msdnGrab 2 | ======== 3 | 4 | Allows a user to grab documentation from online MSDN for a given function name in IDA, and import the documentation as a repeatable comment for that function. 5 | 6 | Handles queries for the Win32 API and C/C++. 7 | 8 | ___ 9 | 10 | ### Usage 11 | #### Grab MSDN docs as comment: 12 | 1. Highlight a given term that you want to query documentation for (e.g. you may highlight "fopen", "HeapAlloc"). 13 | 2. Decide whether that is a Win32 API function, or a C/C++ function (i.e. CRT). 14 | 3. If it's a Win32 API function, hit F3, and if it's a C/C++ function, hit Ctrl-F3. 15 | 4. The results should populate as a repeating comment. 16 | 17 | #### Open MSDN page in browser: 18 | 1. Highlight a given term that you want to query documentation for (e.g. you may highlight "fopen", "HeapAlloc"). 19 | 2. Hit Ctrl-Shift-F3. 20 | 3.
Select the language (Win32 API, C/C++). 21 | 22 | ### Notes about comments 23 | 1. If it's an external library call (i.e. function is an extern in the data segment), a (code) repeating comment is used. 24 | 2. If it's a direct call (i.e. function is in the code segment), a (function) repeating comment is used. 25 | 26 | Note: I am using the words external and direct calls very loosely. 27 | 28 | -------------------------------------------------------------------------------- /msdnGrab.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # msdnGrab 3 | # 4 | # Allows a user to grab documentation from online MSDN for a given function 5 | # name in IDA, and import the documentation as a repeatable comment for that 6 | # function. 7 | # 8 | # The script assumes that the function name is valid, and queries Google for 9 | # the MSDN page. It then pulls the short description of the function, and 10 | # the function definition into the comment. It also allows you to open the 11 | # MSDN page to view the rest of the information. 12 | # 13 | # Usage: 14 | # 15 | # Hotkey to grab as comment (Win32 API): F3 16 | # Hotkey to grab as comment (C/C++): Ctrl-F3 17 | # Hotkey to open query in browser: Ctrl-Shift-F3 18 | # 19 | # Just highlight a function (e.g. CreateFileA) in IDA, and hit the 20 | # corresponding hotkey for what you want. 21 | # 22 | # Copyright (c) 2012 - * | Eugene Ching 23 | # 24 | # All rights reserved.
#
###############################################################################

import idautils
import idc
import idaapi

import bs4
import HTMLParser
import os
import re
import textwrap
import urllib
import urllib2
import webbrowser


###############################################################################
# Globals
###############################################################################

_MSDN_HOTKEY_WIN32API = 'F3'
_MSDN_HOTKEY_C = 'Ctrl-F3'
_MSDN_HOTKEY_BROWSER = 'Ctrl-Shift-F3'

COMMENT_NOT_REPEATABLE = 0
COMMENT_REPEATABLE = 1
MAX_COMMENT_WIDTH = 50

_SEARCHTYPE_WIN32API = 0
_SEARCHTYPE_C = 1

# Set to True to have dbgPrint() emit its messages.
_MSDN_DEBUG = False


###############################################################################
# Helper functions
###############################################################################

def dbgPrint(sMessage):
    '''
    Print sMessage, but only while _MSDN_DEBUG is switched on.
    '''
    if _MSDN_DEBUG:
        print(sMessage)


class TagStripper(HTMLParser.HTMLParser):
    '''
    HTML parser that collects only the text data it is fed.

    Only handle_data() is overridden, so markup is not collected.
    '''

    def __init__(self):
        # Explicit base-class initialisation instead of calling reset()
        # directly and relying on that being all the base __init__ does.
        HTMLParser.HTMLParser.__init__(self)
        self.fed = []

    def handle_data(self, d):
        self.fed.append(d)

    def get_data(self):
        '''Return all collected text fragments joined into one string.'''
        return ''.join(self.fed)


def stripTags(text):
    '''
    Return the text content of the HTML fragment text.

    text is converted with str() first, so anything with a string
    representation may be passed in.
    '''
    stripper = TagStripper()
    stripper.feed(str(text))
    return stripper.get_data()


def stripBlankLines(text):
    '''
    Return text with its empty lines removed.

    The remaining lines are re-joined with os.linesep.  Lines that contain
    only whitespace are not empty and are therefore kept.
    '''
    return os.linesep.join([line for line in text.splitlines() if line])


def multiLineString(text):
    '''
    Re-flow text into lines of at most MAX_COMMENT_WIDTH characters.

    Runs of whitespace (including existing newlines) are collapsed to single
    spaces before wrapping.  A word longer than the limit is kept intact on
    its own line rather than being split.  The result carries no trailing
    whitespace; an empty or whitespace-only text yields ''.
    '''
    return textwrap.fill(' '.join(text.split()),
                         width=MAX_COMMENT_WIDTH,
                         break_long_words=False,
                         break_on_hyphens=False)



###############################################################################
# Search Google for MSDN page
###############################################################################

def _fetchPage(url):
    '''
    Download url with a browser-like User-agent and return the raw body.

    Returns None (after printing an error) when urllib2 reports a failure,
    so a network problem does not surface as a traceback.
    '''
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    try:
        return opener.open(url).read()
    except urllib2.URLError as e:
        print('(msdnGrab) Error: Could not read %s (%s)' % (url, e))
        return None


def grabMsdnPageFromGoogle(searchTerm, searchType):
    '''
    Ask Google for the MSDN page that documents searchTerm.

    searchType selects the query template (_SEARCHTYPE_WIN32API or
    _SEARCHTYPE_C).  Returns the URL built from the first usable MSDN link
    in the results, or None when the search type is unknown, the request
    fails, or no usable link is present.
    '''
    # Imported here so the function does not depend on 're' already being
    # present in the namespace the script is executed in.
    import re

    # Quote the term so characters that are special in a query string
    # cannot corrupt the search URL.
    quotedTerm = urllib.quote_plus(searchTerm)

    if (searchType == _SEARCHTYPE_WIN32API):
        # Queries for WIN32 API.
        #
        # Such queries are fairly easy, Google returns the right hit
        # as the first entry almost all the time, without much ado.
        # We simply query it.
        googleUrl = 'https://www.google.com/search?hl=en&q=%s+function+msdn+desktop+apps&sa=N&safe=off&filter=0' % quotedTerm
        print('(msdnGrab) [Querying against Win32API] %s' % googleUrl)

    elif (searchType == _SEARCHTYPE_C):
        # Queries for C/C++.
        #
        # These queries are harder to get right, and if possible we
        # want the right hit at the top of Google's results. We use
        # Google's intitle and inurl to ensure that we get the right
        # page, and an English one, in that order.
        googleUrl = 'https://www.google.com/search?hl=en&q=intitle:%s+msdn+crt+inurl:msdn*en-us&sa=N&safe=off&filter=0' % quotedTerm
        print('(msdnGrab) [Querying against C/C++] %s' % googleUrl)

    else:
        print('(msdnGrab) Error: Could not build a suitable Google search query.')
        return None

    # Read the page
    page = _fetchPage(googleUrl)
    if page is None:
        return None
    soup = bs4.BeautifulSoup(page)

    # Return the first MSDN link the pattern can actually be applied to.
    # Candidates the pattern does not match are skipped instead of ending
    # the search.
    for link in soup.find_all('a'):
        linkHtml = str(link)
        if 'msdn.microsoft.com/en-us' not in linkHtml:
            continue
        match = re.search('http://msdn.microsoft.com/(.*?)&', linkHtml)
        if match is not None:
            # NOTE(review): the captured part may itself begin with
            # 'en-us/', which would repeat that path segment - confirm
            # against live results.
            return 'http://msdn.microsoft.com/en-us/' + match.group(1)

    return None


###############################################################################
# Launch browser with search term
###############################################################################

class QuietChooser(idaapi.Choose):
    '''
    Chooser whose enter() callback does nothing.
    '''
    def enter(self, n):
        pass


def openMsdnPageInBrowser():
    '''
    Open the MSDN page for the highlighted identifier in the web browser.

    Asks which language (Win32 API or C/C++) to query, looks the page up
    through grabMsdnPageFromGoogle() and hands the URL to webbrowser.open().
    Returns None in every case; problems are reported with print.
    '''
    # Get the highlighted identifier
    searchTerm = idaapi.get_highlighted_identifier()

    # Make sure we have something highlighted
    if not searchTerm:
        print("(msdnGrab) Error: No identifier to use as search term was highlighted.")
        return None

    # Select "language"
    languages = [('Win32 API', _SEARCHTYPE_WIN32API),
                 ('C/C++', _SEARCHTYPE_C)]
    chooser = QuietChooser([], "(Open in browser) Language to query", 1)   # Get a modal Choose instance
    chooser.list = [label for (label, _) in languages]                     # List to choose from
    chooser.width = 40                                                     # Set the width
    ch = chooser.choose()                                                  # Run the chooser

    # The selection is used as a 1-based index.  Anything outside
    # 1..len(languages) is rejected; otherwise a result of 0 would index
    # element -1 and silently select the last entry.
    # NOTE(review): assumes a cancelled dialog yields a value < 1 - confirm.
    if ch < 1 or ch > len(languages):
        print('(msdnGrab) Error: Invalid language type selection made.')
        return None
    searchType = languages[ch - 1][1]

    # Handle IDA's naming conventions for the identifier
    searchTerm = searchTerm.replace('__imp_', '')
    print('(msdnGrab) Using search term: %s' % searchTerm)

    # Get the MSDN page URL
    msdnUrl = grabMsdnPageFromGoogle(searchTerm, searchType)
    if (msdnUrl is None):
        print('(msdnGrab) Error: Could not find a suitable MSDN page.')
        return None

    # Launch the browser
    webbrowser.open(msdnUrl)


###############################################################################
# Search MSDN page for definition
###############################################################################

def _firstCodeListing(soup):
    '''
    Return the text of the first non-empty <pre> element in soup, or
    'No code found.' when there is none.
    '''
    for pre in soup.findAll('pre'):
        text = stripBlankLines(stripTags(pre))
        dbgPrint('Code found: \n%s' % text)
        if (text != ''):
            return text
    return 'No code found.'


def _firstDescription(soup):
    '''
    Return the text of the first <p> element in soup that passes the
    description filter, or 'No description found.' when none does.
    '''
    for paragraph in soup.findAll('p'):
        text = stripBlankLines(stripTags(paragraph)).strip()
        dbgPrint('Description found: \n%s' % text)
        lowered = text.lower()
        if (text != '' and
            'updated' not in lowered and
            'applies to' not in lowered and
            'rated this helpful' not in lowered and
            not text.startswith('[') and not text.endswith(']')
           ):
            return text
    return 'No description found.'


def _appendComment(existingComment, newComment):
    '''
    Return newComment appended to existingComment, separated by a blank
    line.  A missing (None) or empty existing comment yields newComment
    alone, so no superfluous blank line is produced.
    '''
    if not existingComment:
        return newComment
    return existingComment + '\n\n' + newComment


def grabDefinitionFromMsdn(searchType):
    '''
    Import MSDN documentation for the highlighted identifier as a comment.

    Looks the page up through grabMsdnPageFromGoogle() (retrying with
    leading underscores removed one at a time), extracts a description and
    a code listing, and appends both to the repeatable comment of the
    target address without discarding an existing comment.  Returns None
    in every case; problems are reported with print.
    '''
    # Get the highlighted identifier
    searchTerm = idaapi.get_highlighted_identifier()

    # Get the address
    ea = idc.ScreenEA()

    # Make sure we have something highlighted
    if not searchTerm:
        print("(msdnGrab) Error: No identifier to use as search term was highlighted.")
        return None

    # Handle IDA's naming conventions for the identifier
    searchTerm = searchTerm.replace('__imp_', '')
    print('(msdnGrab) Using search term: %s' % searchTerm)

    # Get the MSDN page URL
    msdnUrl = grabMsdnPageFromGoogle(searchTerm, searchType)

    while (msdnUrl is None):
        # Try again, in case underscores are causing trouble
        if not searchTerm.startswith('_'):
            print('(msdnGrab) Error: Could not find a suitable MSDN page.')
            return None
        searchTerm = searchTerm[1:]
        print('(msdnGrab) Using search term: %s' % searchTerm)
        msdnUrl = grabMsdnPageFromGoogle(searchTerm, searchType)

    # Read the page
    page = _fetchPage(msdnUrl)
    if page is None:
        return None
    page = page.replace('\xc2\xa0', ' ')
    soup = bs4.BeautifulSoup(page)

    # Find the first (code) definition
    dbgPrint('Searching for code...')
    code = _firstCodeListing(soup).replace('\r', '')

    # Find the description
    dbgPrint('Searching for description...')
    desc = _firstDescription(soup)

    # Pretty format the description
    desc = stripBlankLines(stripTags(desc))

    # Find the actual library call
    codeReferences = list(idautils.XrefsFrom(ea, 1))
    if (codeReferences == []):
        nextEa = ea
    else:
        nextEa = codeReferences[0].to

    # Put it as a repeatable comment (don't clobber existing comment)
    print('(msdnGrab) Setting repeatable comment at 0x%X:' % nextEa)
    print(desc)
    print(code)
    print('')

    newComment = multiLineString(desc) + '\n\n' + code

    if ('data' in idc.SegName(nextEa)):
        # Assume we're in an external library: use a (code) repeatable
        # comment at the address.
        idc.MakeRptCmt(nextEa, _appendComment(idc.RptCmt(nextEa), newComment))
    else:
        # Assume we're in code: use a (function) repeatable comment.
        existingComment = idc.GetFunctionCmt(nextEa, COMMENT_REPEATABLE)
        idc.SetFunctionCmt(nextEa, _appendComment(existingComment, newComment), COMMENT_REPEATABLE)

    # Refresh the screen
    idc.Refresh()


###############################################################################
# Register hotkey
###############################################################################

if __name__ == "__main__":
    # Register the hotkeys
    print('(msdnGrab) Press "%s" grab definition from MSDN (for win32 API).' % _MSDN_HOTKEY_WIN32API)
    print('(msdnGrab) Press "%s" grab definition from MSDN (for C/C++).' % _MSDN_HOTKEY_C)
    print('(msdnGrab) Press "%s" to open MSDN page in browser.' % _MSDN_HOTKEY_BROWSER)

    # Add the hotkeys
    idaapi.CompileLine('static __grabDefinitionFromMsdn_win32api() { RunPythonStatement("grabDefinitionFromMsdn(_SEARCHTYPE_WIN32API)"); }')
    idc.AddHotkey(_MSDN_HOTKEY_WIN32API, '__grabDefinitionFromMsdn_win32api')

    idaapi.CompileLine('static __grabDefinitionFromMsdn_c() { RunPythonStatement("grabDefinitionFromMsdn(_SEARCHTYPE_C)"); }')
    idc.AddHotkey(_MSDN_HOTKEY_C, '__grabDefinitionFromMsdn_c')

    idaapi.CompileLine('static __openMsdnPageInBrowser() { RunPythonStatement("openMsdnPageInBrowser()"); }')
    idc.AddHotkey(_MSDN_HOTKEY_BROWSER, '__openMsdnPageInBrowser')