├── README.md ├── analyzer └── darwin │ ├── __init__.py │ ├── analysis.conf │ ├── analyzer.py │ ├── dylib │ ├── cuckoohooks.c │ ├── cuckoohooks.dylib │ ├── cuckoohooks.h │ ├── cuckoohooks.o │ ├── cuckoohooks_32.dylib │ ├── cuckoohooks_64.dylib │ └── makefile │ ├── lib │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── apitrace │ │ ├── process.py │ │ └── screenshot.py │ ├── common │ │ ├── __init__.py │ │ ├── abstracts.py │ │ ├── constants.py │ │ ├── exceptions.py │ │ ├── hashing.py │ │ ├── rand.py │ │ └── results.py │ └── core │ │ ├── __init__.py │ │ ├── config.py │ │ ├── packages.py │ │ └── startup.py │ └── modules │ ├── __init__.py │ ├── auxiliary │ ├── __init__.py │ ├── human.py │ └── screenshots.py │ └── packages │ ├── __init__.py │ ├── doc.py │ ├── generic.py │ ├── html.py │ ├── jar.py │ ├── macho.py │ ├── pdf.py │ ├── python.py │ ├── rtf.py │ ├── safari.py │ └── zip.py └── modules └── processing ├── behavior_osx.py ├── filter_syscall.py ├── macho_data.py └── static_macho.py /README.md: -------------------------------------------------------------------------------- 1 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 2 | 3 | NOTICE: 4 | 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | 9 | This analyzer extends the open-source [Cuckoo Sandbox](https://github.com/cuckoobox/cuckoo) with functionality for analyzing OS X malware in an OS X guest VM. 10 | 11 | 12 | -------------------------------------------------------------------------------- /analyzer/darwin/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /analyzer/darwin/analysis.conf: -------------------------------------------------------------------------------- 1 | [analysis] 2 | category = file 3 | target = /Example/path/to/Sample 4 | clock = 20140619T11:15:56 5 | file_type = Mach-O 64-bit x86_64 executable 6 | ip = 10.0.0.1 7 | package = 8 | options = 9 | enforce_timeout = True 10 | timeout = 120 11 | file_name = Sample 12 | id = 1 13 | port = 2042 14 | -------------------------------------------------------------------------------- /analyzer/darwin/analyzer.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 4 | this work by or on behalf of the U.S. Government. 5 | NOTICE: 6 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 7 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 8 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 9 | 10 | This is the darwin analyzer for Cuckoo - OS X is built on Darwin, which is a UNIX and FreeBSD based open-source OS. 11 | It was released by Apple in 2000. That's why the OS X analyzer is called darwin - Cuckoo's choice, not mine. 12 | 13 | The process for the analyzer seems to be as follows: 14 | 15 | 1. Cuckoo checks the status of agent.py (running on the guest) until it comes up 16 | 2. Cuckoo sends a zipped file of analyzer/darwin/ to agent.py 17 | 3. The guest agent unzips the files to /ANALYZER_FOLDER// 18 | (ANALYZER_FOLDER is the home folder of the user agent.py runs as) 19 | 4. Cuckoo sends the key/value pair options for the analysis. 20 | 5. The agent writes those options to analyzer.conf in the folder with the unzipped files 21 | 6. Cuckoo sends the file sample to the agent 22 | 7. The agent writes the file to /tmp 23 | 8. Cuckoo sends the execute commands 24 | 9. The agent executes analyzer.py 25 | 10. Cuckoo polls the agent until the analyzer has finished 26 | 11. The agent sends the results folder to Cuckoo 27 | 12. Cuckoo stores the results in storage/ on the host 28 | 29 | Source: http://public.honeynet.org/pipermail/cuckoo/2013-June/001489.html 30 | """ 31 | 32 | import logging 33 | import sys 34 | import os 35 | import os.path 36 | import traceback 37 | import xmlrpclib 38 | import random 39 | import hashlib 40 | import socket 41 | import time 42 | import fnmatch 43 | import fcntl 44 | import termios 45 | import array 46 | import pkgutil 47 | import subprocess 48 | import psutil 49 | 50 | from threading import Lock, Thread 51 | from ctypes import create_unicode_buffer, create_string_buffer 52 | from ctypes import c_wchar_p, byref, c_int, sizeof 53 | 54 | ''' These are Cuckoo files we are importing here ''' 55 | from lib.common.constants import PATHS, PIPE, SHUTDOWN_MUTEX #OS path constants 56 | from lib.core.startup import create_folders, init_logging #creates results folders and configures logger 57 | from lib.core.config import Config #parses the analysis.conf configuration file 58 | from lib.common.results import upload_to_host 59 | from lib.common.hashing import hash_file 60 | from lib.api.process import Process 61 | from lib.common.abstracts import Package, Auxiliary 62 | from lib.core.packages import choose_package 63 | from lib.common.exceptions import CuckooError, CuckooPackageError 64 | from modules import auxiliary 65 | 66 | #get logger 67 | log = logging.getLogger() 68 | 69 | FILES_LIST = [] #list of files to transfer to the Cuckoo host 70 | DUMPED_LIST = [] #files already transferred to host 71 | PROCESS_LIST = [] #list of watched processes, either started by us or by the sample 72 | 73 | PROTECTED_LIST = ["sleep", "kernel_task"] #processes that are not traced 74 | 75 | def add_pid(pid): 76 | """Add a process to process list.""" 77 | if isinstance(pid, (int, long, str)): 78 | log.info("Added new process to list with pid: %s", pid) 79 | PROCESS_LIST.append(int(pid)) 80 | 81 | def add_pids(pids): 82 | """Add PID.""" 83 | if isinstance(pids, (tuple, list)): 84 | for pid in pids: 85 | add_pid(pid) 86 | else: 87 | add_pid(pids) 88 | 89 | def add_file(file_path): 90 | """Add a file to list of files to be copied to the host.""" 91 | if file_path not in FILES_LIST: 92 | log.info("Added new file to list with path: %s", 93 | unicode(file_path).encode("utf-8", "replace")) 94 | FILES_LIST.append(file_path) 95 | 96 | def dump_file(file_path): 97 | """Create a copy of the given file path and send it to the host.""" 98 | try: 99 | if os.path.exists(file_path): 100 | sha256 = hash_file(hashlib.sha256, file_path) 101 | if sha256 in DUMPED_LIST: 102 | # The file was already dumped 103 | # Cuckoo normally just skips the file, I have chosen not to 104 | #return 105 | log.warning("File at path \"%s\" has a hash that is a duplicate of another dumped file.", 106 | file_path) 107 | else: 108 | log.warning("File at path \"%s\" does not exist, skip.", 109 | file_path) 110 | return 111 | except IOError as e: 112 | log.warning("Unable to access file at path \"%s\": %s", file_path, e) 113 | return 114 | 115 | log.info("File path is %s and file size is %d.", file_path, os.stat(file_path).st_size) 116 | 117 | #choose the correct folder 118 | if "logs" in file_path: 119 | upload_path = os.path.join("logs", os.path.basename(file_path)) 120 | elif "drop" in file_path: 121 | upload_path = os.path.join("files", os.path.basename(file_path)) 122 | else: 123 | upload_path = os.path.join("files", 124 | str(random.randint(100000000, 9999999999)), 125 | os.path.basename(file_path)) 126 | log.info("Upload path is %s.", upload_path) 127 | 128 | #send file to host 129 | try: 130 | upload_to_host(file_path, upload_path) 131 | DUMPED_LIST.append(sha256) 132 | except (IOError, socket.error) as e: 133 | log.error("Unable to upload dropped file at path \"%s\": %s", 134 | file_path, e) 135 | 136 | def dump_files(): 137 | """Dump all the dropped files. 138 | This function transfers all the results files to the host.""" 139 | for file_path in FILES_LIST: 140 | log.info("Dumping %s to host,", file_path) 141 | dump_file(file_path) 142 | 143 | class PipeServer(Thread): 144 | """Cuckoo PIPE server. 145 | 146 | This Pipe Server receives notifications from the injected processes for 147 | new processes being spawned and for files being created or deleted. 148 | """ 149 | 150 | def __init__(self, pipe_name=PIPE): 151 | """@param pipe_name: Cuckoo PIPE server name.""" 152 | log.info("Starting PipeSever") 153 | Thread.__init__(self) 154 | self.pipe_name = pipe_name 155 | self.fd = -1 156 | self.do_run = True 157 | self.handlers = [] 158 | self.created = False 159 | self.iocbuf = array.array('i', [0]) 160 | self.h_pipe = None 161 | 162 | def stop(self): 163 | """Stop PIPE server.""" 164 | start_time = time.time() 165 | end_time = start_time + 30 166 | #wait until we get everything...up to 30 seconds of extra time 167 | while self.get_content() > 0 and time.time() < end_time: 168 | if len(self.handlers) <= 20: 169 | handle = PipeHandler(self.h_pipe) 170 | handle.daemon = True 171 | handle.start() 172 | self.handlers.append(handle) 173 | log.info("Waiting to process all pipe data") 174 | time.sleep(0.5) 175 | self.do_run = False 176 | #delete the FIFO pipe 177 | os.unlink(self.pipe_name) 178 | 179 | def is_done(self): 180 | return not self.do_run 181 | 182 | def run(self): 183 | """Create and run PIPE server. 184 | @return: operation status. 185 | """ 186 | try: 187 | if not self.created: 188 | # create the pipe 189 | os.mkfifo(self.pipe_name) 190 | self.created = True 191 | # If you use the normal open() function, it will block and no input will be received 192 | self.fd = os.open(self.pipe_name, os.O_NONBLOCK) #returns a file descriptor 193 | #wrap the file descriptor in a file object 194 | self.h_pipe = os.fdopen(self.fd, 'r') 195 | log.info("Opened pipe file %s with fd %d", self.pipe_name, self.fd) 196 | except: 197 | log.exception("Unable to start process communication pipe.") 198 | 199 | while self.do_run: 200 | # There is no way to poll for a connection to a named pipe on Unix 201 | # So we check the file size 202 | if self.get_content() > 0 and len(self.handlers) < 20: 203 | #log.info("Starting a pipe handler...") 204 | handle = PipeHandler(self.h_pipe) 205 | handle.daemon = True 206 | handle.start() 207 | self.handlers.append(handle) 208 | else: 209 | # Sleep for a bit 210 | time.sleep(0.05) 211 | 212 | return True 213 | 214 | def get_content(self): 215 | """ 216 | Get the number of bytes in the pipe that can be read. 217 | @return: the number of bytes 218 | """ 219 | try: 220 | fcntl.ioctl(self.fd, termios.FIONREAD, self.iocbuf, True) 221 | except: 222 | log.exception("Unable to read bytes from pipe.") 223 | return self.iocbuf[0] 224 | 225 | 226 | class PipeHandler(Thread): 227 | """Pipe Handler. 228 | 229 | This class handles the notifications received through the Pipe Server and 230 | decides what to do with them. 231 | """ 232 | 233 | F_LOGFILE = "file_activity.log" 234 | W_LOGFILE = "file_writes.log" 235 | C_LOGFILE = "file_creation.log" 236 | D_LOGFILE = "file_deletion.log" 237 | read_lock = Lock() 238 | 239 | def __init__(self, h_pipe): 240 | """@param h_pipe: PIPE to read.""" 241 | Thread.__init__(self) 242 | self.h_pipe = h_pipe 243 | self.part = '' 244 | self.done = False 245 | 246 | def run(self): 247 | """Run handler. 248 | @return: operation status. 249 | """ 250 | data = "" 251 | wait = False 252 | proc = None 253 | 254 | # Read the data submitted to the Pipe Server. 255 | while True: 256 | while True: 257 | try: 258 | PipeHandler.read_lock.acquire() 259 | data = self.h_pipe.readline() 260 | PipeHandler.read_lock.release() 261 | break 262 | except IOError: 263 | log.error("Unable to open process communication pipe, retrying.") 264 | 265 | if data == '': 266 | break 267 | 268 | if data: 269 | #one line = one logging command 270 | c = [data] 271 | for command in c: 272 | 273 | if not command.endswith('\n'): #if we have read a partial line 274 | log.info("Saving a part of a log") 275 | self.part = command #save it for later 276 | continue 277 | 278 | if self.part != '': # append any pieces to the end 279 | log.info("Using a part of a log") 280 | command = self.part + command 281 | self.part = '' 282 | 283 | if command.startswith("FILE_ACTIVITY:"): 284 | self.writeToLogFile(os.path.join(PATHS["logs"], self.F_LOGFILE), command[14:len(command)]) 285 | elif command.startswith("FILE_CREATE:"): 286 | self.writeToLogFile(os.path.join(PATHS["logs"], self.C_LOGFILE), command[12:len(command)]) 287 | elif command.startswith("FILE_DELETE:"): 288 | self.writeToLogFile(os.path.join(PATHS["logs"], self.D_LOGFILE), command[12:len(command)]) 289 | elif command.startswith("FILE_WRITE:"): 290 | self.writeToLogFile(os.path.join(PATHS["logs"], self.W_LOGFILE), command[11:len(command)]) 291 | elif command.startswith("PROCESS:"): 292 | process_id = int(command[8:len(command)]) 293 | if process_id not in PROCESS_LIST: 294 | if psutil.pid_exists(process_id): 295 | h_p = psutil.Process(process_id) 296 | proc = Process(pid=process_id, h_process=h_p, thread_id=None) 297 | filename = proc.get_filepath() 298 | log.info("Announced new process name: %s with pid %d", filename, process_id) 299 | if not filename in PROTECTED_LIST: 300 | proc.start_trace() 301 | add_pids(process_id) 302 | elif command.startswith("EXEC:"): 303 | log.info(command) 304 | else: 305 | log.error("Invalid pipe command: %s", command) 306 | continue 307 | 308 | #break 309 | 310 | # We wait until the injected library reports back. 311 | if wait: 312 | proc.wait() 313 | 314 | if proc: 315 | proc.close() 316 | 317 | self.done = True 318 | return True 319 | 320 | def writeToLogFile(self, logfile, data): 321 | try: 322 | flog = open(logfile, 'a+') 323 | flog.write(data) 324 | flog.close() 325 | except: 326 | log.error("Unable to write to logfile %s.", logfile) 327 | 328 | class Analyzer: 329 | """Cuckoo Darwin (OS X) Analyzer. 330 | """ 331 | 332 | PIPE_SERVER_COUNT = 1 333 | 334 | def __init__(self): 335 | self.pipes = [None]*self.PIPE_SERVER_COUNT 336 | self.config = None 337 | self.target = None 338 | 339 | def complete(self): 340 | """Mark the analysis as completed and return files""" 341 | 342 | # Oh look, it's done 343 | log.info("Analysis completed") 344 | # Stop the Pipe Servers. 345 | for x in xrange(self.PIPE_SERVER_COUNT): 346 | self.pipes[x].stop() 347 | while not self.pipes[x].is_done(): 348 | log.info("Waiting for Pipe Servers to finish") 349 | time.sleep(0.1) 350 | 351 | # pick up log files and created files from the system file call hooks 352 | # these are stored in ~/tmp/ 353 | flog_root = os.path.join(os.getenv("HOME"), "tmp") #get the path 354 | if os.path.exists(flog_root): 355 | #transfer the log files to the "logs" directory - there could be 3 or there could be none 356 | for f in os.listdir(flog_root): 357 | if fnmatch.fnmatch(f, 'file_*.log'): #if the file is one of our log files 358 | os.rename(os.path.join(flog_root, f), os.path.join(PATHS["logs"], f)) #then move it 359 | else: #all the other files in the directory will be files that were deleted 360 | log.info("Adding file %s with size %d", os.path.join(flog_root, f), os.path.getsize(os.path.join(flog_root, f))) 361 | os.rename(os.path.join(flog_root, f), os.path.join(PATHS["files"], f)) #move those to the files folder 362 | # copy over any created files that were not in the deleted files list 363 | if os.path.exists(os.path.join(PATHS["logs"], "file_creation.log")): 364 | #open the log file for reading - it will have one file path per line 365 | flog = open(os.path.join(PATHS["logs"], "file_creation.log")) 366 | for row in flog.readlines(): 367 | row = row.strip("\n") 368 | # get rid of the timestamp in front 369 | split = row.split(":") 370 | row = split[len(split)-1].strip(" ") 371 | log.info("Looking for %s - exists: %s", row, str(os.path.exists(row))) 372 | #if the file has not already been copied, copy it over 373 | if os.path.exists(row) and os.path.isfile(row) and not os.path.exists(os.path.join(PATHS["files"], os.path.basename(row))): 374 | try: 375 | log.info("Adding file %s with size %d", row, os.path.getsize(row)) 376 | os.rename(row, os.path.join(PATHS["files"], os.path.basename(row))) 377 | except IOError: 378 | log.error("Failed to extract created file %s.", row) 379 | 380 | flog.close() 381 | 382 | # Dump all the relevant files to the host 383 | for folder, subs, files in os.walk(PATHS["root"]): 384 | for filename in files: 385 | path = os.path.join(folder, filename) 386 | add_file(path) 387 | 388 | dump_files() 389 | 390 | 391 | def prepare(self): 392 | """ 393 | Prepare the environment for analysis. 394 | """ 395 | # Create the folders used for storing the results. 396 | create_folders() 397 | 398 | # Initialize logging. 399 | init_logging() 400 | 401 | # Parse the analysis configuration file generated by the agent. 402 | self.config = Config(cfg="analysis.conf") 403 | 404 | # Initialize and start the Pipe Servers. This is going to be used for 405 | # communicating with the injected and monitored processes. 406 | for x in xrange(self.PIPE_SERVER_COUNT): 407 | self.pipes[x] = PipeServer() 408 | self.pipes[x].daemon = True 409 | self.pipes[x].start() 410 | 411 | # We update the target according to its category. If it's a file, then 412 | # we store the path. 413 | if self.config.category == "file": 414 | #Note: The /tmp directory is specified in agent.py for linux and darwin 415 | self.target = os.path.join("/tmp", 416 | str(self.config.file_name)) 417 | # If it's a URL, well.. we store the URL. 418 | else: 419 | self.target = self.config.target 420 | log.info("Target is at %s", self.target) 421 | 422 | # Execsnoop traces process creation using Dtrace 423 | pargs = ["execsnoop", "-a", "-e"] 424 | results = open(os.path.join(PATHS["logs"], "processes.log"), "a+") 425 | try: 426 | proc = subprocess.Popen(pargs, stdout=results, stderr=results) 427 | log.info("Starting Execsnoop") 428 | except (OSError, ValueError): 429 | log.exception("Failed to start execsnoop.") 430 | results.close() 431 | 432 | def run(self): 433 | """Run analysis. 434 | @return: operation status. 435 | """ 436 | #set up the analysis 437 | self.prepare() 438 | 439 | log.info("Starting analyzer from: %s", os.getcwd()) 440 | log.info("Storing results at: %s", PATHS["root"]) 441 | 442 | # If no analysis package was specified at submission, we try to select 443 | # one automatically. 444 | if not self.config.package: 445 | log.info("No analysis package specified, trying to detect " 446 | "it automagically.") 447 | # If the analysis target is a file, we choose the package according 448 | # to the file format. 449 | if self.config.category == "file": 450 | package = choose_package(self.config.file_type, self.config.file_name) 451 | # If it's an URL, try to use Safari 452 | else: 453 | package = "safari" 454 | 455 | # If we weren't able to automatically determine the proper package, 456 | # we need to abort the analysis. 457 | if not package: 458 | raise CuckooError("No valid package available for file " 459 | "type: {0}".format(self.config.file_type)) 460 | 461 | log.info("Automatically selected analysis package \"%s\"", package) 462 | # Otherwise just select the specified package. 463 | else: 464 | package = self.config.package 465 | 466 | # Generate the package path. 467 | package_name = "modules.packages.%s" % package 468 | 469 | # Try to import the analysis package. 470 | try: 471 | __import__(package_name, globals(), locals(), ["dummy"], -1) 472 | # If it fails, we need to abort the analysis. 473 | except ImportError: 474 | raise CuckooError("Unable to import package \"{0}\", does " 475 | "not exist.".format(package_name)) 476 | 477 | # Initialize the package parent abstract. 478 | Package() 479 | 480 | # Enumerate the abstract's subclasses. 481 | try: 482 | package_class = Package.__subclasses__()[0] 483 | except IndexError as e: 484 | raise CuckooError("Unable to select package class " 485 | "(package={0}): {1}".format(package_name, e)) 486 | 487 | # Initialize the analysis package. 488 | pack = package_class(self.get_options()) 489 | 490 | # Initialize Auxiliary modules 491 | Auxiliary() 492 | prefix = auxiliary.__name__ + "." 493 | for loader, name, ispkg in pkgutil.iter_modules(auxiliary.__path__, prefix): 494 | if ispkg: 495 | continue 496 | 497 | # Import the auxiliary module. 498 | try: 499 | __import__(name, globals(), locals(), ["dummy"], -1) 500 | except ImportError as e: 501 | log.warning("Unable to import the auxiliary module " 502 | "\"%s\": %s", name, e) 503 | 504 | # Walk through the available auxiliary modules. 505 | aux_enabled, aux_avail = [], [] 506 | for module in Auxiliary.__subclasses__(): 507 | # Try to start the auxiliary module. 508 | try: 509 | aux = module() 510 | aux_avail.append(aux) 511 | aux.start() 512 | except (NotImplementedError, AttributeError): 513 | log.warning("Auxiliary module %s was not implemented", 514 | aux.__class__.__name__) 515 | continue 516 | except Exception as e: 517 | log.warning("Cannot execute auxiliary module %s: %s", 518 | aux.__class__.__name__, e) 519 | continue 520 | finally: 521 | log.info("Started auxiliary module %s", 522 | aux.__class__.__name__) 523 | aux_enabled.append(aux) 524 | 525 | # Initialize the analysis package. 526 | pack = package_class(self.get_options()) 527 | 528 | # Start analysis package. If for any reason, the execution of the 529 | # analysis package fails, we have to abort the analysis. 530 | try: 531 | pids = pack.start(self.target) 532 | except NotImplementedError: 533 | raise CuckooError("The package \"{0}\" doesn't contain a run " 534 | "function.".format(package_name)) 535 | except CuckooPackageError as e: 536 | raise CuckooError("The package \"{0}\" start function raised an " 537 | "error: {1}".format(package_name, e)) 538 | except Exception as e: 539 | raise CuckooError("The package \"{0}\" start function encountered " 540 | "an unhandled exception: " 541 | "{1}".format(package_name, e)) 542 | 543 | # If the analysis package returned a list of process IDs, we add them 544 | # to the list of monitored processes and enable the process monitor. 545 | if pids: 546 | add_pids(pids) 547 | pid_check = True 548 | 549 | # If the package didn't return any process ID (for example in the case 550 | # where the package isn't enabling any behavioral analysis), we don't 551 | # enable the process monitor. 552 | else: 553 | log.info("No process IDs returned by the package, running " 554 | "for the full timeout") 555 | pid_check = False 556 | 557 | 558 | time_counter = 0 559 | 560 | while True: 561 | time_counter += 1 562 | if time_counter == int(self.config.timeout): 563 | log.info("Analysis timeout hit, terminating analysis") 564 | break 565 | 566 | try: 567 | # If the process monitor is enabled we start checking whether 568 | # the monitored processes are still alive. 569 | if pid_check: 570 | for pid in PROCESS_LIST: 571 | if not Process(pid=pid).is_alive(): 572 | log.info("Process with pid %s has terminated", pid) 573 | PROCESS_LIST.remove(pid) 574 | 575 | # If none of the monitored processes are still alive, we 576 | # can terminate the analysis. 577 | if not PROCESS_LIST: 578 | log.info("Process list is empty, " 579 | "terminating analysis.") 580 | break 581 | 582 | # Update the list of monitored processes available to the 583 | # analysis package. It could be used for internal 584 | # operations within the module. 585 | pack.set_pids(PROCESS_LIST) 586 | 587 | try: 588 | # The analysis packages are provided with a function that 589 | # is executed at every loop's iteration. If such function 590 | # returns False, it means that it requested the analysis 591 | # to be terminate. 592 | if not pack.check(): 593 | log.info("The analysis package requested the " 594 | "termination of the analysis...") 595 | break 596 | 597 | # If the check() function of the package raised some exception 598 | # we don't care, we can still proceed with the analysis but we 599 | # throw a warning. 600 | except Exception as e: 601 | log.warning("The package \"%s\" check function raised " 602 | "an exception: %s", package_name, e) 603 | finally: 604 | # Sleep for one second 605 | time.sleep(1) 606 | 607 | try: 608 | # Before shutting down the analysis, the package can perform some 609 | # final operations through the finish() function. 610 | pack.finish() 611 | except Exception as e: 612 | log.warning("The package \"%s\" finish function raised an " 613 | "exception: %s", package_name, e) 614 | 615 | # Try to terminate remaining active processes. We do this to make sure 616 | # that we clean up remaining open handles (sockets, files, etc.). 617 | log.info("Terminating remaining processes before shutdown...") 618 | 619 | for pid in PROCESS_LIST: 620 | proc = Process(pid=pid) 621 | if proc.is_alive(): 622 | try: 623 | proc.terminate() 624 | except: 625 | continue 626 | 627 | 628 | # Call the completion procedure 629 | self.complete() 630 | 631 | return True 632 | 633 | def get_options(self): 634 | """Get analysis options. 635 | @return: options dict. 636 | """ 637 | # The analysis package can be provided with some options in the 638 | # following format: 639 | # option1=value1,option2=value2,option3=value3 640 | # 641 | # Here we parse such options and provide a dictionary that will be made 642 | # accessible to the analysis package. 643 | options = {} 644 | if self.config.options: 645 | try: 646 | # Split the options by comma. 647 | fields = self.config.options.strip().split(",") 648 | except ValueError as e: 649 | log.warning("Failed parsing the options: %s", e) 650 | else: 651 | for field in fields: 652 | # Split the name and the value of the option. 653 | try: 654 | key, value = field.strip().split("=") 655 | except ValueError as e: 656 | log.warning("Failed parsing option (%s): %s", field, e) 657 | else: 658 | # If the parsing went good, we add the option to the 659 | # dictionary. 660 | options[key.strip()] = value.strip() 661 | 662 | return options 663 | 664 | #executed when this file is run 665 | if __name__ == "__main__": 666 | success = False #did the analyzer successfully run? 667 | error = "" #any error messages the analyzer returns 668 | 669 | try: 670 | # Initialize the main analyzer class. 671 | analyzer = Analyzer() 672 | 673 | # Run it and wait for the response. 674 | success = analyzer.run() 675 | 676 | # This is not likely to happen. 677 | except KeyboardInterrupt: 678 | error = "Keyboard Interrupt" 679 | 680 | # If the analysis process encountered a critical error, it will raise a 681 | # CuckooError exception, which will force the termination of the analysis. 682 | # Notify the agent of the failure. Also catches unexpected exceptions. 683 | except Exception as e: 684 | # Store the error. 685 | error_exc = traceback.format_exc() 686 | error = str(e) 687 | 688 | # Just to be paranoid. 689 | if len(log.handlers) > 0: 690 | log.exception(error_exc) 691 | else: 692 | sys.stderr.write("{0}\n".format(error_exc)) 693 | 694 | # Once the analysis is completed or terminated for any reason, we report 695 | # back to the agent, notifying that it can report back to the host. 696 | finally: 697 | # Establish connection with the agent XMLRPC server. 698 | server = xmlrpclib.Server("http://127.0.0.1:8000", allow_none=True) 699 | logging.critical("success: %s, error: %s, PATHS[root]: %s" % (success, error, PATHS["root"])) 700 | server.complete(success, error, PATHS["root"]) -------------------------------------------------------------------------------- /analyzer/darwin/dylib/cuckoohooks.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | * this work by or on behalf of the U.S. Government. 4 | * NOTICE: 5 | * For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | * NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | * Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | * 9 | * This file is the source code of cuckoohooks.dylib for the darwin analyzer of the Cuckoo sandbox. 10 | * Using process injection, it hooks system calls of interest to monitor for activity. 11 | * The full list of syscalls is in sys/syscall.h 12 | * It is compiled using the commands: 13 | * gcc -fno-common -c cuckoohooks.c 14 | * gcc -dynamiclib -o cuckoohooks.dylib cuckoohooks.o 15 | * or, for a 32-and-64-bit one: 16 | * gcc -fno-common -c cuckoohooks.c -arch i386 17 | * gcc -dynamiclib -o cuckoohooks_32.dylib cuckoohooks.o -arch i386 18 | * gcc -fno-common -c cuckoohooks.c -arch x86_64 19 | * gcc -dynamiclib -o cuckoohooks_64.dylib cuckoohooks.o -arch x86_64 20 | * lipo -create cuckoohooks_32.dylib cuckoohooks_64.dylib -output cuckoohooks.dylib 21 | * 22 | * A makefile is included for convenience 23 | * It is injected at runtime into the desired process with: 24 | * DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/cuckoohooks.dylib ./ 25 | * 26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #include "cuckoohooks.h" 52 | 53 | #define LOGFOLDER "/tmp/" 54 | #define F_LOGFILE "file_activity.log" 55 | #define W_LOGFILE "file_writes.log" 56 | #define C_LOGFILE "file_creation.log" 57 | #define D_LOGFILE "file_deletion.log" 58 | 59 | int got_config = 0; 60 | int pipe_open = 0; 61 | 62 | /* 63 | * This section contains the hooked syscalls. 64 | */ 65 | 66 | /* 67 | * The open(2) syscall hook. The "..." means this function takes an unspecified number of variables. 68 | * We are mostly interested in files being created and files being written, so that we can inform Cuckoo about them. 69 | * However, the function will also log all files read, for completeness. 70 | * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/open.2.html#//apple_ref/doc/man/2/open 71 | */ 72 | 73 | int open(const char *path, int oflag,...) { 74 | bool was_create; 75 | //fprintf(stderr, "Starting open within pid %d and file %s and mode %d\n", getpid(), path, oflag); 76 | //get a pointer to the real function so we can call it from this wrapper 77 | int (*real_open)(const char*, int, ...) = 78 | (int (*)(const char*, int,...)) dlsym(RTLD_NEXT, "open"); 79 | 80 | //log the file and its flags to the logfile 81 | char* log = timestamp(); 82 | char* front = append_strings(log, ": "); 83 | if (in_logfolder(path)) { 84 | switch(oflag & O_ACCMODE) { 85 | case O_RDONLY: 86 | write_to_file(append_strings(front, append_strings("Read - ", path)), "FILE_ACTIVITY:"); 87 | case O_WRONLY: 88 | write_to_file(append_strings(front, append_strings("Write - ", path)), "FILE_ACTIVITY:"); 89 | case O_RDWR: 90 | write_to_file(append_strings(front, append_strings("ReadWrite - ", path)), "FILE_ACTIVITY:"); 91 | } 92 | } 93 | 94 | //printf("File %s opened with %d\n", path, oflag); //for debugging 95 | 96 | //The most essential unnamed argument is the file permissions, so we need to pass that to the real open 97 | va_list args; 98 | va_start(args,oflag); //list of unnamed arguments 99 | int perm = va_arg(args, int); //permissions 100 | va_end(args); 101 | 102 | //check to see if the "create if not exists" flag is set 103 | //and also if the file exists 104 | if (((oflag & O_CREAT) != 1) & (file_exists(path) == 0) & in_logfolder(path)) { 105 | //then log that this file was created 106 | write_to_file(append_strings(front, path), "FILE_CREATE:"); 107 | } 108 | 109 | //call the real function for the results 110 | int result = real_open(path, oflag, perm); 111 | if (result == -1) { //if there was an error, note it 112 | write_to_file(append_strings(front, append_strings("Open failed on ", path)), "FILE_ACTIVITY:"); 113 | } 114 | return result; 115 | } 116 | 117 | /* 118 | * The creat() call is not a system call, but it is hooked for thoroughness. 119 | * Header can be found in fcntl.h 120 | * 121 | */ 122 | 123 | int creat(const char *pathname, mode_t mode) { 124 | //get a pointer to the real function so we can call it from this wrapper 125 | int (*real_creat)(const char*, mode_t) = 126 | (int (*)(const char*, mode_t)) dlsym(RTLD_NEXT, "creat"); 127 | 128 | if (in_logfolder(pathname)) { 129 | //write to the logfile of created files 130 | char* log = timestamp(); 131 | char* front = append_strings(log, ": "); 132 | write_to_file(append_strings(front, pathname), "FILE_CREATE:"); 133 | } 134 | 135 | return real_creat(pathname, mode); 136 | 137 | } 138 | 139 | /* 140 | * The unlink command is basically the "delete file" command. 141 | * In this case, we save all files to be deleted to the ~/tmp folder 142 | * before doing the delete. These are then exported back to cuckoo for reference. 143 | * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/unlink.2.html 144 | */ 145 | 146 | int unlink(const char *path) { 147 | //get a pointer to the real function so we can call it from this wrapper 148 | int (*real_unlink)(const char*) = 149 | (int (*)(const char*)) dlsym(RTLD_NEXT, "unlink"); 150 | 151 | //write the file to the log 152 | char* log = timestamp(); 153 | char* front = append_strings(log, ": "); 154 | write_to_file(append_strings(front, path), "FILE_DELETE:"); 155 | 156 | //copy the file into our results folder 157 | copy_file(path, g_config.results); 158 | 159 | return real_unlink(path); 160 | } 161 | 162 | /* 163 | * The rename command, also serves as the "move" command. The new file name 164 | * is considered to be a "created" file and the old one is considered a "deleted" 165 | * file, so the end result will have two copies of the file, one under each name. 166 | * https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/rename.2.html 167 | */ 168 | int rename(const char *old, const char *new) { 169 | //get a pointer to the real function so we can call it from this wrapper 170 | int (*real_rename)(const char*, const char*) = 171 | (int (*)(const char*, const char*)) dlsym(RTLD_NEXT, "rename"); 172 | 173 | //write the "old" file to the deleted log 174 | char* log = timestamp(); 175 | char* front = append_strings(log, ": "); 176 | write_to_file(append_strings(front, old), "FILE_DELETE:"); 177 | 178 | //copy the file into our results folder 179 | copy_file(old, g_config.results); 180 | 181 | //write the "new" file to the list of created files 182 | write_to_file(append_strings(front, new), "FILE_CREATE:"); 183 | 184 | return real_rename(old, new); 185 | } 186 | 187 | /* 188 | * The hooks of the write functions are there to log what files are actually changed. 189 | * Changed files will also be extracted by Cuckoo 190 | * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/write.2.html 191 | */ 192 | 193 | ssize_t write(int fildes, const void *buf, size_t nbyte) { 194 | //get a pointer to the real function so we can call it from this wrapper 195 | ssize_t (*real_write)(int, const void *, size_t) = 196 | (ssize_t (*)(int, const void *, size_t)) dlsym(RTLD_NEXT, "write"); 197 | if (nbyte > 0) { //if we are actually writing something 198 | //get file name from descriptor 199 | char path[MAXPATHLEN]; 200 | if ((fcntl(fildes, F_GETPATH, path) != -1) & in_logfolder(path)) 201 | { 202 | //log to the writes log file 203 | char* log = timestamp(); 204 | char* front = append_strings(log, ": "); 205 | write_to_file(append_strings(front, path), "FILE_WRITE:"); 206 | //write the bytes to the log file 207 | char* data = buffer_to_hex(buf, nbyte, 0); 208 | write_to_file(append_strings("Bytes Written: ", data), "FILE_WRITE:"); 209 | //copy over the file to the results folder 210 | copy_file(path, g_config.results); 211 | } 212 | } 213 | 214 | return real_write(fildes, buf, nbyte); 215 | } 216 | 217 | ssize_t pwrite(int fildes, const void *buf, size_t nbyte, off_t offset) { 218 | //get a pointer to the real function so we can call it from this wrapper 219 | ssize_t (*real_pwrite)(int, const void *, size_t, off_t) = 220 | (ssize_t (*)(int, const void *, size_t, off_t)) dlsym(RTLD_NEXT, "pwrite"); 221 | 222 | if (nbyte > 0) { //if we are actually writing something 223 | //get file name from descriptor 224 | char path[MAXPATHLEN]; 225 | if ((fcntl(fildes, F_GETPATH, path) != -1) & in_logfolder(path)) 226 | { 227 | //log to the writes log file 228 | char* log = timestamp(); 229 | char* front = append_strings(log, ": "); 230 | write_to_file(append_strings(front, path), "FILE_WRITE:"); 231 | //write the bytes to the log file 232 | char* data = buffer_to_hex(buf, nbyte, offset); 233 | write_to_file(append_strings("Bytes Written: ", data), "FILE_WRITE:"); 234 | //copy over the file to the results folder 235 | copy_file(path, g_config.results); 236 | } 237 | } 238 | return real_pwrite(fildes, buf, nbyte, offset); 239 | } 240 | 241 | ssize_t writev(int fildes, const struct iovec *iov, int iovcnt) { 242 | //get a pointer to the real function so we can call it from this wrapper 243 | ssize_t (*real_writev)(int, const struct iovec, int) = 244 | (ssize_t (*)(int, const struct iovec, int)) dlsym(RTLD_NEXT, "writev"); 245 | 246 | //get file name from descriptor 247 | char path[MAXPATHLEN]; 248 | if ((fcntl(fildes, F_GETPATH, path) != -1) & in_logfolder(path)) 249 | { 250 | //log to the writes log file 251 | char* log = timestamp(); 252 | char* front = append_strings(log, ": "); 253 | write_to_file(append_strings(front, path), "FILE_WRITE:"); 254 | //copy over the file to the results folder 255 | copy_file(path, g_config.results); 256 | } 257 | 258 | return real_writev(fildes, *iov, iovcnt); 259 | } 260 | 261 | /* 262 | * Ptrace is of interest because processes calling ptrace with the 263 | * PT_DENY_ATTACH request can avoid being probed for debugging with Dtrace. 264 | * This request sets the P_LNOATTACH flag, which is checked by Dtrace. 265 | * Since Dtrace is a nice tool and we want use it, we can't have that. 266 | * So any PT_DENY_ATTACH request that comes through gets blocked. 267 | * https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/ptrace.2.html 268 | * http://dtrace.org/blogs/ahl/2008/01/18/mac-os-x-and-the-missing-probes/ 269 | */ 270 | int ptrace(int request, pid_t pid, caddr_t addr, int data) { 271 | //get a pointer to the real function so we can call it from this wrapper 272 | int (*real_ptrace)(int, pid_t, caddr_t, int) = 273 | (int (*)(int, pid_t, caddr_t, int)) dlsym(RTLD_NEXT, "ptrace"); 274 | 275 | //log to the writes log file 276 | char* log = timestamp(); 277 | char* front = append_strings(log, ": "); 278 | write_to_file(front, "PTRACE:"); 279 | 280 | if (request == PT_DENY_ATTACH) { //PT_DENY_ATTACH=31 281 | return 0; //do nothing for this request 282 | } 283 | else { 284 | return real_ptrace(request, pid, addr, data); 285 | } 286 | } 287 | 288 | /* 289 | * Vfork is the more memory-effecient version of fork. Hooking this allows 290 | * us to track new spawned processes. 291 | * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/vfork.2.html 292 | */ 293 | pid_t vfork(void) { 294 | 295 | //get a pointer to the real function so we can call it from this wrapper 296 | pid_t (*real_vfork)() = 297 | (pid_t (*)()) dlsym(RTLD_NEXT, "vfork"); 298 | 299 | //get the new pid 300 | pid_t new_process = real_vfork(); 301 | //suspend the process so the injection can happen 302 | if(new_process > 0) { 303 | kill(new_process, SIGSTOP); 304 | //write the new process pid to the pipe 305 | //The analyzer will add tracking when the command is read 306 | char* log = timestamp(); 307 | char* front = append_strings(log, ": "); 308 | sprintf(front, " %d", new_process); 309 | write_to_file(front, "PROCESS:"); 310 | } 311 | 312 | return new_process; 313 | 314 | } 315 | 316 | /* 317 | * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/fork.2.html#//apple_ref/doc/man/2/fork 318 | */ 319 | pid_t fork(void) { 320 | //get a pointer to the real function so we can call it from this wrapper 321 | pid_t (*real_fork)() = 322 | (pid_t (*)()) dlsym(RTLD_NEXT, "fork"); 323 | 324 | //get the new pid 325 | pid_t new_process = real_fork(); 326 | //suspend the process so the injection can happen 327 | if(new_process > 0) { 328 | kill(new_process, SIGSTOP); 329 | //write the new process pid to the pipe 330 | //The analyzer will add tracking when the command is read 331 | char* log = timestamp(); 332 | char* front = append_strings(log, ": "); 333 | sprintf(front, " %d", new_process); 334 | write_to_file(front, "PROCESS:"); 335 | } 336 | else if (new_process == 0) { // Stopping the new process from both the old thread and the new one is necessary for this to work consistently 337 | kill(getpid(), SIGSTOP); 338 | } 339 | 340 | return new_process; 341 | } 342 | 343 | /* 344 | * posix_spawn is what is most commonly used on OS X, since the Cocoa library uses it by default. 345 | * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/posix_spawn.2.html 346 | */ 347 | int posix_spawn(pid_t *restrict pid, const char *restrict path, 348 | const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp, 349 | char *const argv[restrict], char *const envp[restrict]) { 350 | 351 | //get a pointer to the real function so we can call it from this wrapper 352 | int (*real_posix_spawn)(pid_t *restrict, const char *restrict, 353 | const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict, 354 | char *const *restrict, char *const *restrict) = 355 | (int (*)(pid_t *restrict, const char *restrict, 356 | const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict, 357 | char *const *restrict, char *const *restrict)) dlsym(RTLD_NEXT, "posix_spawn"); 358 | 359 | //insert the injected library into the new process' environment variables 360 | char* environ[3]; 361 | environ[0] = "DYLD_FORCE_FLAT_NAMESPACE=1"; 362 | environ[1] = append_strings("DYLD_INSERT_LIBRARIES=", getenv("DYLD_INSERT_LIBRARIES")); 363 | 364 | int result; 365 | //get the result - the pid will be saved in the pid argument struct 366 | if (!envp) { //if no arguments were passed, just use our array 367 | environ[2] = NULL; 368 | result = real_posix_spawn(pid, path, file_actions, attrp, argv, environ); 369 | } 370 | else { 371 | //calculate the size of the given array 372 | int size = 0; 373 | //we know the array is null-terminated, so we look for that to get the size 374 | char* item = envp[0]; 375 | while (item) { 376 | size++; 377 | item = envp[size]; 378 | } 379 | size++; 380 | char* both[2+size]; 381 | int i; 382 | int flag1 = 0; 383 | int flag2 = 0; 384 | //copy the given envp array into the new one 385 | for (i = 0; i < size-1; i++) { 386 | //avoid duplicate environment variables 387 | if(strstr(envp[i], "DYLD_FORCE_FLAT_NAMESPACE")) { 388 | flag1 = 1; 389 | } 390 | if (strstr(envp[i], "DYLD_INSERT_LIBRARIES=")) { 391 | flag2 = 1; 392 | } 393 | both[i] = envp[i]; 394 | } 395 | //if necessary, add in the injected library environment variables 396 | if (flag1 == 0) { 397 | both[i] = environ[0]; 398 | i++; 399 | } 400 | if (flag2 == 0) { 401 | both[i] = environ[1]; 402 | i++; 403 | } 404 | both[i] = NULL; //terminate the array (doesn't matter if it's not the end) 405 | 406 | result = real_posix_spawn(pid, path, file_actions, attrp, argv, both); 407 | } 408 | //suspend the process so the injection can happen 409 | if (*pid > 0) { 410 | kill(*pid, SIGSTOP); 411 | } 412 | 413 | //write the new process pid to the pipe 414 | //The analyzer will add tracking when the command is read 415 | char* log = timestamp(); 416 | char* front = append_strings(log, ": "); 417 | sprintf(front, " %d", *pid); 418 | write_to_file(front, "PROCESS:"); 419 | 420 | return result; 421 | 422 | } 423 | 424 | int posix_spawnp(pid_t *restrict pid, const char *restrict file, 425 | const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp, 426 | char *const argv[restrict], char *const envp[restrict]) { 427 | 428 | //get a pointer to the real function so we can call it from this wrapper 429 | int (*real_posix_spawnp)(pid_t *restrict, const char *restrict, 430 | const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict, 431 | char *const *restrict, char *const *restrict) = 432 | (int (*)(pid_t *restrict, const char *restrict, 433 | const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict, 434 | char *const *restrict, char *const *restrict)) dlsym(RTLD_NEXT, "posix_spawnp"); 435 | 436 | //insert the injected library into the new process' environment variables 437 | char* environ[3]; 438 | environ[0] = "DYLD_FORCE_FLAT_NAMESPACE=1"; 439 | environ[1] = append_strings("DYLD_INSERT_LIBRARIES=", getenv("DYLD_INSERT_LIBRARIES")); 440 | 441 | int result; 442 | //get the result - the pid will be saved in the pid argument struct 443 | if (!envp) { //if no arguments were passed, just use our array 444 | environ[2] = NULL; 445 | result = real_posix_spawnp(pid, file, file_actions, attrp, argv, environ); 446 | } 447 | else { 448 | //calculate the size of the given array 449 | int size = 0; 450 | //we know the array is null-terminated, so we look for that to get the size 451 | char* item = envp[0]; 452 | while (item) { 453 | size++; 454 | item = envp[size]; 455 | } 456 | size++; 457 | char* both[2+size]; 458 | int i; 459 | int flag1 = 0; 460 | int flag2 = 0; 461 | //copy the given envp array into the new one 462 | for (i = 0; i < size-1; i++) { 463 | //avoid duplicate environment variables 464 | if(strstr(envp[i], "DYLD_FORCE_FLAT_NAMESPACE")) { 465 | flag1 = 1; 466 | } 467 | if (strstr(envp[i], "DYLD_INSERT_LIBRARIES=")) { 468 | flag2 = 1; 469 | } 470 | both[i] = envp[i]; 471 | } 472 | //if necessary, add in the injected library environment variables 473 | if (flag1 == 0) { 474 | both[i] = environ[0]; 475 | i++; 476 | } 477 | if (flag2 == 0) { 478 | both[i] = environ[1]; 479 | i++; 480 | } 481 | both[i] = NULL; //terminate the array (doesn't matter if it's not the end) 482 | 483 | result = real_posix_spawnp(pid, file, file_actions, attrp, argv, both); 484 | } 485 | //suspend the process so the injection can happen 486 | if (*pid > 0) { 487 | kill(*pid, SIGSTOP); 488 | } 489 | 490 | //write the new process pid to the pipe 491 | //The analyzer will add tracking when the command is read 492 | char* log = timestamp(); 493 | char* front = append_strings(log, ": "); 494 | sprintf(front, " %d", *pid); 495 | write_to_file(front, "PROCESS:"); 496 | 497 | return result; 498 | 499 | } 500 | 501 | /*int execve(const char *path, char *const argv[], char *const envp[]) { 502 | //get a pointer to the real function so we can call it from this wrapper 503 | int (*real_execve)(const char *, char *const *, char *const *) = 504 | (int (*)(const char *, char *const *, char *const *)) dlsym(RTLD_NEXT, "execve"); 505 | 506 | char* log = timestamp(); 507 | char* front = append_strings(log, ": "); 508 | sprintf(front, " %s", path); 509 | write_to_file(front, "EXEC:"); 510 | 511 | return real_execve(path, argv, envp); 512 | }*/ 513 | 514 | 515 | 516 | /* 517 | * This section contains utility functions used by the hooks. 518 | */ 519 | 520 | /* 521 | * Prints a string to the log file, with error handling. 522 | * By default it is just appended. 523 | */ 524 | void write_to_file(const char* str, const char* command) { 525 | //if we don't have the name of pipe, read it 526 | if (got_config == 0) { 527 | read_config(); 528 | got_config = 1; 529 | } 530 | struct timespec tim, tim2; 531 | tim.tv_sec = 0; 532 | tim.tv_nsec = 50000000L; 533 | while (pipe_open == 1) { 534 | nanosleep(&tim , &tim2); 535 | } 536 | pipe_open = 1; 537 | //open the pipe for writing 538 | FILE *p = fopen(g_config.pipe_name, "w"); 539 | if (p == NULL) 540 | { 541 | return; 542 | //fprintf(stderr, "Error opening pipe %s.\n", g_config.pipe_name); 543 | //exit(1); 544 | } 545 | //write to the file 546 | fprintf(p, "%s", command); 547 | fprintf(p, "%s", str); 548 | fprintf(p, "%s", "\n"); 549 | fflush(p); 550 | //close the file 551 | fclose(p); 552 | pipe_open = 0; 553 | } 554 | 555 | /* 556 | * Appends two strings, because C is evil. 557 | */ 558 | char* append_strings(const char* str1, const char* str2) { 559 | char* result; 560 | if((result = malloc(strlen(str1)+strlen(str2)+1)) != NULL){ 561 | result[0] = '\0'; // ensures the memory is an empty string 562 | //concatenate strings 563 | strcat(result,str1); 564 | strcat(result,str2); 565 | return result; 566 | } else { //if we run out of memory, which hopefully doesn't happen 567 | return ""; 568 | //fprintf(stderr,"Could not allocate memory for strings.\n"); 569 | //exit(1); 570 | } 571 | } 572 | 573 | /* 574 | * Checks if a file exists, used to see if something is being created. 575 | * Return 1 if the files exists, 0 if not. 576 | */ 577 | int file_exists(const char* path) { 578 | FILE* f; 579 | if ((f = fopen(path, "r")) == NULL) { //file was not opened 580 | if (errno == ENOENT) { 581 | return 0; //file does not exist 582 | } else { 583 | return 1; //file may exist, but there were other errors. 584 | } 585 | } else { 586 | fclose(f); 587 | } 588 | return 1; 589 | } 590 | 591 | /* 592 | * Copies the file at "src" into the file at "dest". 593 | * Note: It will overwrite "dest" if it already exists. 594 | */ 595 | void copy_file(const char* src, const char* dest) { 596 | unsigned char buffer[4096]; 597 | int err, n; 598 | 599 | //get the file name from the path and append it to dest 600 | dest = append_strings(dest, basename(src)); 601 | 602 | //open the two files 603 | int src_file = open(src, O_RDONLY); 604 | int dest_file = open(dest, O_CREAT|O_WRONLY, 0777); 605 | 606 | //write in the bytes 4096 at a time from src to dest 607 | while (1) { 608 | err = read(src_file, buffer, 4096); 609 | if (err == -1) { 610 | //printf("COPY FILE ERROR (src): %s on file %s\n", strerror(errno), src); 611 | break; 612 | } 613 | n = err; 614 | 615 | if (n == 0) break; //stop when we read all of the file 616 | 617 | err = write(dest_file, buffer, n); 618 | if (err == -1) { 619 | //printf("COPY FILE ERROR (dest): %s on file %s\n", strerror(errno), dest); 620 | break; 621 | } 622 | } 623 | //close both files 624 | close(src_file); 625 | close(dest_file); 626 | } 627 | 628 | /* Returns the current time in human-readable format for logging */ 629 | char* timestamp() { 630 | char * result = malloc(24); 631 | time_t ltime; 632 | ltime=time(NULL); //get calendar time 633 | sprintf(result, "%s",asctime(localtime(<ime))); 634 | result[strcspn(result,"\n")] = '\0'; //strip newlines 635 | 636 | //add the pid 637 | char str[15]; 638 | sprintf(str, " (%d) ", getpid()); 639 | return append_strings(result, str); 640 | } 641 | 642 | /* Returns 1 if the action is not in the logging folder 643 | * and 0 if it is, to avoid logging the injected library's actions. 644 | */ 645 | int in_logfolder(const char *path) { 646 | //if we don't have the name of the results folder, read it 647 | if (got_config == 0) { 648 | read_config(); 649 | got_config = 1; 650 | } 651 | if((strstr(path, g_config.results) != NULL) || (strstr(path, g_config.pipe_name) != NULL)) { //folder is in path 652 | return 0; 653 | } 654 | else { //folder is not in path 655 | return 1; 656 | } 657 | } 658 | 659 | /* 660 | * Read in the configuration file for this process, created by api/process.py 661 | * This is a modified verison of the function found in https://github.com/cuckoobox/cuckoomon/blob/master/config.c 662 | */ 663 | void read_config() { 664 | 665 | char buf[512], config_fname[PATH_MAX]; 666 | sprintf(config_fname, "%s%d.conf", getenv("TMPDIR"), getpid()); 667 | FILE *fp = fopen(config_fname, "r"); 668 | 669 | if (fp == NULL) { 670 | sprintf(config_fname, "%s%d.conf", getenv("TMPDIR"), getppid()); 671 | FILE *fp2 = fopen(config_fname, "r"); 672 | } 673 | 674 | 675 | if(fp != NULL) { 676 | while (fgets(buf, sizeof(buf), fp) != NULL) { 677 | // cut off the newline 678 | char *p = strchr(buf, '\r'); 679 | if(p != NULL) *p = 0; 680 | p = strchr(buf, '\n'); 681 | if(p != NULL) *p = 0; 682 | // split key=value 683 | p = strchr(buf, '='); 684 | if(p != NULL) { 685 | *p = 0; 686 | 687 | const char *key = buf, *value = p + 1; 688 | if(!strcmp(key, "pipe")) { 689 | strncpy(g_config.pipe_name, value, 690 | ARRAYSIZE(g_config.pipe_name)); 691 | } 692 | else if(!strcmp(key, "results")) { 693 | strncpy(g_config.results, value, 694 | ARRAYSIZE(g_config.results)); 695 | } 696 | else if(!strcmp(key, "analyzer")) { 697 | strncpy(g_config.analyzer, value, 698 | ARRAYSIZE(g_config.analyzer)); 699 | } 700 | else if(!strcmp(key, "first-process")) { 701 | g_config.first_process = value[0] == '1'; 702 | } 703 | else if(!strcmp(key, "startup-time")) { 704 | g_config.startup_time = atoi(value); 705 | } 706 | else if(!strcmp(key, "retaddr-check")) { 707 | g_config.retaddr_check = value[0] == '1'; 708 | } 709 | else if(!strcmp(key, "host-ip")) { 710 | g_config.host_ip = inet_addr(value); 711 | } 712 | else if(!strcmp(key, "host-port")) { 713 | g_config.host_port = atoi(value); 714 | } 715 | } 716 | } 717 | fclose(fp); 718 | 719 | int (*real_unlink)(const char*) = 720 | (int (*)(const char*)) dlsym(RTLD_NEXT, "unlink"); 721 | //real_unlink(config_fname); 722 | } 723 | } 724 | 725 | /* 726 | * Converts byte buffers to hex so we can see what the program is writing. 727 | */ 728 | char* buffer_to_hex(const void *buf, size_t size, off_t offset) { 729 | //sometimes files try to request a ridiculous amount to write, so we only do the first 100 bytes 730 | if (size > 100) { 731 | size = 100; 732 | } 733 | //copy the buffer 734 | const void *buf2[size]; 735 | memcpy (buf2, buf, size); 736 | //make the buffer - each byte will be converted to the form \x, so 4 chars each 737 | char * result = malloc((size*4)+1); 738 | int pos = 0; 739 | while(size > 0) { 740 | size--; 741 | sprintf(result+pos, "\\x%.2x", *((unsigned char*)&(buf[offset]))); 742 | offset++; 743 | pos += 4; 744 | } 745 | return result; 746 | } 747 | 748 | 749 | -------------------------------------------------------------------------------- /analyzer/darwin/dylib/cuckoohooks.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks.dylib -------------------------------------------------------------------------------- /analyzer/darwin/dylib/cuckoohooks.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Symbols for cuckoohooks.c 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #ifndef _cuckoohooks_h 11 | #define _cuckoohooks_h 12 | #define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) 13 | 14 | struct open_nocancel_args { 15 | //char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; 16 | //char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; 17 | //char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; 18 | }; 19 | 20 | int open(const char *path, int oflag,...); 21 | int creat(const char *pathname, mode_t mode); 22 | int unlink(const char *path); 23 | int rename(const char *old, const char *new); 24 | ssize_t write(int fildes, const void *buf, size_t nbyte); 25 | ssize_t pwrite(int fildes, const void *buf, size_t nbyte, off_t offset); 26 | ssize_t writev(int fildes, const struct iovec *iov, int iovcnt); 27 | int ptrace(int request, pid_t pid, caddr_t addr, int data); 28 | pid_t vfork(void); 29 | pid_t fork(void); 30 | int posix_spawn(pid_t *restrict pid, const char *restrict path, 31 | const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp, 32 | char *const argv[restrict], char *const envp[restrict]); 33 | int posix_spawnp(pid_t *restrict pid, const char *restrict file, 34 | const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp, 35 | char *const argv[restrict], char *const envp[restrict]); 36 | //int execve(const char *path, char *const argv[], char *const envp[]); 37 | 38 | void write_to_file(const char* str, const char* command); 39 | char* append_strings(const char* str1, const char* str2); 40 | int file_exists(const char* path); 41 | void copy_file(const char* src, const char* dest); 42 | char* timestamp(); 43 | int in_logfolder(const char* path); 44 | char* buffer_to_hex(const void *buf, size_t size, off_t offset); 45 | 46 | struct { 47 | // name of the pipe to communicate with cuckoo 48 | char pipe_name[PATH_MAX]; 49 | 50 | // results directory, has to be hidden 51 | char results[PATH_MAX]; 52 | 53 | // analyzer directory, has to be hidden 54 | char analyzer[PATH_MAX]; 55 | 56 | // is this the first process or not? 57 | int first_process; 58 | 59 | // how many milliseconds since startup 60 | unsigned int startup_time; 61 | 62 | // do we want to enable the retaddr check? 63 | int retaddr_check; 64 | 65 | // server ip and port 66 | unsigned int host_ip; 67 | unsigned short host_port; 68 | } g_config; 69 | 70 | void read_config(); 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /analyzer/darwin/dylib/cuckoohooks.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks.o -------------------------------------------------------------------------------- /analyzer/darwin/dylib/cuckoohooks_32.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks_32.dylib -------------------------------------------------------------------------------- /analyzer/darwin/dylib/cuckoohooks_64.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks_64.dylib -------------------------------------------------------------------------------- /analyzer/darwin/dylib/makefile: -------------------------------------------------------------------------------- 1 | all: 2 | gcc -fno-common -O0 -g -c cuckoohooks.c -arch i386 3 | gcc -dynamiclib -o cuckoohooks_32.dylib cuckoohooks.o -arch i386 4 | gcc -fno-common -O0 -g -c cuckoohooks.c -arch x86_64 5 | gcc -dynamiclib -o cuckoohooks_64.dylib cuckoohooks.o -arch x86_64 6 | lipo -create cuckoohooks_32.dylib cuckoohooks_64.dylib -output cuckoohooks.dylib 7 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/lib/__init__.py -------------------------------------------------------------------------------- /analyzer/darwin/lib/api/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/api/apitrace: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # #!/usr/bin/sh 3 | # 4 | # dapptrace - trace user and library function usage. 5 | # Written using DTrace (Solaris 10 3/05). 6 | # 7 | # The default output traces user functions as they are called. Options 8 | # can be used to examine libraries and timestamps. 9 | # 10 | # 17-Jun-2005, ver 0.61 (early release! check for newer versions) 11 | # 12 | # USAGE: dapptrace [-acdeFlhoU] [-u lib] { -p PID | command } 13 | # 14 | # -p PID # examine this PID 15 | # -a # print all details 16 | # -d # print relative timestamps (us) 17 | # -e # print elapsed times (us) 18 | # -F # print flow indentation 19 | # -l # print pid/lwpid per line 20 | # -o # print on cpu times (us) 21 | # -u lib # trace this library instead 22 | # -U # trace all libraries + user functions 23 | # -b bufsize # dynamic variable buf size (default is "4m") 24 | # eg, 25 | # dapptrace df -h # run and examine the "df -h" command 26 | # dapptrace -p 1871 # examine PID 1871 27 | # dapptrace -Fp 1871 # print using flow indents 28 | # dapptrace -eop 1871 # print elapsed and CPU times 29 | # 30 | # The elapsed times are interesting, to help identify calls that take 31 | # some time to complete (during which the process may have context 32 | # switched off the CPU). 33 | # 34 | # SEE ALSO: dappprof # DTraceToolkit 35 | # dtruss # DTraceToolkit 36 | # apptrace 37 | # 38 | # COPYRIGHT: Copyright (c) 2005 Brendan Gregg. 39 | # 40 | # CDDL HEADER START 41 | # 42 | # The contents of this file are subject to the terms of the 43 | # Common Development and Distribution License, Version 1.0 only 44 | # (the "License"). You may not use this file except in compliance 45 | # with the License. 46 | # 47 | # You can obtain a copy of the license at Docs/cddl1.txt 48 | # or http://www.opensolaris.org/os/licensing. 49 | # See the License for the specific language governing permissions 50 | # and limitations under the License. 51 | # 52 | # CDDL HEADER END 53 | # 54 | # Author: Brendan Gregg [Sydney, Australia] 55 | # 56 | # 16-May-2005 Brendan Gregg Created this. 57 | # 58 | 59 | # 60 | # NOTE: THIS DAPPTRACE HAS BEEN STRIPPED DOWN AND MODIFIED FOR PERFORMANCE REASONS 61 | # 62 | 63 | 64 | ############################## 65 | # --- Process Arguments --- 66 | # 67 | 68 | ### Default variables 69 | opt_pid=0; pid=0; opt_indent=0; opt_lib=0; lib="" 70 | opt_liball=0 71 | opt_command=0; command=""; opt_buf=0; buf="10m" 72 | 73 | ### Process options 74 | while getopts ab:cdhp:u:U name 75 | do 76 | case $name in 77 | a) opt_liball=1;; 78 | b) opt_buf=1; buf=$OPTARG ;; 79 | p) opt_pid=1; pid=$OPTARG ;; 80 | u) opt_lib=1; lib=$OPTARG ;; 81 | U) opt_liball=1 ;; 82 | h|?) cat <<-END >&2 83 | USAGE: dapptrace [-acdeholFLU] [-u lib] { -p PID | command } 84 | 85 | -p PID # examine this PID 86 | -a # print all details 87 | -u lib1,lib2... # trace given libraries 88 | -U # trace all libraries + user funcs 89 | -b bufsize # dynamic variable buf size 90 | eg, 91 | dapptrace df -h # run and examine "df -h" 92 | dapptrace -p 1871 # examine PID 1871 93 | END 94 | exit 1 95 | esac 96 | done 97 | shift `expr $OPTIND - 1` 98 | 99 | ### Option logic 100 | if [ $opt_pid -eq 0 ]; then 101 | opt_command=1 102 | if [ "$*" = "" ]; then 103 | $0 -h 104 | exit 105 | fi 106 | command="$*" 107 | fi 108 | 109 | ### Probe logic 110 | if [ $opt_liball -eq 1 ]; then 111 | probe_entry='pid$target:::entry' 112 | probe_return='pid$target:::return' 113 | elif [ $opt_lib -eq 1 ]; then 114 | IFS="," 115 | #create the list of probes 116 | for l in $lib 117 | do 118 | probe_entry=$probe_entry'pid$target:'$l'::entry'$IFS 119 | probe_return=$probe_return'pid$target:'$l'::return'$IFS 120 | done 121 | 122 | #strip the extra ending commas 123 | probe_entry=${probe_entry%?} 124 | probe_return=${probe_return%?} 125 | 126 | #Original code 127 | #probe_entry='pid$target:'$lib'::entry' 128 | #probe_return='pid$target:'$lib'::return' 129 | else 130 | probe_entry='pid$target:a.out::entry' 131 | probe_return='pid$target:a.out::return' 132 | fi 133 | 134 | ################################# 135 | # --- Main Program, DTrace --- 136 | # 137 | 138 | ### Define D Script 139 | dtrace=' 140 | #pragma D option quiet 141 | 142 | /* 143 | * Command line arguments 144 | */ 145 | inline int OPT_command = '$opt_command'; 146 | inline int OPT_liball = '$opt_liball'; 147 | inline int OPT_pid = '$opt_pid'; 148 | inline int PID = '$pid'; 149 | inline string NAME = "'$pname'"; 150 | 151 | dtrace:::BEGIN 152 | { 153 | /* print header */ 154 | printf("%-8s ","PID/THRD"); 155 | printf("CALL(args) \t\t = return\n"); 156 | 157 | /* indent depth */ 158 | depth = 0; 159 | } 160 | 161 | /* 162 | * Save syscall entry info 163 | */ 164 | '$probe_entry' 165 | /depth == 0/ 166 | { 167 | /* set function depth */ 168 | this->fdepth = ++fdepth[probefunc]; 169 | depth += 2; 170 | 171 | /* set start details */ 172 | self->start[probefunc,this->fdepth] = timestamp; 173 | 174 | /* print optional fields */ 175 | printf("%5d/0x%x: ",pid,tid); 176 | printf("%*s",depth,""); /* print indentation */ 177 | 178 | /* print main data */ 179 | printf("-> "); 180 | printf("%s:",probemod); /* print the library name */ 181 | printf("%s(0x%X, 0x%X, 0x%X)\t\t\n",probefunc,arg0,arg1,arg2); 182 | 183 | } 184 | 185 | /* 186 | * Print return data 187 | */ 188 | /* print 3 arg output - default */ 189 | '$probe_return' 190 | /depth == 2 && self->start[probefunc,fdepth[probefunc]]/ 191 | { 192 | /* fetch function depth */ 193 | this->fdepth = fdepth[probefunc]; 194 | 195 | /* print optional fields */ 196 | printf("%5d/0x%x: ",pid,tid); 197 | printf("%*s",depth,""); /* print indentation */ 198 | 199 | /* print main data */ 200 | printf("<- "); 201 | printf("%s:",probemod); /* print library name */ 202 | printf("%s = %d\n",probefunc,(int)arg0); 203 | depth -= 2; 204 | fdepth[probefunc]--; 205 | } 206 | 207 | /* reset indent depth */ 208 | /* SOLARIS: profile:::tick-1sec */ 209 | profile:::tick-10Hz 210 | { 211 | /* 212 | * some probes generated by the pid provider have entries 213 | * but not returns. this is a kludge to fix that problem. this 214 | * also explains fdepth[probefunc] rather than a single depth. 215 | */ 216 | depth = 0; 217 | } 218 | 219 | dtrace:::END 220 | ' 221 | 222 | ### Run DTrace 223 | if [ $opt_command -eq 1 ]; then 224 | /usr/sbin/dtrace -Z -x dynvarsize=$buf -x evaltime=exec -n "$dtrace" \ 225 | -c "$command" >&2 226 | else 227 | /usr/sbin/dtrace -Z -x dynvarsize=$buf -n "$dtrace" -p "$pid" >&2 228 | fi 229 | 230 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/api/process.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | 9 | Provides the dylib injection functionality and a wrapper for process execution. 10 | The classes that extend this for different file types are found in darwin/modules/packages. 11 | """ 12 | 13 | import os 14 | import logging 15 | import random 16 | import sys 17 | import time 18 | import subprocess 19 | from shutil import copy 20 | import signal 21 | 22 | from lib.common.rand import random_string 23 | from lib.common.constants import PATHS, PIPE, SHUTDOWN_MUTEX 24 | from lib.core.config import Config #parses the analysis.conf configuration file 25 | 26 | log = logging.getLogger(__name__) 27 | 28 | #This is the all-important list of APIs to trace 29 | #There is an overhead for each one you add, so choose them carefully. Some examples are below. 30 | api_traces = ["libSystem*", #provides libc API as well as access to kernel methods and low-level system calls 31 | "CoreFoundation", #Provides primitives, data structures, etc. 32 | "Foundation", #data structure support 33 | "CoreServices", #access to things like Bonjour, Spotlight, AppleEvents, etc. 34 | "libgcc*", #gcc, obviously 35 | ] 36 | 37 | def randomize_dylib(dylib_path): 38 | """Randomize dylib name, to prevent easy detection by malware. 39 | @return: new dylib path. 40 | """ 41 | new_dylib_name = random_string(6) # generate a random name 42 | # make a path to the random name in the current working directory 43 | new_dylib_path = os.path.join(os.getcwd(), "dylib", "{0}.dylib".format(new_dylib_name)) 44 | 45 | try: 46 | # copy the dylib file to the new path in the current working directory 47 | copy(dylib_path, new_dylib_path) 48 | return new_dylib_path 49 | except: #if this fails, just return the old path 50 | return dylib_path 51 | 52 | class Process: 53 | """ OS X process """ 54 | first_process = True 55 | cuckoohooks = "cuckoohooks.dylib" 56 | startup_time = 0 57 | 58 | def __init__(self, pid=0, h_process=None, thread_id=0, h_thread=0): 59 | """@param pid: PID. 60 | @param h_process: process handle. 61 | @param thread_id: thread id. 62 | @param h_thread: thread handle. 63 | """ 64 | self.pid = pid 65 | self.h_process = h_process 66 | self.thread_id = thread_id 67 | self.h_thread = h_thread 68 | self.suspended = True #assume it is suspended to start with 69 | self.event_handle = None 70 | self.is_tracing = False 71 | 72 | def execute(self, path, args=None, suspended=False): 73 | """Execute sample process. 74 | @param path: sample path. 75 | @param args: process args. 76 | @param suspended: is suspended. 77 | @return: operation status. 78 | """ 79 | #check to make sure the file is accessible 80 | if os.access(path, os.F_OK): 81 | log.info("File exists at path \"%s\"", path) 82 | 83 | #by default you can't execute in the /tmp directory, so have to change permissions 84 | i = 0 85 | while not os.access(path, os.X_OK) and i < 2: 86 | os.chmod(path, 0755) 87 | i += 1 88 | if not os.access(path, os.X_OK): 89 | log.error("No permissions to execute file at path \"%s\", " 90 | "execution aborted", path) 91 | return False 92 | 93 | # fork a child process 94 | # Note: this could also be done with the subprocess or multiprocessing modules 95 | # but neither of them gave the independence I was looking for. 96 | try: 97 | newpid = os.fork() 98 | except OSError, e: 99 | log.error("Failed to execute process from path \"%s\" with " 100 | "arguments \"%s\" (Error: %s)", path, args, e) 101 | return False 102 | 103 | # randomize the hooking library name 104 | dylib = randomize_dylib(os.path.join("dylib", self.cuckoohooks)) 105 | 106 | if newpid == 0: #if this is the child process 107 | #set the environment variables for the syscall hook injection 108 | new_environ = os.environ 109 | new_environ['DYLD_FORCE_FLAT_NAMESPACE'] = '1' 110 | new_environ['DYLD_INSERT_LIBRARIES'] = dylib 111 | log.info("Child process with pid %d", os.getpid()) 112 | self.pid = os.getpid() 113 | 114 | Process.first_process = False 115 | # set the sid to make this child process independent of the parent 116 | os.setsid() 117 | 118 | # wait for traces to be initialized 119 | app_log = os.path.join(PATHS["logs"], "api_calls_"+str(self.pid)+".log") 120 | while not os.path.exists(app_log): 121 | time.sleep(0.3) 122 | # execute the given executable 123 | if args is None: 124 | os.execve(path, (path,), new_environ) 125 | else: 126 | os.execve(path, args, new_environ) 127 | 128 | #exit when finished 129 | os._exit(0) 130 | else: #this is in the parent process 131 | log.info("Parent process with pid %d", os.getpid()) 132 | #store the child process info 133 | self.pid = newpid 134 | self.h_process = psutil.Process(self.pid) 135 | 136 | self.start_trace() 137 | 138 | log.info("Successfully executed process from path \"%s\" with " 139 | "arguments \"%s\" with pid %d", path, args, self.pid) 140 | 141 | return True 142 | 143 | def start_trace(self): 144 | """ 145 | Once a process has been started, write the library config file 146 | and start the system call tracing. 147 | @return: None 148 | """ 149 | 150 | # write configuration file for injected library 151 | config_path = os.path.join(os.getenv("TMPDIR"), "%s.conf" % self.pid) 152 | log.info("Writing configuration file at %s.", config_path) 153 | with open(config_path, "w") as config: 154 | cfg = Config("analysis.conf") 155 | 156 | # The first time we come up with a random startup-time. 157 | if Process.first_process: 158 | # This adds 1 up to 30 times of 20 minutes to the startup 159 | # time of the process, therefore bypassing anti-vm checks 160 | # which check whether the VM has only been up for <10 minutes. 161 | Process.startup_time = random.randint(1, 30) * 20 * 60 * 1000 162 | 163 | config.write("host-ip={0}\n".format(cfg.ip)) 164 | config.write("host-port={0}\n".format(cfg.port)) 165 | config.write("pipe={0}\n".format(PIPE)) 166 | config.write("results={0}\n".format(PATHS["drop"]+"/")) 167 | config.write("analyzer={0}\n".format(os.getcwd())) 168 | config.write("first-process={0}\n".format(Process.first_process)) 169 | config.write("startup-time={0}\n".format(Process.startup_time)) 170 | config.write("shutdown-mutex={0}\n".format(SHUTDOWN_MUTEX)) 171 | 172 | Process.first_process = False 173 | 174 | # Start system call tracing 175 | # Dtruss traces system calls using Dtrace 176 | pargs = ["dtruss", "-l", "-p", str(self.pid)] 177 | truss_log = os.path.join(PATHS["logs"], "system_calls_"+str(self.pid)+".log") 178 | results = open(truss_log, "a+") 179 | try: 180 | proc2 = subprocess.Popen(pargs, stdout=results, stderr=results) 181 | log.info("Starting Dtruss on pid %d", self.pid) 182 | except (OSError, ValueError): 183 | log.exception("Failed to start system call monitor.") 184 | results.close() 185 | 186 | # Wait for initialization lines to appear in log files 187 | while os.path.getsize(truss_log) == 0: 188 | time.sleep(0.5) 189 | 190 | # Dapptrace traces API calls using Dtrace. I used my own version modified for performance 191 | # NOTE: This slows down the program A LOT if you use the -U option (tracks all libraries) instead of -u 192 | os.chmod("lib/api/apitrace", 0755) 193 | pargs = ["lib/api/apitrace", "-u", ",".join(api_traces), "-p", str(self.pid)] 194 | app_log = os.path.join(PATHS["logs"], "api_calls_"+str(self.pid)+".log") 195 | results2 = open(app_log, "a+") 196 | try: 197 | proc1 = subprocess.Popen(pargs, stdout=results2, stderr=results2) 198 | log.info("Starting apitrace on pid %d", self.pid) 199 | except (OSError, ValueError): 200 | log.exception("Failed to start api call monitor.") 201 | results2.close() 202 | 203 | # wait for initialization lines to appear in log files 204 | while os.path.getsize(app_log) == 0: 205 | time.sleep(0.5) 206 | 207 | 208 | self.is_tracing = True 209 | self.resume() 210 | 211 | def is_alive(self): 212 | """Process is alive? 213 | @return: process status. 214 | """ 215 | exists = True 216 | if not self.h_process: 217 | exists = self.open() 218 | 219 | if not exists: #program has already exited 220 | return False 221 | 222 | #make sure the process is both in the table and not a zombie (ie, terminated) 223 | return self.h_process.is_running() and not (self.h_process.status() == psutil.STATUS_ZOMBIE) 224 | 225 | def get_filepath(self): 226 | """Get process image file path. 227 | @return: decoded file path. 228 | """ 229 | if not self.h_process: 230 | self.open() 231 | 232 | return self.h_process.name() 233 | 234 | 235 | def exit_code(self): 236 | """Get process exit code. 237 | @return: exit code value. 238 | """ 239 | if not self.h_process: 240 | self.open() 241 | 242 | return os.waitpid(self.pid) 243 | 244 | def open(self): 245 | """Open a process and/or thread. 246 | @return: operation status. 247 | """ 248 | ret = bool(self.pid or self.thread_id) 249 | if self.pid and not self.h_process: 250 | try: 251 | self.h_process = psutil.Process(self.pid) 252 | ret = True 253 | except: #unable to get process 254 | ret = False 255 | 256 | return ret 257 | 258 | def get_parent_pid(self): 259 | """Get the Parent Process ID.""" 260 | if not self.h_process: 261 | self.open() 262 | 263 | return self.h_process.ppid() 264 | 265 | def terminate(self): 266 | """Terminate process. 267 | @return: operation status. 268 | """ 269 | if self.h_process == 0: 270 | self.open() 271 | 272 | pargs = ["kill", str(self.pid)] 273 | count = 0 #sometimes this requires multiple tries 274 | while self.h_process.status() == psutil.STATUS_RUNNING: 275 | # Note: both self.h_process.terminate() and os.kill were unreliable for termination 276 | log.info("Attempting to kill process " + str(self.pid) + ", attempt " + str(count)) 277 | proc = subprocess.Popen(pargs) 278 | count +=1 279 | if count > 5: 280 | break 281 | if self.h_process.status() == psutil.STATUS_ZOMBIE or self.h_process.status() == psutil.STATUS_DEAD: 282 | log.info("Successfully terminated process with pid %d", self.pid) 283 | return True 284 | else: 285 | log.error("Failed to terminate process with pid %d", self.pid) 286 | return False 287 | 288 | def resume(self): 289 | """Resume a suspended thread. 290 | @return: operation status. 291 | """ 292 | if not self.suspended: 293 | log.warning("The process with pid %d was not suspended, so it was not resumed" 294 | % self.pid) 295 | return False 296 | 297 | if self.is_tracing: # only resume when Dtrace probes are in place 298 | pargs = ["kill", "-SIGCONT", str(self.pid)] 299 | count = 0 #sometimes this requires multiple tries 300 | while self.h_process.status() == psutil.STATUS_STOPPED: 301 | # Note: both self.h_process.resume() and os.kill were unreliable for resuming 302 | log.info("Attempting to resume process " + str(self.pid) + ", attempt " + str(count)) 303 | proc = subprocess.Popen(pargs) 304 | count +=1 305 | if count > 5: 306 | break 307 | if self.h_process.status() == psutil.STATUS_RUNNING: 308 | log.info("Successfully resumed process with pid %d", self.pid) 309 | return True 310 | else: 311 | log.error("Failed to resume process with pid %d", self.pid) 312 | return False 313 | 314 | if __name__ == '__main__': 315 | print "Why would you do that?" -------------------------------------------------------------------------------- /analyzer/darwin/lib/api/screenshot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | """ 9 | 10 | import math 11 | import sys 12 | import logging 13 | import Quartz.CoreGraphics as CG 14 | from AppKit import * 15 | from Quartz import NSURL, CGImageDestinationCreateWithURL, CGImageDestinationAddImage, CGImageDestinationFinalize 16 | from LaunchServices import kUTTypePNG 17 | import os 18 | import tempfile 19 | 20 | sys.path.append('../common') 21 | 22 | try: 23 | import ImageChops 24 | import Image 25 | import ImageOps 26 | HAVE_PIL = True 27 | except: 28 | HAVE_PIL = False 29 | 30 | log = logging.getLogger(__name__) 31 | 32 | class Screenshot: 33 | """Get screenshots.""" 34 | 35 | def have_pil(self): 36 | """Is Python Image Library installed? 37 | @return: installed status. 38 | """ 39 | return HAVE_PIL 40 | 41 | def equal(self, img1, img2): 42 | """Compares two screenshots using Root-Mean-Square Difference (RMS). 43 | @param img1: screenshot to compare. 44 | @param img2: screenshot to compare. 45 | @return: equal status. 46 | """ 47 | if not HAVE_PIL: 48 | return None 49 | 50 | # To get a measure of how similar two images are, we use 51 | # root-mean-square (RMS). If the images are exactly identical, 52 | # this value is zero. 53 | diff = ImageChops.difference(img1, img2) 54 | h = diff.histogram() 55 | sq = (value * ((idx % 256)**2) for idx, value in enumerate(h)) 56 | sum_of_squares = sum(sq) 57 | rms = math.sqrt(sum_of_squares/float(img1.size[0] * img1.size[1])) 58 | 59 | # Might need to tweak the threshold. I have set it sensitive enough so that it should 60 | # detect installer changes but not so sensitive that it triggers every second with Activity Monitor open 61 | return rms < 50 62 | 63 | def take(self): 64 | """Take a screenshot. 65 | Unfortunately, PIL's screenshot functions are Windows-only. 66 | Most OS X solutions in Python involve loading an entire GUI library, like gtk or wxPython, 67 | or using the built-in screencapture program and then reading the screenshot from disk. 68 | In the interest of speed, I have tried to avoid those things by using PyObjC. 69 | @return: screenshot or None. 70 | """ 71 | 72 | #get the size of the screen 73 | rect = NSScreen.mainScreen().frame() 74 | size = (int(rect.size.width), int(rect.size.height)) 75 | 76 | try: 77 | image = CG.CGWindowListCreateImage(rect, CG.kCGWindowListOptionOnScreenOnly, CG.kCGNullWindowID, CG.kCGWindowImageDefault) 78 | except: 79 | log.exception("Unable to take screenshot.") 80 | 81 | # write the image to a temporary file in memory to save I/O performance 82 | f = tempfile.NamedTemporaryFile(mode='rwb+', delete=False) 83 | url = NSURL.fileURLWithPath_(f.name) 84 | destination = CGImageDestinationCreateWithURL(url, kUTTypePNG, 1, None) 85 | CGImageDestinationAddImage(destination, image, None) 86 | CGImageDestinationFinalize(destination) 87 | f.flush() 88 | f.seek(0) 89 | f.close() 90 | 91 | # Convert CGImage to PIL image for comparisons 92 | pil_image = Image.open(f.name) 93 | 94 | os.unlink(f.name) 95 | 96 | return pil_image 97 | 98 | def image_fix(self, image): 99 | #invert the color 100 | inverted_image = image 101 | """ 102 | if image.mode == 'RGBA': 103 | r, g, b, a = image.split() 104 | rgb_image = Image.merge('RGB', (r,g,b)) 105 | inverted_image = ImageOps.invert(rgb_image) 106 | r2, g2, b2 = inverted_image.split() 107 | inverted_image = Image.merge('RGBA', (r2,g2,b2,a)) 108 | 109 | else: #this nice simple method only works with RGB 110 | inverted_image = ImageOps.invert(image) 111 | """ 112 | 113 | # Rotate and flip horizontally to the correct orientation 114 | return inverted_image.rotate(180).transpose(Image.FLIP_LEFT_RIGHT) -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/lib/common/__init__.py -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/abstracts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extremely bare-bones copy of the Cuckoo Package-Process structure. 3 | I have converted what I could for OS X and scrapped the rest. 4 | """ 5 | 6 | import os 7 | import os.path 8 | import plistlib 9 | import logging 10 | 11 | from lib.api.process import Process 12 | from lib.common.exceptions import CuckooPackageError 13 | 14 | #get logger 15 | log = logging.getLogger() 16 | 17 | class Package(object): 18 | """Base abstact analysis package.""" 19 | PATHS = [] 20 | 21 | def __init__(self, options={}): 22 | """@param options: options dict.""" 23 | self.options = options 24 | self.pids = [] 25 | 26 | def set_pids(self, pids): 27 | """Update list of monitored PIDs in the package context. 28 | @param pids: list of pids. 29 | """ 30 | self.pids = pids 31 | 32 | def start(self): 33 | """Run analysis packege. 34 | @param path: sample path. 35 | @raise NotImplementedError: this method is abstract. 36 | """ 37 | raise NotImplementedError 38 | 39 | def check(self): 40 | """Check.""" 41 | return True 42 | 43 | def _enum_paths(self): 44 | raise NotImplementedError 45 | 46 | def get_path(self, application): 47 | raise NotImplementedError 48 | 49 | def execute(self, path, args): 50 | 51 | p = Process() 52 | if not p.execute(path=path, args=args, suspended=True): 53 | raise CuckooPackageError("Unable to execute the initial process, " 54 | "analysis aborted.") 55 | return p.pid 56 | 57 | def finish(self): 58 | """Finish run. 59 | If specified to do so, this method dumps the memory of 60 | all running processes. 61 | """ 62 | return True 63 | 64 | def getAppFilePath(self, file_path): 65 | # the reason we don't just do "open file" is because OS X has restrictions on open that make it hard to trace 66 | # and you can't get the started process' pid from it 67 | open = "/usr/bin/open" 68 | 69 | #find Info.plist 70 | plist = "" 71 | path = self.findFile(file_path, "Info.plist") 72 | 73 | if path == "": #no Info.plist found, this is an illegally structured app 74 | log.info("No Info.plist found within .app file") 75 | return open 76 | else: 77 | plist = plistlib.readPlist(path) 78 | 79 | try: 80 | #get the name of the main executable of this app 81 | exec_file = plist["CFBundleExecutable"] 82 | except KeyError: #no executable was listed, this is an illegally structured app 83 | log.info("No main executable name found in Info.plist") 84 | return open 85 | 86 | #get the full path of the executable 87 | return self.findFile(file_path, exec_file) 88 | 89 | def findFile(self, path, name): 90 | result = "" 91 | for root, dirs, files in os.walk(path): 92 | for f in files: 93 | if name in f: 94 | return os.path.join(root, f) 95 | return result 96 | 97 | def findDir(self, path, name): 98 | result = "" 99 | for root, dirs, files in os.walk(path): 100 | for d in dirs: 101 | if name in d: 102 | return os.path.join(root, d) 103 | return result 104 | 105 | 106 | class Auxiliary(object): 107 | pass 108 | 109 | 110 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/constants.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | from lib.common.rand import random_string 5 | 6 | 7 | ROOT = os.path.join(os.getenv("HOME"), random_string(6, 10)) 8 | 9 | PATHS = {"root" : ROOT, 10 | "logs" : os.path.join(ROOT, "logs"), 11 | "files" : os.path.join(ROOT, "files"), 12 | "shots" : os.path.join(ROOT, "shots"), 13 | "memory" : os.path.join(ROOT, "memory"), 14 | "drop" : os.path.join(ROOT, "drop")} 15 | 16 | PIPE = os.path.join(os.getenv("TMPDIR"), random_string(6, 10)) 17 | SHUTDOWN_MUTEX = "Global/" + random_string(6, 10) 18 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2010-2014 Cuckoo Foundation. 2 | # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 3 | # See the file 'docs/LICENSE' for copying permission. 4 | # Note: This is unmodified from the Cuckoo Windows version 5 | 6 | class CuckooError(Exception): 7 | pass 8 | 9 | 10 | class CuckooPackageError(Exception): 11 | pass 12 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/hashing.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | BUFSIZE = 1024*1024 4 | 5 | 6 | def hash_file(method, path): 7 | """Calculates an hash on a file by path. 8 | @param method: callable hashing method 9 | @param path: file path 10 | @return: computed hash string 11 | """ 12 | f = open(path, "rb") 13 | h = method() 14 | while True: 15 | buf = f.read(BUFSIZE) 16 | if not buf: 17 | break 18 | h.update(buf) 19 | return h.hexdigest() 20 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/rand.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | def random_string(minimum, maximum=None): 5 | if maximum is None: 6 | maximum = minimum 7 | 8 | count = random.randint(minimum, maximum) 9 | return "".join(random.choice(string.ascii_letters) for x in xrange(count)) 10 | 11 | def random_integer(digits): 12 | start = 10 ** (digits - 1) 13 | end = (10 ** digits) - 1 14 | return random.randint(start, end) 15 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/common/results.py: -------------------------------------------------------------------------------- 1 | """ 2 | Required only minimal changes to work with OS X 3 | """ 4 | 5 | import logging 6 | import socket 7 | 8 | from lib.core.config import Config 9 | 10 | log = logging.getLogger(__name__) 11 | 12 | BUFSIZE = 1024*1024 13 | 14 | def upload_to_host(file_path, dump_path): 15 | nc = infd = None 16 | try: 17 | nc = NetlogFile(dump_path) 18 | 19 | infd = open(file_path, "rb") 20 | buf = infd.read(BUFSIZE) 21 | while buf: 22 | nc.send(buf) 23 | buf = infd.read(BUFSIZE) 24 | except Exception as e: 25 | log.error("Exception uploading file to host: %s", e) 26 | finally: 27 | if infd: 28 | infd.close() 29 | if nc: 30 | nc.close() 31 | 32 | class NetlogConnection(object): 33 | def __init__(self, proto=""): 34 | config = Config(cfg="analysis.conf") 35 | self.hostip, self.hostport = config.ip, config.port 36 | self.sock, self.file = None, None 37 | self.proto = proto 38 | 39 | def connect(self): 40 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 41 | try: 42 | s.connect((self.hostip, self.hostport)) 43 | s.sendall(self.proto) 44 | except: 45 | pass 46 | else: 47 | self.sock = s 48 | self.file = s.makefile() 49 | 50 | def send(self, data, retry=True): 51 | try: 52 | self.sock.sendall(data) 53 | except socket.error: 54 | self.connect() 55 | if retry: 56 | self.send(data, retry=False) 57 | except: 58 | # We really have nowhere to log this, if the netlog connection 59 | # does not work, we can assume that any logging won't work either. 60 | # So we just fail silently. 61 | self.close() 62 | 63 | def close(self): 64 | try: 65 | self.file.close() 66 | self.sock.close() 67 | except Exception: 68 | pass 69 | 70 | class NetlogFile(NetlogConnection): 71 | def __init__(self, filepath): 72 | self.filepath = filepath 73 | NetlogConnection.__init__(self, proto="FILE\n{0}\n".format(self.filepath)) 74 | self.connect() 75 | 76 | class NetlogHandler(logging.Handler, NetlogConnection): 77 | def __init__(self): 78 | logging.Handler.__init__(self) 79 | NetlogConnection.__init__(self, proto="LOG\n") 80 | self.connect() 81 | 82 | def emit(self, record): 83 | msg = self.format(record) 84 | self.send("{0}\n".format(msg)) 85 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/lib/core/__init__.py -------------------------------------------------------------------------------- /analyzer/darwin/lib/core/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2010-2014 Cuckoo Foundation. 2 | # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 3 | # See the file 'docs/LICENSE' for copying permission. 4 | # Note: This is unmodified from the Cuckoo Windows version 5 | 6 | import ConfigParser 7 | 8 | class Config: 9 | def __init__(self, cfg): 10 | """@param cfg: configuration file.""" 11 | config = ConfigParser.ConfigParser(allow_no_value=True) 12 | config.read(cfg) 13 | 14 | for section in config.sections(): 15 | for name, raw_value in config.items(section): 16 | if name == "file_name": 17 | value = config.get(section, name) 18 | else: 19 | try: 20 | value = config.getboolean(section, name) 21 | except ValueError: 22 | try: 23 | value = config.getint(section, name) 24 | except ValueError: 25 | value = config.get(section, name) 26 | setattr(self, name, value) 27 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/core/packages.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | """ 9 | 10 | def choose_package(file_type, file_name): 11 | """Choose analysis package due to file type and file extension. 12 | @param file_type: file type. 13 | @return: package or None. 14 | """ 15 | if not file_type: 16 | return None 17 | 18 | file_name = file_name.lower() 19 | 20 | if "Mach-O" in file_type: 21 | return "macho" 22 | elif "PDF" in file_type or file_name.endswith(".pdf"): 23 | return "pdf" 24 | elif "Microsoft Word" in file_type or \ 25 | "Microsoft Office Word" in file_type or \ 26 | file_name.endswith(".docx") or \ 27 | file_name.endswith(".doc"): 28 | return "doc" 29 | elif "Rich Text Format" in file_type or file_name.endswith(".rtf") \ 30 | or "property list" in file_type or file_name.endswith(".plist"): 31 | return "rtf" 32 | elif "HTML" in file_type or file_name.endswith(".htm") or file_name.endswith(".html"): 33 | return "html" 34 | elif file_name.endswith(".jar"): 35 | return "jar" 36 | elif "Zip" in file_type or file_name.endswith(".zip"): 37 | return "zip" 38 | elif file_name.endswith(".py") or "Python script" in file_type: 39 | return "python" 40 | else: 41 | return "generic" 42 | -------------------------------------------------------------------------------- /analyzer/darwin/lib/core/startup.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import logging 5 | 6 | from lib.common.constants import PATHS 7 | from lib.common.results import NetlogHandler 8 | 9 | log = logging.getLogger() 10 | 11 | def create_folders(): 12 | """Create folders in PATHS.""" 13 | for name, folder in PATHS.items(): 14 | if os.path.exists(folder): 15 | continue 16 | 17 | try: 18 | os.makedirs(folder) 19 | except OSError: 20 | log.error("Unable to create folder %s", folder) 21 | 22 | def init_logging(): 23 | """Initialize logger.""" 24 | formatter = logging.Formatter("%(asctime)s [%(name)s] %(levelname)s: %(message)s") 25 | sh = logging.StreamHandler() 26 | sh.setFormatter(formatter) 27 | log.addHandler(sh) 28 | 29 | nh = NetlogHandler() 30 | nh.setFormatter(formatter) 31 | log.addHandler(nh) 32 | 33 | log.setLevel(logging.DEBUG) 34 | 35 | #create the analyzer log file 36 | f = open(os.path.join(PATHS["logs"], 'analyzer.log'), 'w') 37 | f.write("") 38 | f.close() 39 | #use the file for logging 40 | hdlr = logging.FileHandler(os.path.join(PATHS["logs"], 'analyzer.log')) 41 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 42 | hdlr.setFormatter(formatter) 43 | log.addHandler(hdlr) 44 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/auxiliary/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/modules/auxiliary/__init__.py -------------------------------------------------------------------------------- /analyzer/darwin/modules/auxiliary/human.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | """ 9 | 10 | import random 11 | import logging 12 | import time 13 | from threading import Thread 14 | import Quartz.CoreGraphics as CG 15 | from AppKit import * 16 | 17 | from lib.common.abstracts import Auxiliary 18 | 19 | log = logging.getLogger(__name__) 20 | 21 | class Human(Auxiliary, Thread): 22 | """Human after all""" 23 | 24 | def __init__(self): 25 | Thread.__init__(self) 26 | self.do_run = True 27 | #get the size of the screen 28 | rect = NSScreen.mainScreen().frame() 29 | self.width = int(rect.size.width) 30 | self.height = int(rect.size.height) 31 | 32 | def stop(self): 33 | self.do_run = False 34 | 35 | def run(self): 36 | while self.do_run: 37 | self.move_mouse() 38 | self.click_mouse() 39 | #sleep for one second 40 | time.sleep(1) 41 | 42 | """ 43 | Move the mouse to a random place on screen. 44 | This moves at computer-speed, so any malware checking for movement speed will be alerted. 45 | """ 46 | def move_mouse(self): 47 | x = random.randint(0, self.width) 48 | y = random.randint(0, self.height) 49 | #create the event 50 | move = CG.CGEventCreateMouseEvent(None, CG.kCGEventMouseMoved, CG.CGPointMake(x, y), CG.kCGMouseButtonLeft) 51 | #send the event 52 | CG.CGEventPost(CG.kCGHIDEventTap, move) 53 | 54 | def click_mouse(self): 55 | point = CG.CGPointMake(self.width/2, 250) 56 | # Move mouse to top-middle position. 57 | move = CG.CGEventCreateMouseEvent(None, CG.kCGEventMouseMoved, point, CG.kCGMouseButtonLeft) 58 | # Mouse down. 59 | down = CG.CGEventCreateMouseEvent(NULL, CG.kCGEventLeftMouseDown, point, CG.kCGMouseButtonLeft) 60 | # Mouse up. 61 | up = CG.CGEventCreateMouseEvent(NULL, CG.kCGEventLeftMouseUp, point, CG.kCGMouseButtonLeft) 62 | 63 | #send the events 64 | CG.CGEventPost(CG.kCGHIDEventTap, move) 65 | CG.CGEventPost(CG.kCGHIDEventTap, down) 66 | time.sleep(0.05) 67 | CG.CGEventPost(CG.kCGHIDEventTap, up) 68 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/auxiliary/screenshots.py: -------------------------------------------------------------------------------- 1 | """ 2 | “Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 4 | this work by or on behalf of the U.S. Government. 5 | NOTICE: 6 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 7 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 8 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 9 | """ 10 | 11 | import time 12 | import logging 13 | import StringIO 14 | from threading import Thread 15 | import Quartz.CoreGraphics as CG 16 | 17 | from lib.common.abstracts import Auxiliary 18 | from lib.common.results import NetlogFile 19 | from lib.api.screenshot import Screenshot 20 | 21 | log = logging.getLogger(__name__) 22 | SHOT_DELAY = 1 23 | 24 | class Screenshots(Auxiliary, Thread): 25 | """Take screenshots.""" 26 | 27 | def __init__(self): 28 | Thread.__init__(self) 29 | self.do_run = True 30 | 31 | def stop(self): 32 | """Stop screenshotting.""" 33 | self.do_run = False 34 | 35 | def run(self): 36 | """Run screenshotting. 37 | @return: operation status. 38 | """ 39 | if not Screenshot().have_pil(): 40 | log.warning("Python Image Library is not installed, " 41 | "screenshots are disabled") 42 | return False 43 | 44 | img_counter = 0 45 | img_last = None 46 | 47 | while self.do_run: 48 | time.sleep(SHOT_DELAY) 49 | try: 50 | img_current = Screenshot().take() 51 | except IOError as e: 52 | log.error("Cannot take screenshot: %s", e) 53 | continue 54 | 55 | if img_last: 56 | if Screenshot().equal(img_last, img_current): 57 | continue 58 | img_counter += 1 59 | 60 | #send a return keystroke for installers 61 | self.sendKey(0x24) 62 | 63 | try: 64 | # workaround as PIL can't write to the socket file object :( 65 | tmpio = StringIO.StringIO() 66 | img_current.save(tmpio, format="PNG") 67 | tmpio.seek(0) 68 | except: 69 | log.exception("Unable to write screenshot to disk.") 70 | 71 | # now upload to host from the StringIO 72 | nf = NetlogFile("shots/%s.png" % str(img_counter).rjust(4, "0")) 73 | 74 | for chunk in tmpio: 75 | nf.sock.sendall(chunk) 76 | 77 | nf.close() 78 | 79 | img_last = img_current 80 | 81 | return True 82 | 83 | """ 84 | Send a keyboard event to the system at large using the Quartz Event Service 85 | https://developer.apple.com/library/mac/documentation/Carbon/Reference/QuartzEventServicesRef/Reference/reference.html 86 | 0x24 is Return/Enter (more keys at http://webnnel.googlecode.com/svn/trunk/lib/Carbon.framework/Versions/A/Frameworks/HIToolbox.framework/Versions/A/Headers/Events.h) 87 | Modifiers: (defined in http://www.opensource.apple.com/source/IOHIDFamily/IOHIDFamily-308/IOHIDSystem/IOKit/hidsystem/IOLLEvent.h) 88 | NX_ALPHASHIFTMASK 0x00010000 89 | NX_SHIFTMASK 0x00020000 90 | NX_CONTROLMASK 0x00040000 91 | NX_ALTERNATEMASK 0x00080000 92 | NX_COMMANDMASK 0x00100000 93 | NX_NUMERICPADMASK 0x00200000 94 | NX_HELPMASK 0x00400000 95 | NX_SECONDARYFNMASK 0x00800000 96 | """ 97 | def sendKey(self, key, modifiers=0x0): 98 | 99 | source = CG.CGEventSourceCreate(CG.kCGEventSourceStateCombinedSessionState) 100 | 101 | keyDown = CG.CGEventCreateKeyboardEvent(source, key, True) 102 | CG.CGEventSetFlags(keyDown, modifiers) 103 | keyUp = CG.CGEventCreateKeyboardEvent(source, key, False) 104 | 105 | CG.CGEventPost(CG.kCGAnnotatedSessionEventTap, keyDown) 106 | CG.CGEventPost(CG.kCGAnnotatedSessionEventTap, keyUp) 107 | 108 | #Apparently these lines are not needed on newer versions of PyObjC and cause a segfault 109 | #CG.CFRelease(keyUp) 110 | #CG.CFRelease(keyDown) 111 | #CG.CFRelease(source) 112 | 113 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/doc.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from lib.common.abstracts import Package 4 | import os 5 | import os.path 6 | import plistlib 7 | import logging 8 | import subprocess 9 | 10 | from AppKit import * 11 | import Foundation 12 | from PyObjCTools import AppHelper 13 | 14 | #get logger 15 | log = logging.getLogger() 16 | 17 | class OpenDoc(NSObject): 18 | 19 | def setPath(self, p, test, app): 20 | self.path = p 21 | self.loop = test 22 | self.app = app 23 | 24 | def run_(self, event): 25 | if "Word" in str(event.userInfo()["NSApplicationName"]): 26 | #open the file in question 27 | subprocess.call(["/usr/bin/open", "-a", self.app, self.path]) 28 | #remove the event observer 29 | ws = NSWorkspace.sharedWorkspace() 30 | nc = ws.notificationCenter() 31 | nc.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None) 32 | self.loop.stop() 33 | 34 | class Doc(Package): 35 | """Word analysis package. 36 | Note that this tends to be a bit picky - if the document is too old for the version of Word 37 | on the VM, it may not open properly""" 38 | 39 | def stop(self): 40 | self.is_open = True 41 | 42 | def start(self, path): 43 | (word, app) = self.get_path("Microsoft Office") 44 | 45 | # There is no nice programmatic way to open a file in Word on OS X (other than open -a) 46 | # There is on Windows, not here. No command line arguments to work with at all 47 | #create the observer watching for application launch events 48 | ws = NSWorkspace.sharedWorkspace() 49 | nc = ws.notificationCenter() 50 | op = OpenDoc.new() 51 | op.setPath(path, self, app) 52 | nc.addObserver_selector_name_object_(op, 'run:', NSWorkspaceDidLaunchApplicationNotification, None) 53 | #start the Preview process 54 | pid = self.execute(word, (word,)) 55 | 56 | #Wait until the process is open 57 | self.is_open = False 58 | runLoop = NSRunLoop.currentRunLoop() 59 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 60 | while not self.is_open: 61 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 62 | NSRunLoop.runUntilDate_(runLoop, date) 63 | 64 | #return the pid of Word 65 | return pid 66 | 67 | #Old, less reliable bash method 68 | #args = "\"" + word + "\" & sleep 5 && echo \'tell application \""+app+"\" to open \""+path+"\"\' | /usr/bin/osascript" 69 | #return self.execute(bash, (bash, "-c", "%s" % args)) 70 | 71 | def get_path(self, name): 72 | #attempt to find Microsoft Word 73 | word_dir = "" 74 | dir = self.findDir("/Applications/", name) 75 | word_dir = os.path.join(dir, "Microsoft Word.app") 76 | 77 | if word_dir != "": 78 | return (self.getAppFilePath(word_dir), "Microsoft Word") 79 | 80 | #If we can't find Word, try to find Pages 81 | word_dir = self.findDir("/Applications/", "Pages") 82 | 83 | if word_dir != "": 84 | return (self.getAppFilePath(word_dir), "Pages") 85 | 86 | #If no Pages, open the darn thing in TextEdit 87 | return (self.getAppFilePath("/Applications/TextEdit.app/"), "TextEdit") 88 | 89 | def getAppFilePath(self, file_path): 90 | # the reason we don't just do "open file.app" is because OS X has restrictions on open that make it hard to trace 91 | # and you can't get the started process' pid from it 92 | open = "/usr/bin/open" 93 | 94 | #find Info.plist 95 | plist = "" 96 | path = self.findFile(file_path, "Info.plist") 97 | 98 | if path == "": #no Info.plist found, this is an illegally structured app 99 | log.info("No Info.plist found within .app file") 100 | return open 101 | else: 102 | plist = plistlib.readPlist(path) 103 | 104 | try: 105 | #get the name of the main executable of this app 106 | exec_file = plist["CFBundleExecutable"] 107 | except KeyError: #no executable was listed, this is an illegally structured app 108 | log.info("No main executable name found in Info.plist") 109 | return open 110 | 111 | #get the full path of the executable 112 | return self.findFile(file_path, exec_file) 113 | 114 | def findDir(self, path, name): 115 | result = "" 116 | for root, dirs, files in os.walk(path): 117 | for d in dirs: 118 | if name in d: 119 | return os.path.join(root, d) 120 | return result 121 | 122 | def findFile(self, path, name): 123 | result = "" 124 | for root, dirs, files in os.walk(path): 125 | for f in files: 126 | if name in f: 127 | return os.path.join(root, f) 128 | return result 129 | 130 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/generic.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the package of last resort - Apple's "open" utility is very good 3 | at determining what to use to open a file. Unfortunately, we cnanot get the pid 4 | of the new process from ope, which means the sandbox will probably prematurely quit once 5 | the open process is finished. 6 | """ 7 | 8 | from lib.common.abstracts import Package 9 | 10 | class Generic(Package): 11 | """Generic analysis package.""" 12 | 13 | def start(self, path): 14 | open = "/usr/bin/open" 15 | 16 | return self.execute(open, (open, path)) 17 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/html.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import shutil 4 | import logging 5 | import os 6 | import os.path 7 | import plistlib 8 | 9 | from lib.common.abstracts import Package 10 | 11 | log = logging.getLogger(__name__) 12 | 13 | class HTML(Package): 14 | """HTML file analysis package.""" 15 | 16 | def start(self, path): 17 | safari = self.getAppFilePath("/Applications/Safari.app") 18 | 19 | # Travelling inside malware universe you should bring a towel with you. 20 | # If a file detected as HTML is submitted without a proper extension, 21 | # or without an extension at all (are you used to name samples with hash?), 22 | # it might be opened as a text file, so your precious sample will not 23 | # be executed. 24 | # We help you sample to execute renaming it with a proper extension. 25 | if not path.endswith((".htm", ".html")): 26 | shutil.copy(path, path + ".html") 27 | path += ".html" 28 | log.info("Submitted file is missing extension, adding .html") 29 | 30 | return self.execute(safari, (safari, path)) 31 | 32 | 33 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/jar.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import os.path 4 | 5 | from lib.common.abstracts import Package 6 | 7 | #get logger 8 | log = logging.getLogger() 9 | 10 | class Jar(Package): 11 | """Java analysis package.""" 12 | 13 | def start(self, path): 14 | #this is the standard path, actually a symlink to the read Java files 15 | java = "/usr/bin/java" 16 | #make sure there is Java on this system 17 | if not os.path.isfile(java): 18 | log.error("Cannot run jar file: No Java installed on the guest system!") 19 | return None 20 | 21 | class_path = self.options.get("class") 22 | 23 | if class_path: 24 | return self.execute(java, (java, "-cp", path, class_path)) 25 | #args = "-cp \"%s\" %s" % (path, class_path) 26 | else: 27 | return self.execute(java, (java, "-jar", path)) 28 | #args = "-jar \"%s\"" % path 29 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/macho.py: -------------------------------------------------------------------------------- 1 | """ 2 | This runs Mach-O executables and FAT files, provided they are the correct architecture for the VM in use. 3 | """ 4 | 5 | from lib.common.abstracts import Package 6 | 7 | class MachO(Package): 8 | 9 | def start(self, path): 10 | args = self.options.get("arguments") 11 | if args: 12 | return self.execute(path, args) 13 | return self.execute(path, (path,)) 14 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/pdf.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import os.path 5 | import plistlib 6 | import subprocess 7 | import logging 8 | import time 9 | 10 | from AppKit import * 11 | import Foundation 12 | from PyObjCTools import AppHelper 13 | 14 | from lib.common.abstracts import Package 15 | 16 | #get logger 17 | log = logging.getLogger() 18 | 19 | class OpenPDF(NSObject): 20 | 21 | def setPath(self, p, test): 22 | self.path = p 23 | self.loop = test 24 | 25 | def run_(self, event): 26 | if str(event.userInfo()["NSApplicationName"]) == "Preview": 27 | #open the file in question 28 | subprocess.call(["/usr/bin/open", "-a", "Preview", self.path]) 29 | #remove the event observer 30 | ws = NSWorkspace.sharedWorkspace() 31 | nc = ws.notificationCenter() 32 | nc.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None) 33 | self.loop.stop() 34 | 35 | 36 | class PDF(Package): 37 | """PDF analysis package.""" 38 | 39 | def stop(self): 40 | self.is_open = True 41 | 42 | def start(self, path): 43 | #Use Preview, the default PDF application 44 | app = self.getAppFilePath("/Applications/Preview.app") 45 | 46 | #A lot of downloaded PDFs will contain a "quarantine" attribute until opened for the first time 47 | # This will cause a permission issue unless we remove it 48 | # But there are still other permission issues with downloaded files, apparently 49 | os.chmod(path, 0o777) 50 | result = subprocess.call(["xattr", "-c", path]) 51 | 52 | # The remaining problem is that we can't fight Apple's sandbox - it will refuse to 53 | # open the document in a nice scripted fashion unless it has already been opened 54 | # once by a signed piece of software or the user 55 | # In this case, 'open' is the signed software of choice 56 | 57 | #create the observer watching for application launch events 58 | ws = NSWorkspace.sharedWorkspace() 59 | nc = ws.notificationCenter() 60 | op = OpenPDF.new() 61 | op.setPath(path, self) 62 | nc.addObserver_selector_name_object_(op, 'run:', NSWorkspaceDidLaunchApplicationNotification, None) 63 | #start the Preview process 64 | pid = self.execute(app, (app,)) 65 | 66 | #Wait until the process is open 67 | self.is_open = False 68 | runLoop = NSRunLoop.currentRunLoop() 69 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 70 | while not self.is_open: 71 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 72 | NSRunLoop.runUntilDate_(runLoop, date) 73 | 74 | #return the pid of Preview 75 | return pid 76 | 77 | #This is the simple method you could do if Preview weren't sandboxed 78 | #return self.execute(app, (app, path)) 79 | 80 | #Another alternate, unreliable method using bash and sleep 81 | #args = "\"" + app + "\" & sleep 5 && open -a Preview "+path 82 | #return self.execute(bash, (bash, "-c", "%s" % args)) 83 | 84 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/python.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import logging 4 | import os 5 | 6 | from lib.common.abstracts import Package 7 | 8 | #get logger 9 | log = logging.getLogger() 10 | 11 | class Python(Package): 12 | """Python analysis package.""" 13 | 14 | def start(self, path): 15 | python = "/usr/bin/python" #symlink to actual python, default location for OS X 16 | #make sure there is Python here 17 | if not os.path.isfile(python): 18 | log.error("Cannot run python file: Python not found in /usr/bin") 19 | return None 20 | 21 | args = self.options.get("arguments", "") 22 | 23 | if args: 24 | return self.execute(python, (python, path, args)) 25 | return self.execute(python, (python, path)) 26 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/rtf.py: -------------------------------------------------------------------------------- 1 | from lib.common.abstracts import Package 2 | import os 3 | import os.path 4 | import plistlib 5 | import logging 6 | import subprocess 7 | 8 | from AppKit import * 9 | import Foundation 10 | from PyObjCTools import AppHelper 11 | 12 | #get logger 13 | log = logging.getLogger() 14 | 15 | class OpenFile(NSObject): 16 | 17 | def setPath(self, p, test): 18 | self.path = p 19 | self.loop = test 20 | 21 | def run_(self, event): 22 | if str(event.userInfo()["NSApplicationName"]) == "TextEdit": 23 | #open the file in question 24 | subprocess.call(["/usr/bin/open", "-e", self.path]) 25 | #remove the event observer 26 | ws = NSWorkspace.sharedWorkspace() 27 | nc = ws.notificationCenter() 28 | nc.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None) 29 | self.loop.stop() 30 | 31 | class RTF(Package): 32 | """ Plist and RTF analysis package 33 | """ 34 | 35 | def stop(self): 36 | self.is_open = True 37 | 38 | def start(self, path): 39 | textedit = self.getAppFilePath("/Applications/TextEdit.app/") 40 | 41 | #fix what permissions issues we can 42 | os.chmod(path, 0o777) 43 | result = subprocess.call(["xattr", "-c", path]) 44 | 45 | # The remaining problem is that we can't fight Apple's sandbox - it will refuse to 46 | # open the document in a nice scripted fashion unless it has already been opened 47 | # once by a signed piece of software or the user 48 | # In this case, 'open' is the signed software of choice 49 | 50 | #create the observer watching for application launch events 51 | ws = NSWorkspace.sharedWorkspace() 52 | nc = ws.notificationCenter() 53 | op = OpenFile.new() 54 | op.setPath(path, self) 55 | nc.addObserver_selector_name_object_(op, 'run:', NSWorkspaceDidLaunchApplicationNotification, None) 56 | #start the Preview process 57 | pid = self.execute(textedit, (textedit,)) 58 | 59 | #Wait until the process is open 60 | self.is_open = False 61 | runLoop = NSRunLoop.currentRunLoop() 62 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 63 | while not self.is_open: 64 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 65 | NSRunLoop.runUntilDate_(runLoop, date) 66 | 67 | #return the pid of TextEdit 68 | return pid 69 | 70 | #Old, less reliable bash method 71 | #args = "\"" + textedit + "\" & sleep 2 && open -a \"TextEdit\" \""+path+"\"" 72 | #return self.execute(bash, (bash, "-c", "%s" % args)) 73 | 74 | #This is the simple method you could do if TextEdit weren't sandboxed 75 | #return self.execute(textedit, (textedit, path)) -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/safari.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path 4 | import plistlib 5 | import logging 6 | import subprocess 7 | 8 | from AppKit import * 9 | import Foundation 10 | from PyObjCTools import AppHelper 11 | 12 | from lib.common.abstracts import Package 13 | 14 | #get logger 15 | log = logging.getLogger() 16 | 17 | class OpenURL(NSObject): 18 | 19 | def setPath(self, p, test): 20 | self.path = p 21 | self.loop = test 22 | 23 | def run_(self, event): 24 | if str(event.userInfo()["NSApplicationName"]) == "Safari": 25 | #open the file in question 26 | subprocess.call(["/usr/bin/open", "-a", "Safari", self.path]) 27 | #remove the event observer 28 | ws = NSWorkspace.sharedWorkspace() 29 | nc = ws.notificationCenter() 30 | nc.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None) 31 | self.loop.stop() 32 | 33 | class Safari(Package): 34 | """Safari analysis package.""" 35 | 36 | def stop(self): 37 | self.is_open = True 38 | 39 | def start(self, url): 40 | safari = self.getAppFilePath("/Applications/Safari.app") 41 | 42 | #if we try and open the URL directly with " ", Safari treats it as a file path and tries to find it on the local machine 43 | #Create an observer to watch for Safari opening 44 | ws = NSWorkspace.sharedWorkspace() 45 | nc = ws.notificationCenter() 46 | op = OpenURL.new() 47 | op.setPath(url, self) 48 | nc.addObserver_selector_name_object_(op, 'run:', NSWorkspaceDidLaunchApplicationNotification, None) 49 | #start Safari 50 | pid = self.execute(safari, (safari,)) 51 | 52 | #Wait until the process is open 53 | self.is_open = False 54 | runLoop = NSRunLoop.currentRunLoop() 55 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 56 | while not self.is_open: 57 | date = NSDate.dateWithTimeIntervalSinceNow_(1.0) 58 | NSRunLoop.runUntilDate_(runLoop, date) 59 | 60 | #return the pid of Safari 61 | return pid 62 | 63 | #Old, less reliable bash method 64 | #args = "\"" + safari + "\" & sleep 5 && open -a Safari \""+url+"\"" #went with open because the AppleScript was unreliable 65 | #return self.execute(bash, (bash, "-c", "%s" % args)) 66 | -------------------------------------------------------------------------------- /analyzer/darwin/modules/packages/zip.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path 4 | import stat 5 | import logging 6 | import plistlib 7 | from zipfile import ZipFile, BadZipfile 8 | 9 | from lib.common.abstracts import Package 10 | from lib.common.exceptions import CuckooPackageError 11 | 12 | #get logger 13 | log = logging.getLogger() 14 | 15 | class Zip(Package): 16 | """Zip analysis package.""" 17 | 18 | def start(self, path): 19 | root = os.environ["TMPDIR"] 20 | password = self.options.get("password") 21 | 22 | with ZipFile(path, "r") as archive: 23 | zipinfos = archive.infolist() 24 | try: 25 | archive.extractall(path=root, pwd=password) 26 | except BadZipfile as e: 27 | raise CuckooPackageError("Invalid Zip file") 28 | except RuntimeError: 29 | try: 30 | archive.extractall(path=root, pwd="infected") 31 | except RuntimeError as e: 32 | raise CuckooPackageError("Unable to extract Zip file: " 33 | "{0}".format(e)) 34 | 35 | file_name = self.options.get("file") 36 | # If no file name is provided via option, take the first file. 37 | if not file_name: 38 | # No name provided try to find a better name. 39 | if len(zipinfos): 40 | # Take the first one. 41 | file_name = zipinfos[0].filename 42 | else: 43 | raise CuckooPackageError("Empty ZIP archive") 44 | 45 | file_path = os.path.join(root, file_name) 46 | #Have to make the file(s) executable 47 | os.chmod(file_path, 0o777 | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) 48 | if os.path.isdir(file_path): 49 | for root, dirs, files in os.walk(file_path): 50 | for d in files: 51 | os.chmod(os.path.join(root, d), 0o777 | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) 52 | #Handle .app files separately 53 | if file_name.endswith(".app") or file_name.endswith(".app/"): 54 | (exec_path, fpath) = self.getAppFilePath(file_path) 55 | args = self.options.get("arguments") 56 | if args is None: 57 | return self.execute(exec_path, (exec_path, "%s" % fpath)) 58 | return self.execute(exec_path, (exec_path, "%s %s" % (fpath, args))) 59 | 60 | return self.execute(file_path, (file_path, self.options.get("arguments"))) 61 | 62 | def getAppFilePath(self, file_path): 63 | # the reason we don't just do "open file.app" is because OS X has restrictions on open that make it hard to trace 64 | open = "/usr/bin/open" 65 | 66 | #find Info.plist 67 | plist = "" 68 | path = self.findFile(file_path, "Info.plist") 69 | for root, dirs, files in os.walk(file_path): 70 | for f in files: 71 | if f == "Info.plist": 72 | #read the plist file 73 | plist = plistlib.readPlist(os.path.join(root, f)) 74 | break 75 | 76 | if path == "": #no Info.plist found, this is an illegally structured app 77 | log.info("No Info.plist found within .app file") 78 | return (open, file_path) 79 | else: 80 | plist = plistlib.readPlist(path) 81 | 82 | try: 83 | #get the name of the main executable of this app 84 | exec_file = plist["CFBundleExecutable"] 85 | except KeyError: #no executable was listed, this is an illegally structured app 86 | log.info("No main executable name found in Info.plist") 87 | return (open, file_path) 88 | 89 | #get the full path of the executable 90 | exec_path = self.findFile(file_path, exec_file) 91 | return (exec_path, "") 92 | 93 | 94 | def findDir(self, path, name): 95 | result = "" 96 | for root, dirs, files in os.walk(path): 97 | for d in dirs: 98 | if name in d: 99 | return os.path.join(root, d) 100 | return result 101 | 102 | def findFile(self, path, name): 103 | result = "" 104 | for root, dirs, files in os.walk(path): 105 | for f in files: 106 | if name in f: 107 | return os.path.join(root, f) 108 | return result 109 | 110 | -------------------------------------------------------------------------------- /modules/processing/behavior_osx.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | 9 | The purpose of this module is simply to pull data from the logs of the darwin analyzer into a format that 10 | the signature and JSON module can use (aka a giant structure of dictionaries and lists). 11 | The existing behavior module only works for the Windows analyzer results. 12 | This module pulls data from the file activity logs, the processes log, and the api call logs. 13 | """ 14 | 15 | import os 16 | import os.path 17 | 18 | from lib.cuckoo.common.abstracts import Processing 19 | from lib.cuckoo.common.exceptions import CuckooProcessingError 20 | 21 | class BehaviorOSX(Processing): 22 | """Pull results from log files""" 23 | 24 | def run(self): 25 | """Run extract of printable strings. 26 | @return: list of printable strings. 27 | """ 28 | self.key = "behavior_osx" 29 | 30 | result = {} #dictionary of call lists to return 31 | result["api_calls"] = {} 32 | 33 | #loop through to find the files of interest 34 | for root, dirs, files in os.walk(self.logs_path): 35 | for f in files: 36 | if "api_calls" in f: #api calls go in a dictionary of lists 37 | name = os.path.join(root, f) 38 | result["api_calls"][os.path.splitext(f)[0]] = self.readFile(name) 39 | elif "processes" in f: #processes are just a list 40 | name = os.path.join(root, f) 41 | result["processes"] = self.readFile(name) 42 | elif "file_" in f: #there are 4 logs related to file activity 43 | name = os.path.join(root, f) 44 | result[os.path.splitext(f)[0]] = self.readFile(name, skip=False) 45 | 46 | return result 47 | 48 | def readFile(self, fname, skip=True): 49 | """ 50 | Turns a newline-separated file into a list 51 | :param skip: When true, this means you skip the first line of the file as headers 52 | :param fname: The name of the file to process 53 | :return: a list of the file info 54 | """ 55 | 56 | results = [] # the list of lines in the file 57 | 58 | f = open(fname, "r") 59 | 60 | if skip: #skip the first line of the file 61 | f.readline() 62 | 63 | line = f.readline() 64 | while line is not None and line != "": 65 | results.append(line) 66 | line = f.readline() 67 | 68 | f.close() 69 | 70 | return results 71 | 72 | 73 | -------------------------------------------------------------------------------- /modules/processing/filter_syscall.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | """ 9 | 10 | import os 11 | import os.path 12 | 13 | from lib.cuckoo.common.abstracts import Processing 14 | from lib.cuckoo.common.exceptions import CuckooProcessingError 15 | 16 | filters = ["FILE_ACTIVITY:", "FILE_CREATE:", "FILE_DELETE:", "FILE_WRITE:", "PROCESS:"] 17 | 18 | class FilterSyscall(Processing): 19 | """Filter out syscalls caused by system call hooks.""" 20 | 21 | def filterLine(self, line): 22 | for f in filters: 23 | if f in line: 24 | return True 25 | return False 26 | 27 | def filterFile(self, fname): 28 | data = open(fname, 'r') 29 | output = [] 30 | 31 | #take out the 4 commands that make up the pipe write 32 | i = 0 33 | line = data.readline() 34 | while line != '' and not line is None: 35 | if self.filterLine(line): 36 | if i > 2: 37 | del output[-1] 38 | del output[-1] 39 | line = data.readline() 40 | i += 1 41 | else: 42 | output.append(line) 43 | line = data.readline() 44 | i += 1 45 | 46 | 47 | #close the source file 48 | data.close() 49 | #delete the source file 50 | os.remove(fname) 51 | #rewrite the system call log 52 | out = open(fname, 'w+') 53 | for line in output: 54 | out.write(line) 55 | out.close() 56 | 57 | return output 58 | 59 | 60 | def run(self): 61 | """Filter and extract syscall logs 62 | @return: dictionary of list of syscalls. 63 | """ 64 | self.key = "filter_syscall" 65 | call_logs = [] #list of all the syscall file logs 66 | result = {} #dictionary of call lists to return 67 | 68 | #find all the syscall logs, if any 69 | for root, dirs, files in os.walk(self.logs_path): 70 | for f in files: 71 | if "system_calls" in f: 72 | call_logs.append(os.path.join(root, f)) 73 | 74 | #for each file, look for calls caused by the hook library 75 | for c in call_logs: 76 | result[c] = self.filterFile(c) 77 | 78 | return result 79 | -------------------------------------------------------------------------------- /modules/processing/macho_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | 9 | This file contains the dictionary structures for whatever human-readable parsing we need that wasn't done by macholib. 10 | A lot of the constants were determined using the MachOView program. 11 | http://sourceforge.net/projects/machoview/ 12 | ''' 13 | 14 | ### CPU SUBTYPES ### 15 | 16 | CPU_SUBTYPE_ANY = { 17 | -1: 'MULTIPLE', 18 | 0: 'LITTLE_ENDIAN', 19 | 1: 'BIG_ENDIAN', 20 | } 21 | 22 | CPU_SUBTYPE_HIGH = { 23 | 2147483648: 'LIB64', 24 | 4278190080: 'MASK', 25 | } 26 | 27 | CPU_SUBTYPE_ARM = { 28 | 0: 'ARM_ALL', 29 | 5: 'ARM_V4T', 30 | 6: 'ARM_v6', 31 | 7: 'ARM_V5TEJ', 32 | 9: 'ARM_V7', 33 | 8: 'ARM_XSCALE', 34 | } 35 | 36 | CPU_SUBTYPE_HPPA = { 37 | 0: 'HPPA_7100 OR HPPA_ALL', 38 | 1: 'HPPA_7100LC', 39 | } 40 | 41 | CPU_SUBTYPE_I860 = { 42 | 0: 'I860_ALL', 43 | 1: 'I860_860', 44 | } 45 | 46 | CPU_SUBTYPE_I386 = { 47 | 3: 'I386_ALL', 48 | 5: 'PENT', 49 | 8: 'PENTIUM_3', 50 | 9: 'PENTIUM_M', 51 | 10: 'PENTIUM_4', 52 | 24: 'PENTIUM_3_M', 53 | 22: 'PENTPRO', 54 | 26: 'PENTIUM_4_M', 55 | 40: 'PENTIUM_3_XEON', 56 | 54: 'PENTII_M3', 57 | 86: 'PENTII_M5', 58 | 3: '386', 59 | 4: '486', 60 | 5: '586', 61 | 11: 'ITANIUM', 62 | 12: 'XEON', 63 | 15: 'INTEL_FAMILY_MAX', 64 | 27: 'ITANIUM_2', 65 | 28: 'XEON_MP', 66 | 103: 'CELERON', 67 | 119: 'CELERON_MOBILE', 68 | } 69 | 70 | CPU_SUBTYPE_MC680x0 = { 71 | 1: 'MC680x0_ALL', 72 | 2: 'MC68040', 73 | 3: 'MC68030_ONLY', 74 | } 75 | 76 | CPU_SUBTYPE_MC88000 = { 77 | 0: 'MC88000_ALL', 78 | 1: 'MC88100', 79 | 2: 'MC88110', 80 | 81 | } 82 | 83 | CPU_SUBTYPE_MIPS = { 84 | 0: 'MIPS_ALL', 85 | 1: 'MIPS_R2300', 86 | 2: 'MIPS_R2600', 87 | 3: 'MIPS_R2800', 88 | 4: 'MIPS_R2000a', 89 | 5: 'MIPS_R2000', 90 | 6: 'MIPS_R3000a', 91 | 7: 'MIPS_R3000', 92 | } 93 | 94 | CPU_SUBTYPE_MC98000 = { 95 | 0: 'MC98000_ALL', 96 | 1: 'MC98601', 97 | } 98 | 99 | CPU_SUBTYPE_POWERPC = { 100 | 0: 'POWERPC_ALL', 101 | 1: 'POWERPC_601', 102 | 2: 'POWERPC_602', 103 | 3: 'POWERPC_603', 104 | 4: 'POWERPC_603e', 105 | 5: 'POWERPC_603ev', 106 | 6: 'POWERPC_604', 107 | 7: 'POWERPC_604e', 108 | 8: 'POWERPC_620', 109 | 9: 'POWERPC_750', 110 | 10: 'POWERPC_7400', 111 | 11: 'POWERPC_7450', 112 | 100: 'POWERPC_970', 113 | } 114 | 115 | CPU_SUBTYPE_SPARC = { 116 | 0: 'SPARC_ALL', 117 | } 118 | 119 | CPU_SUBTYPE_VAX = { 120 | 0: 'VAX_ALL', 121 | 1: 'VAX780', 122 | 2: 'VAX785', 123 | 3: 'VAX750', 124 | 4: 'VAX730', 125 | 5: 'UVAXI', 126 | 6: 'UVAXII', 127 | 7: 'VAX8200', 128 | 8: 'VAX8500', 129 | 9: 'VAX8600', 130 | 10: 'VAX8650', 131 | 11: 'VAX8800', 132 | 12: 'UVAXIII', 133 | } 134 | 135 | CPU_SUBTYPE_X86 = { 136 | 3: 'X86_ALL', 137 | 4: 'X86_ARCH1', 138 | } 139 | 140 | CPU_SUBTYPE_X86_64 = { 141 | 3: 'X86_64_ALL', 142 | } 143 | 144 | ### File Types ### 145 | 146 | FILE_TYPE = { 147 | 1: 'MH_OBJECT', #relocatable object file 148 | 2: 'MH_EXECUTE', #demand page executable file 149 | 3: 'MH_FVMLIB', #fixed VM shared library file 150 | 4: 'MH_CORE', #core file 151 | 5: 'MH_PRELOAD', #preloaded executable file 152 | 6: 'MH_DYLIB', #dynamically bound shared library 153 | 7: 'MH_DYLINKER', #dynamic link editor 154 | 8: 'MH_BUNDLE', #dynamically bound bundle file 155 | 9: 'MH_DYLIB_STUB', #shared library stub for static linking only 156 | 10: 'MH_DSYM', #companion file with only debug sections 157 | 11: 'MH_KTEXT_BUNDLE', #x86_64 ktexts 158 | } 159 | 160 | ### Mach-O Header Flags ### 161 | 162 | MACHO_FLAGS = { 163 | 0x1: 'MH_NOUNDEFS', #the object file has no undefined references 164 | 0x2: 'MH_INCRLINK', # the object file is the output of an incremental link against a base file 165 | 0x4: 'MH_DYLDLINK', # the object file is input for the dynamic linker 166 | 0x8: 'MH_BINDATLOAD', # the object file's undefined references are bound by the dynamic linker when loaded. 167 | 0x10: 'MH_PREBOUND', # the file has its dynamic undefined references prebound. 168 | 0x20: 'MH_SPLIT_SEGS', # the file has its read-only and read-write segments split 169 | 0x40: 'MH_LAZY_INIT', #the shared library init routine is to be run lazily via catching memory faults to its writeable segments (obsolete) 170 | 0x80: 'MH_TWOLEVEL', # the image is using two-level name space bindings 171 | 0x100: 'MH_FORCE_FLAT', # the executable is forcing all images to use flat name space bindings 172 | 0x200: 'MH_NOMULTIDEFS', #this umbrella guarantees no multiple defintions of symbols in its sub-images 173 | 0x400: 'MH_NOFIXPREBINDING', # do not have dyld notify the prebinding agent about this executable 174 | 0x800: 'MH_PREBINDABLE', #the binary is not prebound but can have its prebinding redone. only used when MH_PREBOUND is not set. 175 | 0x1000: 'MH_ALLMODSBOUND', # indicates that this binary binds to all two-level namespace modules of its dependent libraries. only used when MH_PREBINDABLE and MH_TWOLEVEL are both set. 176 | 0x2000: 'MH_SUBSECTIONS_VIA_SYMBOLS', # safe to divide up the sections into sub-sections via symbols for dead code stripping 177 | 0x4000: 'MH_CANONICAL', # the binary has been canonicalized via the unprebind operation 178 | 0x8000: 'MH_WEAK_DEFINES', # the final linked image contains external weak symbols 179 | 0x10000: 'MH_BINDS_TO_WEAK', # the final linked image uses weak symbols 180 | 0x20000: 'MH_ALLOW_STACK_EXECUTION', # When this bit is set, all stacks in the task will be given stack execution privilege. Only used in MH_EXECUTE filetypes. 181 | 0x40000: 'MH_ROOT_SAFE', #When this bit is set, the binary declares it is safe for use in processes with uid zero 182 | 0x80000: 'MH_SETUID_SAFE', # When this bit is set, the binary declares it is safe for use in processes when issetugid() is true 183 | 0x100000: 'MH_NO_REEXPORTED_DYLIBS', # When this bit is set on a dylib, the static linker does not need to examine dependent dylibs to see if any are re-exported 184 | 0x200000: 'MH_PIE', #When this bit is set, the OS will load the main executable at a random address. Only used in MH_EXECUTE filetypes. 185 | 0x400000: 'MH_DEAD_STRIPPABLE_DYLIB', #Only for use on dylibs. When linking against a dylib that has this bit set, the static linker will automatically not create a 186 | # LC_LOAD_DYLIB load command to the dylib if no symbols are being referenced from the dylib. 187 | 0x800000: 'MH_HAS_TLV_DESCRIPTORS', # Contains a section of type S_THREAD_LOCAL_VARIABLES 188 | 0x1000000: 'MH_NO_HEAP_EXECUTION', # When this bit is set, the OS will run the main executable with a non-executable heap even on 189 | # platforms (e.g. i386) that don't require it. Only used in MH_EXECUTE filetypes. 190 | } 191 | 192 | ### Mach-O Load Commands ### 193 | 194 | LOAD_CMDS = { 195 | # After MacOS X 10.1 when a new load command is added that is required to be 196 | # understood by the dynamic linker for the image to execute properly the 197 | # LC_REQ_DYLD bit will be or'ed into the load command constant. 198 | 0x80000000: 'LC_REQ_DYLD ', 199 | 200 | 0x1: 'LC_SEGMENT', # segment of this file to be mapped 201 | 0x2: 'LC_SYMTAB', # link-edit stab symbol table info 202 | 0x3: 'LC_SYMSEG', # link-edit gdb symbol table info (obsolete) 203 | 0x4: 'LC_THREAD', # thread 204 | 0x5: 'LC_UNIXTHREAD', # unix thread (includes a stack), replaced by LC_MAIN for OS X 10.8+ 205 | 0x6: 'LC_LOADFVMLIB', # load a specified fixed VM shared library 206 | 0x7: 'LC_IDFVMLIB', # fixed VM shared library identification 207 | 0x8: 'LC_IDENT', # object identification info (obsolete) 208 | 0x9: 'LC_FVMFILE', # fixed VM file inclusion (internal use) 209 | 0xA: 'LC_PREPAGE', # prepage command (internal use) 210 | 0xB: 'LC_DYSYMTAB',# dynamic link-edit symbol table info 211 | 0xC: 'LC_LOAD_DYLIB', #load a dynamically linked shared library 212 | 0xD: 'LC_ID_DYLIB', # dynamically linked shared library identification 213 | 0xE: 'LC_LOAD_DYLINKER', # load a dynamic linker 214 | 0xF: 'LC_ID_DYLINKER', # dynamic linker identification 215 | 0x10: 'LC_PREBOUND_DYLIB', # modules prebound for a dynamically linked shared library 216 | 0x11: 'LC_ROUTINES', # image routines 217 | 0x12: 'LC_SUB_FRAMEWORK', # sub framework 218 | 0x13: 'LC_SUB_UMBRELLA', # sub umbrella 219 | 0x14: 'LC_SUB_CLIENT', # sub client 220 | 0x15: 'LC_SUB_LIBRARY', # sub library 221 | 0x16: 'LC_TWOLEVEL_HINTS', # two-level namespace lookup hints 222 | 0x17: 'LC_PREBIND_CKSUM', # prebind checksum 223 | 0x18: 'LC_LOAD_WEAK_DYLIB', 224 | 0x19: 'LC_SEGMENT_64', #64-bit segment of this file to be mapped 225 | 0x1a: 'LC_ROUTINES_64', # 64-bit image routines 226 | 0x1b: 'LC_UUID', # the uuid 227 | 0x1c: 'LC_RPATH', # runpath additions 228 | 0x1d: 'LC_CODE_SIGNATURE', # location of code signature 229 | 0x1e: 'LC_SEGMENT_SPLIT_INFO', # location of info to split segments 230 | 0x1f: 'LC_REEXPORT_DYLIB', # load and re-export dylib 231 | 0x20: 'LC_LAZY_LOAD_DYLIB', # delay load of dylib until first use 232 | 0x21: 'LC_ENCRYPTION_INFO', # encrypted segment information 233 | 0x22: 'LC_DYLD_INFO', # compressed dyld information 234 | 0x80000022: 'LC_DYLD_INFO_ONLY', # (0x22|LC_REQ_DYLD) - compressed dyld information only 235 | 0x23: 'LC_LOAD_UPWARD_DYLIB', # load upward dylib 236 | 0x24: 'LC_VERSION_MIN_MACOSX', # build for MacOSX min OS version 237 | 0x25: 'LC_VERSION_MIN_IPHONEOS', # build for iPhoneOS min OS version 238 | 0x26: 'LC_FUNCTION_STARTS', # compressed table of function start addresses 239 | 0x27: 'LC_DYLD_ENVIRONMENT', # string for dyld to treat like environment variable 240 | 0x28: 'LC_MAIN', # replacement for LC_UNIXTHREAD 241 | 0x29: 'LC_DATA_IN_CODE', # table of non-instructions in __text 242 | 0x2a: 'LC_SOURCE_VERSION', # source version used to build binary 243 | 0x2b: 'LC_DYLIB_CODE_SIGN_DRS', # Code signing DRs copied from linked dylibs 244 | 0x2c: 'LC_ENCRYPTION_INFO_64', # 64-bit encrypted segment information 245 | 0x2d: 'LC_LINKER_OPTION', #linker options in MH_OBJECT files 246 | } 247 | 248 | ### Section Flags ### 249 | 250 | SECTION_TYPES = { #the lowest byte - note each section has only one type 251 | 0x0: 'S_REGULAR', # regular section 252 | 0x1: 'S_ZEROFILL', # zero fill on demand section 253 | 0x2: 'S_CSTRING_LITERALS', # section with only literal C strings 254 | 0x3: 'S_4BYTE_LITERALS', # section with only 4 byte literals 255 | 0x4: 'S_8BYTE_LITERALS', # section with only 8 byte literals 256 | 0x5: 'S_LITERAL_POINTERS', # section with only pointers to literals 257 | 0x6: 'S_NON_LAZY_SYMBOL_POINTERS', # section with only non-lazy symbol pointers 258 | 0x7: 'S_LAZY_SYMBOL_POINTERS', # section with only lazy symbol pointers 259 | 0x8: 'S_SYMBOL_STUBS', # section with only symbol stubs, byte size of stub in the reserved2 field 260 | 0x9: 'S_MOD_INIT_FUNC_POINTERS', #section with only function pointers for initialization 261 | 0xa: 'S_MOD_TERM_FUNC_POINTERS', # section with only function pointers for termination 262 | 0xb: 'S_COALESCED', # section contains symbols that are to be coalesced 263 | 0xc: 'S_GB_ZEROFILL', # zero fill on demand section (that can be larger than 4 gigabytes) 264 | 0xd: 'S_INTERPOSING', # section with only pairs of function pointers for interposing 265 | 0xe: 'S_16BYTE_LITERALS', # section with only 16 byte literals 266 | 0xf: 'S_DTRACE_DOF', # section contains DTrace Object Format 267 | 0x10: 'S_LAZY_DYLIB_SYMBOL_POINTERS', # section with only lazy symbol pointers to lazy loaded dylibs 268 | #types for thread local variables (TLVs) 269 | 0x11: 'S_THREAD_LOCAL_REGULAR', # template of initial values for TLVs 270 | 0x12: 'S_THREAD_LOCAL_ZEROFILL', # template of initial values for TLVs 271 | 0x13: 'S_THREAD_LOCAL_VARIABLES', # TLV descriptors 272 | 0x14: 'S_THREAD_LOCAL_VARIABLE_POINTERS', # pointers to TLV descriptors 273 | 0x15: 'S_THREAD_LOCAL_INIT_FUNCTION_POINTERS', # functions to call to initialize TLV values 274 | } 275 | 276 | SECTION_ATTR = { #a section can have multiple attributes - high 3 bytes 277 | #User-settable attributes - first byte 278 | 0x80000000: 'S_ATTR_PURE_INSTRUCTIONS', # section contains only true machine instructions 279 | 0x40000000: 'S_ATTR_NO_TOC', # section contains coalesced symbols that are not to be in a ranlib table of contents 280 | 0x20000000: 'S_ATTR_STRIP_STATIC_SYMS', # ok to strip static symbols in this section in files with the MH_DYLDLINK flag 281 | 0x10000000: 'S_ATTR_NO_DEAD_STRIP', # no dead stripping 282 | 0x08000000: 'S_ATTR_LIVE_SUPPORT', # blocks are live if they reference live blocks 283 | 0x04000000: 'S_ATTR_SELF_MODIFYING_CODE', # Used with i386 code stubs written on by dyld 284 | 0x02000000: 'S_ATTR_DEBUG', # a debug section 285 | #System-settable attributes - next two bytes 286 | 0x00000400: 'S_ATTR_SOME_INSTRUCTIONS', # section contains some machine instructions 287 | 0x00000200: 'S_ATTR_EXT_RELOC', # section has external relocation entries 288 | 0x00000100: 'S_ATTR_LOC_RELOC', # section has local relocation entries 289 | } 290 | 291 | ### Virtual Memory Protection Flags - see mach/vm_prot.h ### 292 | 293 | VM_PROT = { 294 | 0x00: 'VM_PROT_NONE', 295 | 0x01: 'VM_PROT_READ', #read permission 296 | 0x02: 'VM_PROT_WRITE', #write permission 297 | 0x04: 'VM_PROT_EXECUTE', #execute permission 298 | 0x08: 'VM_PROT_NO_CHANGE', #technically invalid, only used by memory_object_lock_request 299 | 0x10: 'VM_PROT_COPY', #when the caller cannot obtain write permission, this can be used to make a working copy 300 | 0x10: 'VM_PROT_WANTS_COPY', #only used by memory_object_data_request 301 | 302 | (0x01 | 0x02): 'VM_PROT_DEFAULT (rw)', #read and write permissions, the default for new virtual memory 303 | (0x01 | 0x02 | 0x04): 'VM_PROT_ALL (rwe)', #max possible permissions, used for parameter checking 304 | } -------------------------------------------------------------------------------- /modules/processing/static_macho.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 3 | this work by or on behalf of the U.S. Government. 4 | NOTICE: 5 | For five (5) years from the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE. 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws. 8 | 9 | This script extracts the metadata, imports, and whatever else we can get statically from Mach-O and FAT files. 10 | 11 | # Depends on python-magic (and libmagic) and macholib 12 | # Macholib: https://pypi.python.org/pypi/macholib/ 13 | # Python-magic: https://github.com/ahupp/python-magic 14 | # Libmagic Instructions: http://www.brambraakman.com/blog/comments/installing_libmagic_in_mac_os_x_for_python-magic/ 15 | 16 | """ 17 | 18 | import struct 19 | from zipfile import ZipFile, BadZipfile 20 | import os 21 | import os.path 22 | import plistlib 23 | import logging 24 | import shutil 25 | import subprocess 26 | 27 | #the magic library is used to identify the file type, since we can only handle certain kinds 28 | try: 29 | import magic 30 | HAVE_MAGIC = True 31 | except ImportError: 32 | HAVE_MAGIC = False 33 | # The macholib library is used to handle the byte-parsing of a lot of the file structures 34 | try: 35 | #import the parsing stuff from the macholib library 36 | from macholib.MachO import MachO 37 | #import the header constants 38 | from macholib.mach_o import * 39 | HAVE_MACHO = True 40 | except ImportError: 41 | HAVE_MACHO = False 42 | 43 | '''Cuckoo libraries''' 44 | from lib.cuckoo.common.abstracts import Processing #the framework for all processing modules 45 | from lib.cuckoo.common.exceptions import CuckooProcessingError #error to throw if something goes wrong 46 | import modules.processing.macho_data as data #custom library of human-readable field conversions 47 | 48 | #get logger 49 | log = logging.getLogger() 50 | 51 | 52 | class MachO_Parse: 53 | """Mach-O and FAT file static analysis""" 54 | 55 | def __init__(self, file_path): 56 | ### Class Variables ### 57 | self.file_name = file_path #a single file to parse 58 | 59 | def parse(self): 60 | """Parse the file's static attributes. 61 | @return: analysis results dict or None. 62 | """ 63 | results = {} 64 | 65 | # get the signature info via the codesign utility 66 | args = ["codesign","-dvvvv", self.file_name] 67 | proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 68 | output, error_output = proc.communicate() 69 | if proc.returncode: #error, probably file not signed 70 | results["signature"] = error_output 71 | else: 72 | results["signature"] = output 73 | 74 | #get the file object 75 | file_object = open(self.file_name, 'rb') 76 | 77 | 78 | #Use the macho library to parse out some structures 79 | pFile = MachO(self.file_name) 80 | 81 | #if this is a fat file, it will have multiple Mach-O objects inside it 82 | results["FAT_header"] = self.parseFATHeader(file_object, pFile) 83 | 84 | #parse all the Mach-O headers 85 | i = 1 86 | for h in pFile.headers: 87 | results["MachO_header" + str(i)] = self.parseMachOHeader(h, file_object) 88 | i +=1 89 | 90 | #close the file 91 | file_object.close() 92 | 93 | #return the dict of results 94 | return results 95 | 96 | def parseMachOHeader(self, header, file_object): 97 | results = {} 98 | m = header.MH_MAGIC 99 | #get down to the actual header info 100 | h = header.header 101 | 102 | ### get human-readable strings ### 103 | cpu_type = CPU_TYPE_NAMES.get(h.cputype, h.cputype) 104 | results["cpu_type"] = cpu_type 105 | #this needs a mask due to a couple high-bit types like lib64 106 | hex_stype = (h.cpusubtype + (1 << 32)) % (1 << 32) #because some numbers turn out negative when read 107 | cpu_stype = self.getCPUSubtype(cpu_type, hex_stype & ~0xff000000) 108 | #test for the high-bit ones 109 | try: 110 | s = self.getCPUSubtype('high', hex_stype & 0xff000000) 111 | cpu_stype += ", " + s 112 | except TypeError: #meaning no matches 113 | pass 114 | results["cpu_subtype"] = cpu_stype 115 | #get the file type - library, executable, etc. 116 | results["ftype"] = data.FILE_TYPE.get(int(h.filetype)) 117 | #get the list of flags 118 | results["flags"] = self.getFlags(h.flags) 119 | 120 | 121 | #reserved field only exists in 64-bit headers, so set to None if 32-bit 122 | res = 'NULL' 123 | if hasattr(h, 'reserved'): 124 | res = h.reserved 125 | results["reserved"] = res 126 | 127 | #parse the load commands 128 | (results["load_commands"], results["sections"]) = self.parseLoadCommands(header.commands) 129 | 130 | #get a stand-alone list of the dynamically linked libraries, just for convenience 131 | libs = [] 132 | for lib in header.walkRelocatables(): 133 | libs.append(lib[2]) 134 | results["DyLinkedLibs"] = libs 135 | 136 | #get the imports/exports from the symbol table 137 | sym_cmd = header.getSymbolTableCommand() #get the symbol table load command from the header 138 | dyn_cmd = header.getDynamicSymbolTableCommand() #get the dynamic symbol table load command from the header 139 | (results["DefExtSymbols"], results["UndefExtSymbols"]) = self.parseSymbolTable(sym_cmd, dyn_cmd, file_object, header) 140 | 141 | 142 | return results 143 | 144 | def parseLoadCommands(self, commands): 145 | results = [] #list of all load commands to return 146 | sections = [] 147 | 148 | # Each command is a tuple with 3 entries 149 | i = 0 150 | for cmd in commands: #list of load commands for one MachO header 151 | c = {} 152 | #the first entry is a load_command structure, made up of the command type and its size 153 | #get the human-readable command name 154 | cmd_name = data.LOAD_CMDS.get(cmd[0].cmd) 155 | #test for the high-bit LC_REQ_DYLD 156 | if cmd_name is None: 157 | s1 = data.LOAD_CMDS.get(cmd[0].cmd & 0xff000000) #check the high bit separately 158 | cmd_name = s1 159 | s2 = data.LOAD_CMDS.get(cmd[0].cmd & 0x00ffffff) #check the low bits 160 | cmd_name += ", " + s2 161 | c["cmd_name"] = cmd_name 162 | c["cmd_size"] = int(cmd[0].cmdsize) 163 | 164 | #the second entry in the tuple is the actual content of the command, which varies depending on the command 165 | # Since we can't predict the content without an excessively long switch statement, we just pull out 166 | # the structure attributes directly as a dict. It's not elegant but it works. 167 | c["cmd_content"] = cmd[1].__dict__["_objects_"] 168 | 169 | #sometimes some of the dict objects will create JSON errors 170 | for key in c["cmd_content"]: 171 | if isinstance(c["cmd_content"][key], str): #these strings are ASCII, and sometimes they don't play well with UTF-8 172 | c["cmd_content"][key] = c["cmd_content"][key].decode('utf-8', 'ignore').strip('\u0000') 173 | if isinstance(c["cmd_content"][key], mach_version_helper): #these are Python objects JSON can't handle 174 | c["cmd_content"][key] = c["cmd_content"][key].__dict__["_objects_"] 175 | 176 | 177 | # the third thing in the tuple is a string used by the command (usually a library/framework name) 178 | if "LC_SEGMENT" in c["cmd_name"]: #unless its a segment, then the sections need to be parsed 179 | #get human-readable memory flags for the segment 180 | c["cmd_content"]["maxprot"] = self.getMemProt(c["cmd_content"]["maxprot"]) 181 | c["cmd_content"]["initprot"] = self.getMemProt(c["cmd_content"]["initprot"]) 182 | #parse the sections in the segment 183 | for sec in cmd[2]: 184 | sec2 = sec.__dict__["_objects_"] 185 | 186 | #parse the flags of sections into human-readable text 187 | # There is a type flag and one or more attribute flags in the 4-byte field 188 | sec2["flags"] = self.parseSectionFlags(sec2["flags"]) 189 | 190 | #add the section to the list 191 | sections.append(sec.__dict__["_objects_"]) 192 | else: 193 | c["strings"] = cmd[2] 194 | if isinstance(c["strings"], str): #these strings are ASCII, and sometimes they don't play well with UTF-8 195 | c["strings"] = c["strings"].decode('utf-8', 'ignore') 196 | 197 | results.append(c) #add the command to the list 198 | i += 1 199 | 200 | 201 | return (results, sections) 202 | 203 | def getMemProt(self, flags): 204 | #check to see if it has a single dict value 205 | if not data.VM_PROT.get(flags) is None: 206 | return data.VM_PROT.get(flags) 207 | else: 208 | f = '' #string to hold the flags 209 | for i in range(0, 31): #flags are each one bit, so check each bit in 4 bytes 210 | mask = 1 << i 211 | flag = flags & mask 212 | if flag in data.VM_PROT and flag != 0: 213 | if f != '': 214 | f += ", " 215 | f += data.VM_PROT.get(flag) 216 | return f 217 | 218 | def parseSectionFlags(self, flags): 219 | f = '' #variable to store all the flags in 220 | 221 | #get the type - stored in the lowest byte 222 | type = data.SECTION_TYPES.get(flags & 0x000000ff) 223 | if not type is None: 224 | f += type 225 | 226 | #get the user-settable attributes - highest byte 227 | a1 = data.SECTION_ATTR.get(flags & 0xff000000) 228 | if not a1 is None: 229 | f += ", " + a1 230 | 231 | #get the system-settable attributes - middle two bytes 232 | a2 = data.SECTION_ATTR.get(flags & 0x00ffff00) 233 | if not a2 is None: 234 | f += ", " + a2 235 | 236 | return f 237 | 238 | 239 | def parseSymbolTable(self, sym_cmd, dyn_cmd, file_object, header): 240 | if dyn_cmd is None or sym_cmd is None: 241 | return 242 | try: 243 | offset = header.offset 244 | endian = header.endian 245 | # The symbol table is actually made up of several partitions. These partitions and their offsets 246 | # are listed in the LC_DYSYMTAB load command. 247 | symbols = [] 248 | # the human-readable string of the symbol table are actually stored in the strings table, so get those 249 | #go to the beginning of the strings table, offset from the beginning of the Mach-O object 250 | file_object.seek(0) 251 | file_object.seek(sym_cmd.stroff+offset) 252 | #file_object.seek(sym_cmd.stroff, offset) #for some reason this throws an IOError 253 | strs = file_object.read(sym_cmd.strsize) #read in the entire string table 254 | #each string is null (00) terminated, so you can split on that 255 | # however the indexes to the string table are byte offsets, so this is not necessary really 256 | # strings = strs.split('\x00') 257 | 258 | # go to the beginning of the symbol table 259 | file_object.seek(0) 260 | file_object.seek(sym_cmd.symoff+offset) 261 | undef = [] #undefined external symbols 262 | defined = [] #defined external symbols 263 | #seek to the beginning index of the defined external symbols 264 | for i in xrange(dyn_cmd.iextdefsym): 265 | file_object.read(12) 266 | #if this is a 64-bit object file, there will be an extra 4 blank bytes 267 | if isinstance(header.header, mach_header_64): 268 | file_object.read(4) 269 | 270 | #read the number of defined external symbols specified in LC_DYSYMTAB 271 | for i in xrange(dyn_cmd.nextdefsym): 272 | # get the index to the strings table - this is 4 bytes long 273 | t = file_object.read(4) 274 | # the endian of the Mach-O object is in the header 275 | index = struct.unpack(endian+'L', t)[0] 276 | #indirect.append(''.join('%02x' % ord(byte) for byte in t)) 277 | file_object.read(8) #skip the rest of the symbol table entry - 8 bytes total 278 | #if this is a 64-bit object file, there will be an extra 4 blank bytes 279 | if isinstance(header.header, mach_header_64): 280 | file_object.read(4) 281 | 282 | if index == 0: # a null string has an index of 0 283 | defined.append('NULL') 284 | else: #get the human-readable string at the index 285 | str = '' 286 | b = strs[index] 287 | i = 0 288 | while (b != b'\x00'): 289 | str = str + b 290 | i += 1 291 | b = strs[index+i] 292 | defined.append(str) 293 | 294 | #read the number of undefined external symbols specified in LC_DYSYMTAB 295 | for i in xrange(dyn_cmd.nundefsym): 296 | # get the index to the strings table - this is 4 bytes long 297 | t = file_object.read(4) 298 | # the endian of the Mach-O object is in the header 299 | index = struct.unpack(endian+'L', t)[0] 300 | #indirect.append(''.join('%02x' % ord(byte) for byte in t)) 301 | file_object.read(8) #skip the rest of the symbol table entry - 8 bytes total 302 | #if this is a 64-bit object file, there will be an extra 4 blank bytes 303 | if isinstance(header.header, mach_header_64): 304 | file_object.read(4) 305 | 306 | if index == 0: # a null string has an index of 0 307 | undef.append('NULL') 308 | else: #get the human-readable string at the index 309 | str = '' 310 | b = strs[index] 311 | i = 0 312 | while (b != b'\x00'): 313 | str = str + b 314 | i += 1 315 | b = strs[index+i] 316 | undef.append(str) 317 | except: 318 | defined = "Error: malformed symbol table" 319 | undef = [] 320 | 321 | return (defined, undef) 322 | 323 | def getFlags(self, flags): 324 | f = '' #string to hold the flags 325 | for i in range(0, 31): #flags are each one bit, so check each bit in 4 bytes 326 | mask = 1 << i 327 | flag = flags & mask 328 | if flag in data.MACHO_FLAGS: 329 | if f != '': 330 | f += ", " 331 | f += data.MACHO_FLAGS.get(flag) 332 | return f 333 | 334 | def parseFATHeader(self, f, pFile): 335 | results = {} 336 | #If this is a FAT file, it will have an extra header 337 | if not (pFile.fat is None): 338 | 339 | #insert the main FAT header fields 340 | results["Magic"] = pFile.fat.magic 341 | results["n_arch"] = pFile.fat.nfat_arch 342 | 343 | #seek past the first couple FAT header fields (2 fields, 4 bytes each) 344 | f.seek(8) 345 | #parse the sub-file object structures (fat_arch structures) 346 | archs = [fat_arch.from_fileobj(f) for i in range(pFile.fat.nfat_arch)] 347 | a_results = {} 348 | for a in archs: 349 | ar = {} 350 | #get human-readable names 351 | cpu_type = CPU_TYPE_NAMES.get(a.cputype, a.cputype) 352 | cpu_stype = self.getCPUSubtype(cpu_type, a.cpusubtype) 353 | 354 | ar["cpu_subtype"] = cpu_stype 355 | ar["offset"] = a.offset 356 | ar["size"] = a.size 357 | ar["alignment"] = a.align 358 | a_results[cpu_type] = ar 359 | 360 | results["archs"] = a_results 361 | 362 | return results 363 | 364 | '''Get the human-readable cpu subtype. 365 | This is a bit complicate because there seems to be no defined mapping for cpu_type to cpu_subtype, so I had to guess for some. 366 | ctype = human-readable cpu_type 367 | stype = cpu_subtype ''' 368 | def getCPUSubtype(self, ctype, stype): 369 | if 'ARM' in ctype: 370 | return data.CPU_SUBTYPE_ARM.get(stype) 371 | elif 'HPPA' in ctype: 372 | return data.CPU_SUBTYPE_HPPA.get(stype) 373 | elif 'i860' in ctype: 374 | return data.CPU_SUBTYPE_I860.get(stype) 375 | elif 'i386' in ctype: 376 | return data.CPU_SUBTYPE_I386.get(stype) 377 | elif 'MC68' in ctype: 378 | return data.CPU_SUBTYPE_MC680x0.get(stype) 379 | elif 'MC88' in ctype: 380 | return data.CPU_SUBTYPE_MC88000.get(stype) 381 | elif 'MC98' in ctype: 382 | return data.CPU_SUBTYPE_MC98000.get(stype) 383 | elif 'MIPS' in ctype: 384 | return data.CPU_SUBTYPE_MIPS.get(stype) 385 | elif 'PowerPC' in ctype: 386 | return data.CPU_SUBTYPE_POWERPC.get(stype) 387 | elif 'SPARC' in ctype: 388 | return data.CPU_SUBTYPE_SPARC.get(stype) 389 | elif 'VAX' in ctype: 390 | return data.CPU_SUBTYPE_VAX.get(stype) 391 | elif 'x86_64' in ctype: 392 | return data.CPU_SUBTYPE_X86_64.get(stype) 393 | elif 'x86' in ctype: 394 | return data.CPU_SUBTYPE_X86.get(stype) 395 | elif 'high' in ctype: 396 | data.CPU_SUBTYPE_HIGH.get(stype) 397 | else: 398 | return data.CPU_SUBTYPE_ANY.get(stype) 399 | 400 | 401 | class StaticMac(Processing): 402 | """ 403 | The class that is actually called by Cuckoo when the processing modules are run. 404 | It collects the results from the MachO class, which does all the real work. 405 | """ 406 | 407 | def run(self): 408 | """ 409 | Run the analysis. 410 | @return: results dict. 411 | """ 412 | #This is the name of the subcontainer Cuckoo will use for the returned data 413 | self.key = "static_macho" 414 | static_macho = {} #the dictionary to store the results in 415 | 416 | if self.task["category"] == "file": #If cuckoo analyzes a file, not a URL 417 | if HAVE_MACHO and HAVE_MAGIC: #if the proper libraries are installed 418 | if not (self.file_path is None): #if the file exists 419 | kind = magic.from_file(self.file_path) #get the file type 420 | #if it is Mach-O, parse it. Note FAT files are listed as Mach-O with multiple architectures 421 | if not (kind is None) and ("Mach-O" in kind): 422 | static_macho = MachO_Parse(self.file_path).parse() 423 | elif not (kind is None) and ("Zip" in kind): #could be an app file 424 | log.info(".zip file found, checking for executables inside") 425 | static_macho = self.handleZip() 426 | else: 427 | log.info("File is not Mach-O or FAT file, quitting module") 428 | 429 | return static_macho 430 | 431 | """ 432 | .app files contain Mach-O files, but they have to be submitted as zips 433 | This attempts to analyze the main executable if it is a .app file. 434 | """ 435 | def handleZip(self): 436 | root = os.environ["TMPDIR"] 437 | static_macho = {} #the dictionary to store the results in 438 | 439 | with ZipFile(self.file_path, "r") as archive: 440 | zipinfos = archive.namelist() 441 | 442 | if not len(zipinfos): #this is an empty zip file 443 | return static_macho 444 | 445 | try: 446 | exec_file = "" 447 | for z in zipinfos: 448 | if z.endswith(".app") or z.endswith(".app/"): #there is an app file 449 | # extract the Info.plist 450 | try: 451 | plist_path = archive.open(z + "Contents/Info.plist") 452 | plist = plistlib.readPlist(plist_path) 453 | exec_file = plist["CFBundleExecutable"] 454 | except KeyError: 455 | log.info("Malformed .app file " + z + ", aborting static analysis") 456 | break 457 | 458 | if exec_file == "": 459 | return static_macho 460 | 461 | for z in zipinfos: 462 | if os.path.basename(z) == exec_file: 463 | #write out the executable file with only read permissions 464 | outpath = os.path.join(root, os.path.basename(z)) 465 | out1 = open(outpath, "w+") 466 | out1.close() 467 | os.chmod(outpath, 0664) 468 | #write the file out 469 | out2 = open(outpath, "w+") 470 | bytes = archive.read(z) 471 | out2.write(bytes) 472 | out2.close() 473 | #parse the file 474 | static_macho = MachO_Parse(outpath).parse() 475 | #delete the file 476 | os.remove(outpath) 477 | 478 | except BadZipfile: 479 | log.error("Unable to open zip file") 480 | return static_macho 481 | except RuntimeError: 482 | return static_macho 483 | 484 | --------------------------------------------------------------------------------