├── README.md
├── analyzer
    └── darwin
    │   ├── __init__.py
    │   ├── analysis.conf
    │   ├── analyzer.py
    │   ├── dylib
    │       ├── cuckoohooks.c
    │       ├── cuckoohooks.dylib
    │       ├── cuckoohooks.h
    │       ├── cuckoohooks.o
    │       ├── cuckoohooks_32.dylib
    │       ├── cuckoohooks_64.dylib
    │       └── makefile
    │   ├── lib
    │       ├── __init__.py
    │       ├── api
    │       │   ├── __init__.py
    │       │   ├── apitrace
    │       │   ├── process.py
    │       │   └── screenshot.py
    │       ├── common
    │       │   ├── __init__.py
    │       │   ├── abstracts.py
    │       │   ├── constants.py
    │       │   ├── exceptions.py
    │       │   ├── hashing.py
    │       │   ├── rand.py
    │       │   └── results.py
    │       └── core
    │       │   ├── __init__.py
    │       │   ├── config.py
    │       │   ├── packages.py
    │       │   └── startup.py
    │   └── modules
    │       ├── __init__.py
    │       ├── auxiliary
    │           ├── __init__.py
    │           ├── human.py
    │           └── screenshots.py
    │       └── packages
    │           ├── __init__.py
    │           ├── doc.py
    │           ├── generic.py
    │           ├── html.py
    │           ├── jar.py
    │           ├── macho.py
    │           ├── pdf.py
    │           ├── python.py
    │           ├── rtf.py
    │           ├── safari.py
    │           └── zip.py
└── modules
    └── processing
        ├── behavior_osx.py
        ├── filter_syscall.py
        ├── macho_data.py
        └── static_macho.py


/README.md:
--------------------------------------------------------------------------------
 1 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software.
 2 | 
 3 | NOTICE:
 4 | 
 5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
 8 | 
 9 | This analyzer extends the open-source [Cuckoo Sandbox](https://github.com/cuckoobox/cuckoo) with functionality for analyzing OS X malware in an OS X guest VM.
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/analyzer/darwin/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/analyzer/darwin/analysis.conf:
--------------------------------------------------------------------------------
 1 | [analysis]
 2 | category = file
 3 | target = /Example/path/to/Sample
 4 | clock = 20140619T11:15:56
 5 | file_type = Mach-O 64-bit x86_64 executable
 6 | ip = 10.0.0.1
 7 | package =
 8 | options =
 9 | enforce_timeout = True
10 | timeout = 120
11 | file_name = Sample
12 | id = 1
13 | port = 2042
14 | 


--------------------------------------------------------------------------------
/analyzer/darwin/analyzer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 
  3 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  4 | this work by or on behalf of the U.S. Government. 
  5 | NOTICE:
  6 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  7 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  8 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  9 | 
 10 | This is the darwin analyzer for Cuckoo - OS X is built on Darwin, which is a UNIX and FreeBSD based open-source OS.
 11 | It was released by Apple in 2000. That's why the OS X analyzer is called darwin - Cuckoo's choice, not mine.
 12 | 
 13 | The process for the analyzer seems to be as follows:
 14 | 
 15 | 1. Cuckoo checks the status of agent.py (running on the guest) until it comes up
 16 | 2. Cuckoo sends a zipped file of analyzer/darwin/ to agent.py
 17 | 3. The guest agent unzips the files to /ANALYZER_FOLDER/<random 5-10 characters>/
 18 |     (ANALYZER_FOLDER is the home folder of the user agent.py runs as)
 19 | 4. Cuckoo sends the key/value pair options for the analysis.
 20 | 5. The agent writes those options to analysis.conf in the folder with the unzipped files
 21 | 6. Cuckoo sends the file sample to the agent
 22 | 7. The agent writes the file to /tmp
 23 | 8. Cuckoo sends the execute commands
 24 | 9. The agent executes analyzer.py
 25 | 10. Cuckoo polls the agent until the analyzer has finished
 26 | 11. The agent sends the results folder to Cuckoo
 27 | 12. Cuckoo stores the results in storage/ on the host
 28 | 
 29 | Source: http://public.honeynet.org/pipermail/cuckoo/2013-June/001489.html
 30 | """
 31 | 
 32 | import logging
 33 | import sys
 34 | import os
 35 | import os.path
 36 | import traceback
 37 | import xmlrpclib
 38 | import random
 39 | import hashlib
 40 | import socket
 41 | import time
 42 | import fnmatch
 43 | import fcntl
 44 | import termios
 45 | import array
 46 | import pkgutil
 47 | import subprocess
 48 | import psutil
 49 | 
 50 | from threading import Lock, Thread
 51 | from ctypes import create_unicode_buffer, create_string_buffer
 52 | from ctypes import c_wchar_p, byref, c_int, sizeof
 53 | 
 54 | ''' These are Cuckoo files we are importing here '''
 55 | from lib.common.constants import PATHS, PIPE, SHUTDOWN_MUTEX #OS path constants
 56 | from lib.core.startup import create_folders, init_logging #creates results folders and configures logger
 57 | from lib.core.config import Config #parses the analysis.conf configuration file
 58 | from lib.common.results import upload_to_host
 59 | from lib.common.hashing import hash_file
 60 | from lib.api.process import Process
 61 | from lib.common.abstracts import Package, Auxiliary
 62 | from lib.core.packages import choose_package
 63 | from lib.common.exceptions import CuckooError, CuckooPackageError
 64 | from modules import auxiliary
 65 | 
# Root logger (getLogger() called without a name); used by every function
# and class in this module.
log = logging.getLogger()

FILES_LIST = [] # paths of files queued for transfer to the Cuckoo host
DUMPED_LIST = [] # SHA-256 digests of files already transferred to the host
PROCESS_LIST = [] # PIDs of watched processes, either started by us or by the sample

# Names compared against Process.get_filepath() by the pipe handler;
# a newly announced process whose value matches is recorded but not traced.
PROTECTED_LIST = ["sleep", "kernel_task"]
 74 | 
 75 | def add_pid(pid):
 76 |     """Add a process to process list."""
 77 |     if isinstance(pid, (int, long, str)):
 78 |         log.info("Added new process to list with pid: %s", pid)
 79 |         PROCESS_LIST.append(int(pid))
 80 | 
 81 | def add_pids(pids):
 82 |     """Add PID."""
 83 |     if isinstance(pids, (tuple, list)):
 84 |         for pid in pids:
 85 |             add_pid(pid)
 86 |     else:
 87 |         add_pid(pids)
 88 | 
 89 | def add_file(file_path):
 90 |     """Add a file to list of files to be copied to the host."""
 91 |     if file_path not in FILES_LIST:
 92 |         log.info("Added new file to list with path: %s",
 93 |                  unicode(file_path).encode("utf-8", "replace"))
 94 |         FILES_LIST.append(file_path)
 95 | 
 96 | def dump_file(file_path):
 97 |     """Create a copy of the given file path and send it to the host."""
 98 |     try:
 99 |         if os.path.exists(file_path):
100 |             sha256 = hash_file(hashlib.sha256, file_path)
101 |             if sha256 in DUMPED_LIST:
102 |                 # The file was already dumped
103 |                 # Cuckoo normally just skips the file, I have chosen not to
104 |                 #return
105 |                 log.warning("File at path \"%s\" has a hash that is a duplicate of another dumped file.",
106 |                         file_path)
107 |         else:
108 |             log.warning("File at path \"%s\" does not exist, skip.",
109 |                         file_path)
110 |             return
111 |     except IOError as e:
112 |         log.warning("Unable to access file at path \"%s\": %s", file_path, e)
113 |         return
114 | 
115 |     log.info("File path is %s and file size is %d.", file_path, os.stat(file_path).st_size)
116 | 
117 |     #choose the correct folder
118 |     if "logs" in file_path:
119 |         upload_path = os.path.join("logs", os.path.basename(file_path))
120 |     elif "drop" in file_path:
121 |         upload_path = os.path.join("files", os.path.basename(file_path))
122 |     else:
123 |         upload_path = os.path.join("files",
124 |                                str(random.randint(100000000, 9999999999)),
125 |                                os.path.basename(file_path))
126 |     log.info("Upload path is %s.", upload_path)
127 | 
128 |     #send file to host
129 |     try:
130 |         upload_to_host(file_path, upload_path)
131 |         DUMPED_LIST.append(sha256)
132 |     except (IOError, socket.error) as e:
133 |         log.error("Unable to upload dropped file at path \"%s\": %s",
134 |                   file_path, e)
135 | 
def dump_files():
    """Upload every queued file to the host.

    Walks FILES_LIST in order and hands each path to dump_file().
    """
    for queued_path in FILES_LIST:
        log.info("Dumping %s to host,", queued_path)
        dump_file(queued_path)
142 | 
class PipeServer(Thread):
    """Cuckoo PIPE server.

    This Pipe Server receives notifications from the injected processes for
    new processes being spawned and for files being created or deleted.

    It creates a FIFO, polls it for readable bytes and starts PipeHandler
    threads (at most MAX_HANDLERS alive at a time) to consume the data.
    """

    # Upper bound on PipeHandler threads alive at the same time.
    MAX_HANDLERS = 20

    def __init__(self, pipe_name=PIPE):
        """@param pipe_name: Cuckoo PIPE server name."""
        log.info("Starting PipeSever")
        Thread.__init__(self)
        self.pipe_name = pipe_name
        self.fd = -1                          # FIFO file descriptor, -1 until opened
        self.do_run = True                    # cleared by stop() to end run()
        self.handlers = []                    # PipeHandler threads still alive
        self.created = False                  # True once mkfifo() succeeded
        self.iocbuf = array.array('i', [0])   # receives the FIONREAD byte count
        self.h_pipe = None                    # file object wrapping self.fd

    def _spawn_handler(self):
        """Start one PipeHandler if there is room for it.

        Handlers that have finished are dropped from the bookkeeping list
        first; without that the list fills up after MAX_HANDLERS handlers
        have ever been started and no further pipe data is processed.

        @return: True if a handler was started, False otherwise.
        """
        self.handlers = [h for h in self.handlers if h.is_alive()]
        if self.h_pipe is None or len(self.handlers) >= self.MAX_HANDLERS:
            return False

        handle = PipeHandler(self.h_pipe)
        handle.daemon = True
        handle.start()
        self.handlers.append(handle)
        return True

    def stop(self):
        """Stop PIPE server.

        Keeps draining the pipe for up to 30 extra seconds while it still
        holds unread bytes, then ends the run() loop and removes the FIFO.
        """
        deadline = time.time() + 30
        while self.get_content() > 0 and time.time() < deadline:
            self._spawn_handler()
            log.info("Waiting to process all pipe data")
            time.sleep(0.5)
        self.do_run = False

        # Delete the FIFO pipe. A failure here (e.g. it was never created)
        # must not propagate and abort the caller's result collection.
        try:
            os.unlink(self.pipe_name)
        except OSError as e:
            log.warning("Unable to remove pipe %s: %s", self.pipe_name, e)

    def is_done(self):
        """@return: True once stop() has ended the server loop."""
        return not self.do_run

    def run(self):
        """Create and run PIPE server.
        @return: operation status (False if the pipe could not be set up).
        """
        if not self.created:
            try:
                os.mkfifo(self.pipe_name)
                self.created = True
                # A plain blocking open() would wait for a writer and no
                # input would be received, so open non-blocking.
                self.fd = os.open(self.pipe_name,
                                  os.O_RDONLY | os.O_NONBLOCK)
                # Wrap the file descriptor in a file object.
                self.h_pipe = os.fdopen(self.fd, 'r')
                log.info("Opened pipe file %s with fd %d",
                         self.pipe_name, self.fd)
            except (IOError, OSError):
                log.exception("Unable to start process communication pipe.")
                # Without a pipe there is nothing to poll.
                return False

        while self.do_run:
            # There is no way to poll for a connection to a named pipe on
            # Unix, so we check how many bytes are waiting instead.
            if self.get_content() > 0 and self._spawn_handler():
                continue
            # Nothing to read, or no free handler slot: sleep for a bit.
            time.sleep(0.05)

        return True

    def get_content(self):
        """
        Get the number of bytes in the pipe that can be read.
        @return: the number of bytes, or 0 if the pipe is not open or the
                 query failed.
        """
        if self.fd < 0:
            return 0
        try:
            fcntl.ioctl(self.fd, termios.FIONREAD, self.iocbuf, True)
        except (IOError, OSError):
            log.exception("Unable to read bytes from pipe.")
            # Do not report a stale count from an earlier successful call.
            return 0
        return self.iocbuf[0]
224 | 
225 | 
class PipeHandler(Thread):
    """Pipe Handler.

    This class handles the notifications received through the Pipe Server and
    decides what to do with them. Each notification is one newline-terminated
    line starting with one of the prefixes FILE_ACTIVITY:, FILE_CREATE:,
    FILE_DELETE:, FILE_WRITE:, PROCESS: or EXEC:.
    """

    F_LOGFILE = "file_activity.log"
    W_LOGFILE = "file_writes.log"
    C_LOGFILE = "file_creation.log"
    D_LOGFILE = "file_deletion.log"
    # Shared by all handlers so only one thread reads from the pipe at a time.
    read_lock = Lock()

    def __init__(self, h_pipe):
        """@param h_pipe: PIPE to read."""
        Thread.__init__(self)
        self.h_pipe = h_pipe
        self.part = ''       # fragments of a line that has not ended yet
        self.done = False    # set once run() has finished

    def _read_line(self):
        """Read one line from the pipe, retrying on IOError.

        The lock is held through a ``with`` block so it is released even when
        readline() raises; otherwise the retry would block forever on a lock
        this thread already holds.

        @return: the line read, or '' at end of file.
        """
        while True:
            try:
                with PipeHandler.read_lock:
                    return self.h_pipe.readline()
            except IOError:
                log.error("Unable to open process communication pipe, retrying.")
                # Brief pause so a persistent error does not spin the CPU.
                time.sleep(0.05)

    def _track_process(self, raw_pid):
        """Start watching a process announced through the pipe.

        @param raw_pid: text following the "PROCESS:" prefix.
        @return: the Process wrapper created, or None if nothing was added.
        """
        try:
            process_id = int(raw_pid)
        except ValueError:
            log.error("Invalid process id in pipe command: %s", raw_pid)
            return None

        if process_id in PROCESS_LIST or not psutil.pid_exists(process_id):
            return None

        try:
            h_p = psutil.Process(process_id)
        except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
            # The process may exit between pid_exists() and here.
            log.warning("Unable to open announced process %d: %s",
                        process_id, e)
            return None

        proc = Process(pid=process_id, h_process=h_p, thread_id=None)
        filename = proc.get_filepath()
        log.info("Announced new process name: %s with pid %d", filename, process_id)
        if filename not in PROTECTED_LIST:
            proc.start_trace()
        add_pids(process_id)
        return proc

    def _dispatch(self, command):
        """Act on one complete notification line.

        @param command: full line including its trailing newline.
        @return: the Process wrapper if a new process was picked up, else None.
        """
        routes = (("FILE_ACTIVITY:", self.F_LOGFILE),
                  ("FILE_CREATE:", self.C_LOGFILE),
                  ("FILE_DELETE:", self.D_LOGFILE),
                  ("FILE_WRITE:", self.W_LOGFILE))
        for prefix, logfile in routes:
            if command.startswith(prefix):
                self.writeToLogFile(os.path.join(PATHS["logs"], logfile),
                                    command[len(prefix):])
                return None

        if command.startswith("PROCESS:"):
            return self._track_process(command[len("PROCESS:"):])

        if command.startswith("EXEC:"):
            log.info(command)
        else:
            log.error("Invalid pipe command: %s", command)
        return None

    def run(self):
        """Run handler.

        Reads lines until the pipe reports end of file and dispatches each
        complete line; one line is one logging command.

        @return: operation status.
        """
        proc = None

        try:
            while True:
                data = self._read_line()
                if data == '':
                    break

                if not data.endswith('\n'):
                    # Partial line: keep every fragment until the rest
                    # arrives instead of overwriting the previous one.
                    log.info("Saving a part of a log")
                    self.part += data
                    continue

                if self.part != '':
                    log.info("Using a part of a log")
                    data = self.part + data
                    self.part = ''

                started = self._dispatch(data)
                if started is not None:
                    proc = started

            # Only the most recently created Process wrapper is closed.
            if proc is not None:
                proc.close()
        finally:
            # Mark completion even if the thread dies from an exception.
            self.done = True

        return True

    def writeToLogFile(self, logfile, data):
        """Append data to a log file; failures are logged, not raised.

        @param logfile: path of the log file.
        @param data: text to append.
        """
        try:
            with open(logfile, 'a+') as flog:
                flog.write(data)
        except (IOError, OSError):
            log.error("Unable to write to logfile %s.", logfile)
327 | 
328 | class Analyzer:
329 |     """Cuckoo Darwin (OS X) Analyzer.
330 |     """
331 | 
332 |     PIPE_SERVER_COUNT = 1
333 | 
334 |     def __init__(self):
335 |         self.pipes = [None]*self.PIPE_SERVER_COUNT
336 |         self.config = None
337 |         self.target = None
338 | 
339 |     def complete(self):
340 |         """Mark the analysis as completed and return files"""
341 | 
342 |         # Oh look, it's done
343 |         log.info("Analysis completed")
344 |         # Stop the Pipe Servers.
345 |         for x in xrange(self.PIPE_SERVER_COUNT):
346 |             self.pipes[x].stop()
347 |             while not self.pipes[x].is_done():
348 |                 log.info("Waiting for Pipe Servers to finish")
349 |                 time.sleep(0.1)
350 | 
351 |         # pick up log files and created files from the system file call hooks
352 |         # these are stored in ~/tmp/
353 |         flog_root = os.path.join(os.getenv("HOME"), "tmp") #get the path
354 |         if os.path.exists(flog_root):
355 |             #transfer the log files to the "logs" directory - there could be 3 or there could be none
356 |             for f in os.listdir(flog_root):
357 |                 if fnmatch.fnmatch(f, 'file_*.log'): #if the file is one of our log files
358 |                     os.rename(os.path.join(flog_root, f), os.path.join(PATHS["logs"], f)) #then move it
359 |                 else: #all the other files in the directory will be files that were deleted
360 |                     log.info("Adding file %s with size %d", os.path.join(flog_root, f), os.path.getsize(os.path.join(flog_root, f)))
361 |                     os.rename(os.path.join(flog_root, f), os.path.join(PATHS["files"], f)) #move those to the files folder
362 |             # copy over any created files that were not in the deleted files list
363 |             if os.path.exists(os.path.join(PATHS["logs"], "file_creation.log")):
364 |                 #open the log file for reading - it will have one file path per line
365 |                 flog = open(os.path.join(PATHS["logs"], "file_creation.log"))
366 |                 for row in flog.readlines():
367 |                     row = row.strip("\n")
368 |                     # get rid of the timestamp in front
369 |                     split = row.split(":")
370 |                     row = split[len(split)-1].strip(" ")
371 |                     log.info("Looking for %s - exists: %s", row, str(os.path.exists(row)))
372 |                     #if the file has not already been copied, copy it over
373 |                     if os.path.exists(row) and os.path.isfile(row) and not os.path.exists(os.path.join(PATHS["files"], os.path.basename(row))):
374 |                         try:
375 |                             log.info("Adding file %s with size %d", row, os.path.getsize(row))
376 |                             os.rename(row, os.path.join(PATHS["files"], os.path.basename(row)))
377 |                         except IOError:
378 |                             log.error("Failed to extract created file %s.", row)
379 | 
380 |                 flog.close()
381 | 
382 |         # Dump all the relevant files to the host
383 |         for folder, subs, files in os.walk(PATHS["root"]):
384 |             for filename in files:
385 |                 path = os.path.join(folder, filename)
386 |                 add_file(path)
387 | 
388 |         dump_files()
389 | 
390 | 
391 |     def prepare(self):
392 |         """
393 |         Prepare the environment for analysis.
394 |         """
395 |         # Create the folders used for storing the results.
396 |         create_folders()
397 | 
398 |         # Initialize logging.
399 |         init_logging()
400 | 
401 |         # Parse the analysis configuration file generated by the agent.
402 |         self.config = Config(cfg="analysis.conf")
403 | 
404 |         # Initialize and start the Pipe Servers. This is going to be used for
405 |         # communicating with the injected and monitored processes.
406 |         for x in xrange(self.PIPE_SERVER_COUNT):
407 |             self.pipes[x] = PipeServer()
408 |             self.pipes[x].daemon = True
409 |             self.pipes[x].start()
410 | 
411 |         # We update the target according to its category. If it's a file, then
412 |         # we store the path.
413 |         if self.config.category == "file":
414 |             #Note: The /tmp directory is specified in agent.py for linux and darwin
415 |             self.target = os.path.join("/tmp",
416 |                                        str(self.config.file_name))
417 |         # If it's a URL, well.. we store the URL.
418 |         else:
419 |             self.target = self.config.target
420 |         log.info("Target is at %s", self.target)
421 | 
422 |         # Execsnoop traces process creation using Dtrace
423 |         pargs = ["execsnoop", "-a", "-e"]
424 |         results = open(os.path.join(PATHS["logs"], "processes.log"), "a+")
425 |         try:
426 |             proc = subprocess.Popen(pargs, stdout=results, stderr=results)
427 |             log.info("Starting Execsnoop")
428 |         except (OSError, ValueError):
429 |             log.exception("Failed to start execsnoop.")
430 |         results.close()
431 | 
432 |     def run(self):
433 |         """Run analysis.
434 |         @return: operation status.
435 |         """
436 |         #set up the analysis
437 |         self.prepare()
438 | 
439 |         log.info("Starting analyzer from: %s", os.getcwd())
440 |         log.info("Storing results at: %s", PATHS["root"])
441 | 
442 |         # If no analysis package was specified at submission, we try to select
443 |         # one automatically.
444 |         if not self.config.package:
445 |             log.info("No analysis package specified, trying to detect "
446 |                      "it automagically.")
447 |             # If the analysis target is a file, we choose the package according
448 |             # to the file format.
449 |             if self.config.category == "file":
450 |                 package = choose_package(self.config.file_type, self.config.file_name)
451 |             # If it's an URL, try to use Safari
452 |             else:
453 |                 package = "safari"
454 | 
455 |             # If we weren't able to automatically determine the proper package,
456 |             # we need to abort the analysis.
457 |             if not package:
458 |                 raise CuckooError("No valid package available for file "
459 |                                   "type: {0}".format(self.config.file_type))
460 | 
461 |             log.info("Automatically selected analysis package \"%s\"", package)
462 |         # Otherwise just select the specified package.
463 |         else:
464 |             package = self.config.package
465 | 
466 |         # Generate the package path.
467 |         package_name = "modules.packages.%s" % package
468 | 
469 |         # Try to import the analysis package.
470 |         try:
471 |             __import__(package_name, globals(), locals(), ["dummy"], -1)
472 |         # If it fails, we need to abort the analysis.
473 |         except ImportError:
474 |             raise CuckooError("Unable to import package \"{0}\", does "
475 |                               "not exist.".format(package_name))
476 | 
477 |         # Initialize the package parent abstract.
478 |         Package()
479 | 
480 |         # Enumerate the abstract's subclasses.
481 |         try:
482 |             package_class = Package.__subclasses__()[0]
483 |         except IndexError as e:
484 |             raise CuckooError("Unable to select package class "
485 |                               "(package={0}): {1}".format(package_name, e))
486 | 
487 |                 # Initialize the analysis package.
488 |         pack = package_class(self.get_options())
489 | 
490 |         # Initialize Auxiliary modules
491 |         Auxiliary()
492 |         prefix = auxiliary.__name__ + "."
493 |         for loader, name, ispkg in pkgutil.iter_modules(auxiliary.__path__, prefix):
494 |             if ispkg:
495 |                 continue
496 | 
497 |             # Import the auxiliary module.
498 |             try:
499 |                 __import__(name, globals(), locals(), ["dummy"], -1)
500 |             except ImportError as e:
501 |                 log.warning("Unable to import the auxiliary module "
502 |                             "\"%s\": %s", name, e)
503 | 
504 |         # Walk through the available auxiliary modules.
505 |         aux_enabled, aux_avail = [], []
506 |         for module in Auxiliary.__subclasses__():
507 |             # Try to start the auxiliary module.
508 |             try:
509 |                 aux = module()
510 |                 aux_avail.append(aux)
511 |                 aux.start()
512 |             except (NotImplementedError, AttributeError):
513 |                 log.warning("Auxiliary module %s was not implemented",
514 |                             aux.__class__.__name__)
515 |                 continue
516 |             except Exception as e:
517 |                 log.warning("Cannot execute auxiliary module %s: %s",
518 |                             aux.__class__.__name__, e)
519 |                 continue
520 |             finally:
521 |                 log.info("Started auxiliary module %s",
522 |                          aux.__class__.__name__)
523 |                 aux_enabled.append(aux)
524 | 
525 |         # Initialize the analysis package.
526 |         pack = package_class(self.get_options())
527 | 
528 |         # Start analysis package. If for any reason, the execution of the
529 |         # analysis package fails, we have to abort the analysis.
530 |         try:
531 |             pids = pack.start(self.target)
532 |         except NotImplementedError:
533 |             raise CuckooError("The package \"{0}\" doesn't contain a run "
534 |                               "function.".format(package_name))
535 |         except CuckooPackageError as e:
536 |             raise CuckooError("The package \"{0}\" start function raised an "
537 |                               "error: {1}".format(package_name, e))
538 |         except Exception as e:
539 |             raise CuckooError("The package \"{0}\" start function encountered "
540 |                               "an unhandled exception: "
541 |                               "{1}".format(package_name, e))
542 | 
543 |         # If the analysis package returned a list of process IDs, we add them
544 |         # to the list of monitored processes and enable the process monitor.
545 |         if pids:
546 |             add_pids(pids)
547 |             pid_check = True
548 | 
549 |         # If the package didn't return any process ID (for example in the case
550 |         # where the package isn't enabling any behavioral analysis), we don't
551 |         # enable the process monitor.
552 |         else:
553 |             log.info("No process IDs returned by the package, running "
554 |                      "for the full timeout")
555 |             pid_check = False
556 | 
557 | 
558 |         time_counter = 0
559 | 
560 |         while True:
561 |             time_counter += 1
562 |             if time_counter == int(self.config.timeout):
563 |                 log.info("Analysis timeout hit, terminating analysis")
564 |                 break
565 | 
566 |             try:
567 |                 # If the process monitor is enabled we start checking whether
568 |                 # the monitored processes are still alive.
569 |                 if pid_check:
570 |                     for pid in PROCESS_LIST:
571 |                         if not Process(pid=pid).is_alive():
572 |                             log.info("Process with pid %s has terminated", pid)
573 |                             PROCESS_LIST.remove(pid)
574 | 
575 |                     # If none of the monitored processes are still alive, we
576 |                     # can terminate the analysis.
577 |                     if not PROCESS_LIST:
578 |                         log.info("Process list is empty, "
579 |                                  "terminating analysis.")
580 |                         break
581 | 
582 |                     # Update the list of monitored processes available to the
583 |                     # analysis package. It could be used for internal
584 |                     # operations within the module.
585 |                     pack.set_pids(PROCESS_LIST)
586 | 
587 |                 try:
588 |                     # The analysis packages are provided with a function that
589 |                     # is executed at every loop's iteration. If such function
590 |                     # returns False, it means that it requested the analysis
591 |                     # to be terminate.
592 |                     if not pack.check():
593 |                         log.info("The analysis package requested the "
594 |                                  "termination of the analysis...")
595 |                         break
596 | 
597 |                 # If the check() function of the package raised some exception
598 |                 # we don't care, we can still proceed with the analysis but we
599 |                 # throw a warning.
600 |                 except Exception as e:
601 |                     log.warning("The package \"%s\" check function raised "
602 |                                 "an exception: %s", package_name, e)
603 |             finally:
604 |                 # Sleep for one second
605 |                 time.sleep(1)
606 | 
607 |         try:
608 |             # Before shutting down the analysis, the package can perform some
609 |             # final operations through the finish() function.
610 |             pack.finish()
611 |         except Exception as e:
612 |             log.warning("The package \"%s\" finish function raised an "
613 |                         "exception: %s", package_name, e)
614 | 
615 |         # Try to terminate remaining active processes. We do this to make sure
616 |         # that we clean up remaining open handles (sockets, files, etc.).
617 |         log.info("Terminating remaining processes before shutdown...")
618 | 
619 |         for pid in PROCESS_LIST:
620 |             proc = Process(pid=pid)
621 |             if proc.is_alive():
622 |                 try:
623 |                     proc.terminate()
624 |                 except:
625 |                     continue
626 | 
627 | 
628 |         # Call the completion procedure
629 |         self.complete()
630 | 
631 |         return True
632 | 
633 |     def get_options(self):
634 |         """Get analysis options.
635 |         @return: options dict.
636 |         """
637 |         # The analysis package can be provided with some options in the
638 |         # following format:
639 |         #   option1=value1,option2=value2,option3=value3
640 |         #
641 |         # Here we parse such options and provide a dictionary that will be made
642 |         # accessible to the analysis package.
643 |         options = {}
644 |         if self.config.options:
645 |             try:
646 |                 # Split the options by comma.
647 |                 fields = self.config.options.strip().split(",")
648 |             except ValueError as e:
649 |                 log.warning("Failed parsing the options: %s", e)
650 |             else:
651 |                 for field in fields:
652 |                     # Split the name and the value of the option.
653 |                     try:
654 |                         key, value = field.strip().split("=")
655 |                     except ValueError as e:
656 |                         log.warning("Failed parsing option (%s): %s", field, e)
657 |                     else:
658 |                         # If the parsing went good, we add the option to the
659 |                         # dictionary.
660 |                         options[key.strip()] = value.strip()
661 | 
662 |         return options
663 | 
# Script entry point: only runs when this file is executed directly.
if __name__ == "__main__":
    # Outcome reported to the agent once everything is over.
    success = False  # becomes the return value of Analyzer.run()
    error = ""  # description of the failure, empty when there was none

    try:
        # Build the analyzer and block until the analysis has finished.
        analyzer = Analyzer()
        success = analyzer.run()

    except KeyboardInterrupt:
        # Not expected inside the guest, handled for completeness.
        error = "Keyboard Interrupt"

    except Exception as e:
        # A CuckooError means the analysis hit a critical problem and had to
        # stop; any other unexpected exception ends up here as well. Either
        # way the failure is recorded so the agent can be told about it.
        error = str(e)
        error_exc = traceback.format_exc()

        # Use the logger when it has handlers attached, stderr otherwise.
        if log.handlers:
            log.exception(error_exc)
        else:
            sys.stderr.write(error_exc + "\n")

    finally:
        # Whatever happened above, tell the agent's XMLRPC server that the
        # analysis is over so it can report back to the host.
        server = xmlrpclib.Server("http://127.0.0.1:8000", allow_none=True)
        logging.critical("success: %s, error: %s, PATHS[root]: %s" % (success, error, PATHS["root"]))
        server.complete(success, error, PATHS["root"])


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/cuckoohooks.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  3 |  * this work by or on behalf of the U.S. Government. 
  4 |  * NOTICE:
  5 |  * For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  6 |  * NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  7 |  * Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  8 |  * 
  9 |  * This file is the source code of cuckoohooks.dylib for the darwin analyzer of the Cuckoo sandbox.
 10 |  * Using process injection, it hooks system calls of interest to monitor for activity.
 11 |  * The full list of syscalls is in sys/syscall.h
 12 |  * It is compiled using the commands:
 13 |  * gcc -fno-common -c cuckoohooks.c
 14 |  * gcc -dynamiclib -o cuckoohooks.dylib cuckoohooks.o
 15 |  * or, for a 32-and-64-bit one:
 16 |  * gcc -fno-common -c cuckoohooks.c -arch i386
 17 |  * gcc -dynamiclib -o cuckoohooks_32.dylib cuckoohooks.o -arch i386
 18 |  * gcc -fno-common -c cuckoohooks.c -arch x86_64
 19 |  * gcc -dynamiclib -o cuckoohooks_64.dylib cuckoohooks.o -arch x86_64
 20 |  * lipo -create cuckoohooks_32.dylib cuckoohooks_64.dylib -output cuckoohooks.dylib
 21 |  *
 22 |  * A makefile is included for convenience
 23 |  * It is injected at runtime into the desired process with:
 24 |  * DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=<path>/cuckoohooks.dylib ./<executable>
 25 |  *
 26 |  */
 27 | 
 28 | #include <stdio.h>
 29 | #include <stdlib.h>
 30 | #include <dlfcn.h>
 31 | #include <fcntl.h>
 32 | #include <stdarg.h>
 33 | #include <string.h>
 34 | #include <sys/stat.h>
 35 | #include <errno.h>
 36 | #include <fcntl.h>
 37 | #include <unistd.h>
 38 | #include <libgen.h>
 39 | #include <errno.h>
 40 | #include <time.h>
 41 | #include <sys/param.h>
 42 | #include <sys/uio.h>
 43 | #include <sys/types.h>
 44 | #include <sys/ptrace.h>
 45 | #include <arpa/inet.h>
 46 | #include <spawn.h>
 47 | #include <sys/syscall.h>
 48 | #include <sys/types.h>
 49 | #include <signal.h>
 50 | 
 51 | #include "cuckoohooks.h"
 52 | 
/*
 * Log folder and per-category log file names.
 * NOTE(review): none of these macros is referenced by the hooks visible in
 * this part of the file; presumably helpers defined elsewhere (for example
 * in_logfolder) use them -- confirm before changing.
 */
#define LOGFOLDER "/tmp/"
#define F_LOGFILE "file_activity.log"
#define W_LOGFILE "file_writes.log"
#define C_LOGFILE "file_creation.log"
#define D_LOGFILE "file_deletion.log"

//set to 1 by write_to_file() after it has called read_config() once
int got_config = 0;
//set to 1 by write_to_file() while it is writing to the pipe, 0 otherwise;
//other callers of write_to_file() sleep-poll until it drops back to 0
int pipe_open = 0;
 61 | 
 62 | /*
 63 |  * This section contains the hooked syscalls.
 64 |  */
 65 | 
 66 | /*
 67 |  * The open(2) syscall hook. The "..." means this function takes an unspecified number of variables.
 68 |  * We are mostly interested in files being created and files being written, so that we can inform Cuckoo about them.
 69 |  * However, the function will also log all files read, for completeness.
 70 |  * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/open.2.html#//apple_ref/doc/man/2/open
 71 |  */
 72 | 
 73 | int open(const char *path, int oflag,...) {
 74 |     bool was_create;
 75 |     //fprintf(stderr, "Starting open within pid %d and file %s and mode %d\n", getpid(), path, oflag);
 76 |     //get a pointer to the real function so we can call it from this wrapper
 77 |     int (*real_open)(const char*, int, ...) =
 78 |     (int (*)(const char*, int,...)) dlsym(RTLD_NEXT, "open");
 79 | 
 80 |     //log the file and its flags to the logfile
 81 |     char* log = timestamp();
 82 |     char* front = append_strings(log, ": ");
 83 |     if (in_logfolder(path)) {
 84 |         switch(oflag & O_ACCMODE) {
 85 |             case O_RDONLY:
 86 |                 write_to_file(append_strings(front, append_strings("Read - ", path)), "FILE_ACTIVITY:");
 87 |             case O_WRONLY:
 88 |                 write_to_file(append_strings(front, append_strings("Write - ", path)), "FILE_ACTIVITY:");
 89 |             case O_RDWR:
 90 |                 write_to_file(append_strings(front, append_strings("ReadWrite - ", path)), "FILE_ACTIVITY:");
 91 |         }
 92 |     }
 93 | 
 94 |     //printf("File %s opened with %d\n", path, oflag); //for debugging
 95 |     
 96 |     //The most essential unnamed argument is the file permissions, so we need to pass that to the real open
 97 |     va_list args;
 98 |     va_start(args,oflag); //list of unnamed arguments
 99 |     int perm = va_arg(args, int); //permissions
100 |     va_end(args);
101 |     
102 |     //check to see if the "create if not exists" flag is set
103 |     //and also if the file exists
104 |     if (((oflag & O_CREAT) != 1) & (file_exists(path) == 0) & in_logfolder(path)) {
105 |         //then log that this file was created
106 |         write_to_file(append_strings(front, path), "FILE_CREATE:");
107 |     }
108 | 
109 |     //call the real function for the results
110 |     int result = real_open(path, oflag, perm);
111 |     if (result == -1) { //if there was an error, note it
112 |          write_to_file(append_strings(front, append_strings("Open failed on ", path)), "FILE_ACTIVITY:");
113 |     }
114 |     return result;
115 | }
116 | 
117 | /*
118 |  * The creat() call is not a system call, but it is hooked for thoroughness.
119 |  * Header can be found  in fcntl.h
120 |  *
121 |  */
122 | 
123 | int	creat(const char *pathname, mode_t mode) {
124 |     //get a pointer to the real function so we can call it from this wrapper
125 |     int (*real_creat)(const char*, mode_t) =
126 |     (int (*)(const char*, mode_t)) dlsym(RTLD_NEXT, "creat");
127 |     
128 |     if (in_logfolder(pathname)) {
129 |         //write to the logfile of created files
130 |         char* log = timestamp();
131 |         char* front = append_strings(log, ": ");
132 |         write_to_file(append_strings(front, pathname), "FILE_CREATE:");
133 |     }
134 |     
135 |     return real_creat(pathname, mode);
136 |     
137 | }
138 | 
139 | /*
140 |  * The unlink command is basically the "delete file" command.
141 |  * In this case, we save all files to be deleted to the ~/tmp folder
142 |  * before doing the delete. These are then exported back to cuckoo for reference.
143 |  * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/unlink.2.html
144 |  */
145 | 
146 | int unlink(const char *path) {
147 |     //get a pointer to the real function so we can call it from this wrapper
148 |     int (*real_unlink)(const char*) =
149 |     (int (*)(const char*)) dlsym(RTLD_NEXT, "unlink");
150 |     
151 |     //write the file to the log
152 |     char* log = timestamp();
153 |     char* front = append_strings(log, ": ");
154 |     write_to_file(append_strings(front, path), "FILE_DELETE:");
155 |     
156 |     //copy the file into our results folder
157 |     copy_file(path, g_config.results);
158 |     
159 |     return real_unlink(path);
160 | }
161 | 
162 | /*
163 |  * The rename command, also serves as the "move" command. The new file name
164 |  * is considered to be a "created" file and the old one is considered a "deleted"
165 |  * file, so the end result will have two copies of the file, one under each name.
166 |  * https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/rename.2.html
167 |  */
168 | int rename(const char *old, const char *new) {
169 |     //get a pointer to the real function so we can call it from this wrapper
170 |     int (*real_rename)(const char*, const char*) =
171 |     (int (*)(const char*, const char*)) dlsym(RTLD_NEXT, "rename");
172 |     
173 |     //write the "old" file to the deleted log
174 |     char* log = timestamp();
175 |     char* front = append_strings(log, ": ");
176 |     write_to_file(append_strings(front, old), "FILE_DELETE:");
177 |     
178 |     //copy the file into our results folder
179 |     copy_file(old, g_config.results);
180 |     
181 |     //write the "new" file to the list of created files
182 |     write_to_file(append_strings(front, new), "FILE_CREATE:");
183 |     
184 |     return real_rename(old, new);
185 | }
186 | 
187 | /*
188 |  * The hooks of the write functions are there to log what files are actually changed.
189 |  * Changed files will also be extracted by Cuckoo
190 |  * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/write.2.html
191 |  */
192 | 
193 | ssize_t write(int fildes, const void *buf, size_t nbyte) {
194 |     //get a pointer to the real function so we can call it from this wrapper
195 |     ssize_t (*real_write)(int, const void *, size_t) =
196 |     (ssize_t (*)(int, const void *, size_t)) dlsym(RTLD_NEXT, "write");
197 |     if (nbyte > 0) { //if we are actually writing something
198 |         //get file name from descriptor
199 |         char path[MAXPATHLEN];
200 |         if ((fcntl(fildes, F_GETPATH, path) != -1) & in_logfolder(path))
201 |         {
202 |             //log to the writes log file
203 |             char* log = timestamp();
204 |             char* front = append_strings(log, ": ");
205 |             write_to_file(append_strings(front, path), "FILE_WRITE:");
206 |             //write the bytes to the log file
207 |             char* data = buffer_to_hex(buf, nbyte, 0);
208 |             write_to_file(append_strings("Bytes Written: ", data), "FILE_WRITE:");
209 |             //copy over the file to the results folder
210 |             copy_file(path, g_config.results);
211 |         }
212 |     }
213 |     
214 |     return real_write(fildes, buf, nbyte);
215 | }
216 | 
217 | ssize_t pwrite(int fildes, const void *buf, size_t nbyte, off_t offset) {
218 |     //get a pointer to the real function so we can call it from this wrapper
219 |     ssize_t (*real_pwrite)(int, const void *, size_t, off_t) =
220 |     (ssize_t (*)(int, const void *, size_t, off_t)) dlsym(RTLD_NEXT, "pwrite");
221 |     
222 |     if (nbyte > 0) { //if we are actually writing something
223 |         //get file name from descriptor
224 |         char path[MAXPATHLEN];
225 |         if ((fcntl(fildes, F_GETPATH, path) != -1) & in_logfolder(path))
226 |         {
227 |             //log to the writes log file
228 |             char* log = timestamp();
229 |             char* front = append_strings(log, ": ");
230 |             write_to_file(append_strings(front, path), "FILE_WRITE:");
231 |             //write the bytes to the log file
232 |             char* data = buffer_to_hex(buf, nbyte, offset);
233 |             write_to_file(append_strings("Bytes Written: ", data), "FILE_WRITE:");
234 |             //copy over the file to the results folder
235 |             copy_file(path, g_config.results);
236 |         }
237 |     }
238 |     return real_pwrite(fildes, buf, nbyte, offset);
239 | }
240 | 
241 | ssize_t writev(int fildes, const struct iovec *iov, int iovcnt) {
242 |     //get a pointer to the real function so we can call it from this wrapper
243 |     ssize_t (*real_writev)(int, const struct iovec, int) =
244 |     (ssize_t (*)(int, const struct iovec, int)) dlsym(RTLD_NEXT, "writev");
245 |      
246 |      //get file name from descriptor
247 |      char path[MAXPATHLEN];
248 |      if ((fcntl(fildes, F_GETPATH, path) != -1) & in_logfolder(path))
249 |      {
250 |          //log to the writes log file
251 |          char* log = timestamp();
252 |          char* front = append_strings(log, ": ");
253 |          write_to_file(append_strings(front, path), "FILE_WRITE:");
254 |          //copy over the file to the results folder
255 |          copy_file(path, g_config.results);
256 |      }
257 |      
258 |      return real_writev(fildes, *iov, iovcnt);
259 | }
260 | 
261 | /*
262 |  * Ptrace is of interest because processes calling ptrace with the
263 |  * PT_DENY_ATTACH request can avoid being probed for debugging with Dtrace.
264 |  * This request sets the P_LNOATTACH flag, which is checked by Dtrace.
 * Since Dtrace is a nice tool and we want to use it, we can't have that.
266 |  * So any PT_DENY_ATTACH request that comes through gets blocked.
267 |  * https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/ptrace.2.html
268 |  * http://dtrace.org/blogs/ahl/2008/01/18/mac-os-x-and-the-missing-probes/
269 |  */
270 | int ptrace(int request, pid_t pid, caddr_t addr, int data) {
271 |     //get a pointer to the real function so we can call it from this wrapper
272 |     int (*real_ptrace)(int, pid_t, caddr_t, int) =
273 |     (int (*)(int, pid_t, caddr_t, int)) dlsym(RTLD_NEXT, "ptrace");
274 |     
275 |     //log to the writes log file
276 |     char* log = timestamp();
277 |     char* front = append_strings(log, ": ");
278 |     write_to_file(front, "PTRACE:");
279 |     
280 |     if (request == PT_DENY_ATTACH) { //PT_DENY_ATTACH=31
281 |         return 0; //do nothing for this request
282 |     }
283 |     else {
284 |         return real_ptrace(request, pid, addr, data);
285 |     }
286 | }
287 | 
288 | /*
 * Vfork is the more memory-efficient version of fork. Hooking this allows
290 |  * us to track new spawned processes.
291 |  * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/vfork.2.html
292 |  */
pid_t vfork(void) {
    //Hook for vfork(): forwards to the real vfork(), and in the parent
    //(return value > 0) sends SIGSTOP to the child and reports its pid
    //with the PROCESS: command. Returns what the real vfork() returned.
    //NOTE(review): unlike the fork() hook, the child branch (return value 0)
    //does nothing here -- presumably deliberate for vfork; confirm.

    //get a pointer to the real function so we can call it from this wrapper
    pid_t (*real_vfork)() =
    (pid_t (*)()) dlsym(RTLD_NEXT, "vfork");
    
    //get the new pid
    pid_t new_process = real_vfork();
    //suspend the process so the injection can happen
    if(new_process > 0) {
        kill(new_process, SIGSTOP);
        //write the new process pid to the pipe
        //The analyzer will add tracking when the command is read
        char* log = timestamp();
        char* front = append_strings(log, ": ");
        //sprintf writes from the START of front, so the timestamp prefix is
        //overwritten and the message sent is just " <pid>"
        //NOTE(review): presumably the analyzer expects exactly that format
        //after "PROCESS:" -- confirm before "fixing" this into an append
        sprintf(front, " %d", new_process);
        write_to_file(front, "PROCESS:");
    }
    
    return new_process;

}
315 | 
316 | /*
317 |  * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/fork.2.html#//apple_ref/doc/man/2/fork
318 |  */
319 | pid_t fork(void) {
320 |     //get a pointer to the real function so we can call it from this wrapper
321 |     pid_t (*real_fork)() =
322 |     (pid_t (*)()) dlsym(RTLD_NEXT, "fork");
323 |     
324 |     //get the new pid
325 |     pid_t new_process = real_fork();
326 |     //suspend the process so the injection can happen
327 |     if(new_process > 0) {
328 |         kill(new_process, SIGSTOP);
329 |         //write the new process pid to the pipe
330 |         //The analyzer will add tracking when the command is read
331 |         char* log = timestamp();
332 |         char* front = append_strings(log, ": ");
333 |         sprintf(front, " %d", new_process);
334 |         write_to_file(front, "PROCESS:");
335 |     }
336 |     else if (new_process == 0) { // Stopping the new process from both the old thread and the new one is necessary for this to work consistently
337 |         kill(getpid(), SIGSTOP);
338 |     }
339 |     
340 |     return new_process;
341 | }
342 | 
343 | /*
344 |  * posix_spawn is what is most commonly used on OS X, since the Cocoa library uses it by default.
345 |  * https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man2/posix_spawn.2.html
346 |  */
347 | int posix_spawn(pid_t *restrict pid, const char *restrict path,
348 |                 const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp,
349 |                 char *const argv[restrict], char *const envp[restrict]) {
350 | 
351 |     //get a pointer to the real function so we can call it from this wrapper
352 |     int (*real_posix_spawn)(pid_t *restrict, const char *restrict,
353 |                             const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict,
354 |                             char *const *restrict, char *const *restrict) =
355 |     (int (*)(pid_t *restrict, const char *restrict,
356 |              const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict,
357 |              char *const *restrict, char *const *restrict)) dlsym(RTLD_NEXT, "posix_spawn");
358 |     
359 |     //insert the injected library into the new process' environment variables
360 |     char* environ[3];
361 |     environ[0] = "DYLD_FORCE_FLAT_NAMESPACE=1";
362 |     environ[1] = append_strings("DYLD_INSERT_LIBRARIES=", getenv("DYLD_INSERT_LIBRARIES"));
363 |     
364 |     int result;
365 |     //get the result - the pid will be saved in the pid argument struct
366 |     if (!envp) { //if no arguments were passed, just use our array
367 |         environ[2] = NULL;
368 |         result = real_posix_spawn(pid, path, file_actions, attrp, argv, environ);
369 |     }
370 |     else {
371 |         //calculate the size of the given array
372 |         int size = 0;
373 |         //we know the array is null-terminated, so we look for that to get the size
374 |         char* item = envp[0];
375 |         while (item) {
376 |             size++;
377 |             item = envp[size];
378 |         }
379 |         size++;
380 |         char* both[2+size];
381 |         int i;
382 |         int flag1 = 0;
383 |         int flag2 = 0;
384 |         //copy the given envp array into the new one
385 |         for (i = 0; i < size-1; i++) {
386 |             //avoid duplicate environment variables
387 |             if(strstr(envp[i], "DYLD_FORCE_FLAT_NAMESPACE")) {
388 |                 flag1 = 1;
389 |             }
390 |             if (strstr(envp[i], "DYLD_INSERT_LIBRARIES=")) {
391 |                 flag2 = 1;
392 |             }
393 |             both[i] = envp[i];
394 |         }
395 |         //if necessary, add in the injected library environment variables
396 |         if (flag1 == 0) {
397 |             both[i] = environ[0];
398 |             i++;
399 |         }
400 |         if (flag2 == 0) {
401 |             both[i] = environ[1];
402 |             i++;
403 |         }
404 |         both[i] = NULL; //terminate the array (doesn't matter if it's not the end)
405 | 
406 |         result = real_posix_spawn(pid, path, file_actions, attrp, argv, both);
407 |     }
408 |     //suspend the process so the injection can happen
409 |     if (*pid > 0) {
410 |         kill(*pid, SIGSTOP);
411 |     }
412 |     
413 |     //write the new process pid to the pipe
414 |     //The analyzer will add tracking when the command is read
415 |     char* log = timestamp();
416 |     char* front = append_strings(log, ": ");
417 |     sprintf(front, " %d", *pid);
418 |     write_to_file(front, "PROCESS:");
419 |     
420 |     return result;
421 |     
422 | }
423 | 
424 | int posix_spawnp(pid_t *restrict pid, const char *restrict file,
425 |                  const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp,
426 |                  char *const argv[restrict], char *const envp[restrict]) {
427 | 
428 |     //get a pointer to the real function so we can call it from this wrapper
429 |     int (*real_posix_spawnp)(pid_t *restrict, const char *restrict,
430 |                             const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict,
431 |                             char *const *restrict, char *const *restrict) =
432 |     (int (*)(pid_t *restrict, const char *restrict,
433 |              const posix_spawn_file_actions_t, const posix_spawnattr_t *restrict,
434 |              char *const *restrict, char *const *restrict)) dlsym(RTLD_NEXT, "posix_spawnp");
435 |     
436 |     //insert the injected library into the new process' environment variables
437 |     char* environ[3];
438 |     environ[0] = "DYLD_FORCE_FLAT_NAMESPACE=1";
439 |     environ[1] = append_strings("DYLD_INSERT_LIBRARIES=", getenv("DYLD_INSERT_LIBRARIES"));
440 |     
441 |     int result;
442 |     //get the result - the pid will be saved in the pid argument struct
443 |     if (!envp) { //if no arguments were passed, just use our array
444 |         environ[2] = NULL;
445 |         result = real_posix_spawnp(pid, file, file_actions, attrp, argv, environ);
446 |     }
447 |     else {
448 |         //calculate the size of the given array
449 |         int size = 0;
450 |         //we know the array is null-terminated, so we look for that to get the size
451 |         char* item = envp[0];
452 |         while (item) {
453 |             size++;
454 |             item = envp[size];
455 |         }
456 |         size++;
457 |         char* both[2+size];
458 |         int i;
459 |         int flag1 = 0;
460 |         int flag2 = 0;
461 |         //copy the given envp array into the new one
462 |         for (i = 0; i < size-1; i++) {
463 |             //avoid duplicate environment variables
464 |             if(strstr(envp[i], "DYLD_FORCE_FLAT_NAMESPACE")) {
465 |                 flag1 = 1;
466 |             }
467 |             if (strstr(envp[i], "DYLD_INSERT_LIBRARIES=")) {
468 |                 flag2 = 1;
469 |             }
470 |             both[i] = envp[i];
471 |         }
472 |         //if necessary, add in the injected library environment variables
473 |         if (flag1 == 0) {
474 |             both[i] = environ[0];
475 |             i++;
476 |         }
477 |         if (flag2 == 0) {
478 |             both[i] = environ[1];
479 |             i++;
480 |         }
481 |         both[i] = NULL; //terminate the array (doesn't matter if it's not the end)
482 |         
483 |         result = real_posix_spawnp(pid, file, file_actions, attrp, argv, both);
484 |     }
485 |     //suspend the process so the injection can happen
486 |     if (*pid > 0) {
487 |         kill(*pid, SIGSTOP);
488 |     }
489 |     
490 |     //write the new process pid to the pipe
491 |     //The analyzer will add tracking when the command is read
492 |     char* log = timestamp();
493 |     char* front = append_strings(log, ": ");
494 |     sprintf(front, " %d", *pid);
495 |     write_to_file(front, "PROCESS:");
496 |     
497 |     return result;
498 |     
499 | }
500 | 
501 | /*int execve(const char *path, char *const argv[], char *const envp[]) {
502 |     //get a pointer to the real function so we can call it from this wrapper
503 |     int (*real_execve)(const char *, char *const *, char *const *) =
504 |     (int (*)(const char *, char *const *, char *const *)) dlsym(RTLD_NEXT, "execve");
505 |     
506 |     char* log = timestamp();
507 |     char* front = append_strings(log, ": ");
508 |     sprintf(front, " %s", path);
509 |     write_to_file(front, "EXEC:");
510 | 
511 |     return real_execve(path, argv, envp);
512 | }*/
513 | 
514 | 
515 | 
516 | /*
517 |  * This section contains utility functions used by the hooks.
518 |  */
519 | 
520 | /*
521 |  * Prints a string to the log file, with error handling.
522 |  * By default it is just appended.
523 |  */
524 | void write_to_file(const char* str, const char* command) {
525 |     //if we don't have the name of pipe, read it
526 |     if (got_config == 0) {
527 |         read_config();
528 |         got_config = 1;
529 |     }
530 |     struct timespec tim, tim2;
531 |     tim.tv_sec = 0;
532 |     tim.tv_nsec = 50000000L;
533 |     while (pipe_open == 1) {
534 |         nanosleep(&tim , &tim2);
535 |     }
536 |     pipe_open = 1;
537 |     //open the pipe for writing
538 |     FILE *p = fopen(g_config.pipe_name, "w");
539 |     if (p == NULL)
540 |     {
541 |         return;
542 |         //fprintf(stderr, "Error opening pipe %s.\n", g_config.pipe_name);
543 |         //exit(1);
544 |     }
545 |     //write to the file
546 |     fprintf(p, "%s", command);
547 |     fprintf(p, "%s", str);
548 |     fprintf(p, "%s", "\n");
549 |     fflush(p);
550 |     //close the file
551 |     fclose(p);
552 |     pipe_open = 0;
553 | }
554 | 
555 | /*
556 |  * Appends two strings, because C is evil.
557 |  */
char* append_strings(const char* str1, const char* str2) {
    /*
     * Returns a newly malloc'd string holding str1 followed by str2.
     *
     * A NULL argument is treated as the empty string. If memory cannot be
     * allocated, the string literal "" is returned instead (as before), so
     * callers that only read the result keep working; that fallback must
     * not be written to or freed.
     */
    if (str1 == NULL) {
        str1 = "";
    }
    if (str2 == NULL) {
        str2 = "";
    }

    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);

    char* result = malloc(len1 + len2 + 1);
    if (result == NULL) { //if we run out of memory, which hopefully doesn't happen
        return "";
    }

    //lengths are already known, so copy directly instead of rescanning
    memcpy(result, str1, len1);
    memcpy(result + len1, str2, len2 + 1); //+1 copies the terminator
    return result;
}
572 | 
573 | /*
574 |  * Checks if a file exists, used to see if something is being created.
575 |  * Return 1 if the files exists, 0 if not.
576 |  */
int file_exists(const char* path) {
    /*
     * Returns 0 when path does not exist (stat() fails with ENOENT) and 1
     * otherwise, including when stat() fails for any other reason: the
     * file may exist, so it is not reported as missing.
     *
     * The check uses stat() instead of opening the file, so it never
     * blocks on a FIFO and does not consume a file descriptor.
     */
    struct stat info;

    if (stat(path, &info) == -1 && errno == ENOENT) {
        return 0; //file does not exist
    }
    return 1;
}
590 | 
591 | /*
592 |  * Copies the file at "src" into the file at "dest".
593 |  * Note: It will overwrite "dest" if it already exists.
594 |  */
595 | void copy_file(const char* src, const char* dest) {
596 |     unsigned char buffer[4096];
597 |     int err, n;
598 |     
599 |     //get the file name from the path and append it to dest
600 |     dest = append_strings(dest, basename(src));
601 |     
602 |     //open the two files
603 |     int src_file = open(src, O_RDONLY);
604 |     int dest_file = open(dest, O_CREAT|O_WRONLY, 0777);
605 |     
606 |     //write in the bytes 4096 at a time from src to dest
607 |     while (1) {
608 |         err = read(src_file, buffer, 4096);
609 |         if (err == -1) {
610 |             //printf("COPY FILE ERROR (src): %s on file %s\n", strerror(errno), src);
611 |             break;
612 |         }
613 |         n = err;
614 |         
615 |         if (n == 0) break; //stop when we read all of the file
616 |         
617 |         err = write(dest_file, buffer, n);
618 |         if (err == -1) {
619 |             //printf("COPY FILE ERROR (dest): %s on file %s\n", strerror(errno), dest);
620 |             break;
621 |         }
622 |     }
623 |     //close both files
624 |     close(src_file);
625 |     close(dest_file);
626 | }
627 | 
/*
 * Returns the current local time in human-readable form for logging,
 * followed by the calling process id: "<asctime text> (<pid>)  ".
 * The result is malloc'd; if the allocation fails an empty string literal
 * is returned instead, the same convention append_strings() uses.
 */
char* timestamp() {
    //asctime() yields 24 characters plus '\n' and the terminator: 26 bytes
    char when[26];
    time_t now = time(NULL); //get calendar time
    struct tm *local = localtime(&now);
    const char *text = (local != NULL) ? asctime(local) : NULL;

    if (text != NULL) {
        strncpy(when, text, sizeof(when) - 1);
        when[sizeof(when) - 1] = '\0';
        when[strcspn(when, "\n")] = '\0'; //strip the trailing newline
    } else {
        when[0] = '\0';
    }

    //room for the time text plus " (<pid>)  " and the terminator
    size_t capacity = sizeof(when) + 32;
    char *result = malloc(capacity);
    if (result == NULL) {
        return "";
    }
    snprintf(result, capacity, "%s (%d)  ", when, (int)getpid());
    return result;
}
641 | 
642 | /* Returns 1 if the action is not in the logging folder
643 |  * and 0 if it is, to avoid logging the injected library's actions.
644 |  */
645 | int in_logfolder(const char *path) {
646 |     //if we don't have the name of the results folder, read it
647 |     if (got_config == 0) {
648 |         read_config();
649 |         got_config = 1;
650 |     }
651 |     if((strstr(path, g_config.results) != NULL) ||  (strstr(path, g_config.pipe_name) != NULL)) { //folder is in path
652 |         return 0;
653 |     }
654 |     else { //folder is not in path
655 |         return 1;
656 |     }
657 | }
658 | 
659 | /*
660 |  * Read in the configuration file for this process, created by api/process.py
661 |  * This is a modified verison of the function found in https://github.com/cuckoobox/cuckoomon/blob/master/config.c
662 |  */
663 | void read_config() {
664 |     
665 |     char buf[512], config_fname[PATH_MAX];
666 |     sprintf(config_fname, "%s%d.conf", getenv("TMPDIR"), getpid());
667 |     FILE *fp = fopen(config_fname, "r");
668 | 
669 |     if (fp == NULL) {
670 |         sprintf(config_fname, "%s%d.conf", getenv("TMPDIR"), getppid());
671 |         FILE *fp2 = fopen(config_fname, "r");
672 |     }
673 |     
674 | 
675 |     if(fp != NULL) {
676 |         while (fgets(buf, sizeof(buf), fp) != NULL) {
677 |             // cut off the newline
678 |             char *p = strchr(buf, '\r');
679 |             if(p != NULL) *p = 0;
680 |             p = strchr(buf, '\n');
681 |             if(p != NULL) *p = 0;
682 |             // split key=value
683 |             p = strchr(buf, '=');
684 |             if(p != NULL) {
685 |                 *p = 0;
686 |                 
687 |                 const char *key = buf, *value = p + 1;
688 |                 if(!strcmp(key, "pipe")) {
689 |                     strncpy(g_config.pipe_name, value,
690 |                             ARRAYSIZE(g_config.pipe_name));
691 |                 }
692 |                 else if(!strcmp(key, "results")) {
693 |                     strncpy(g_config.results, value,
694 |                             ARRAYSIZE(g_config.results));
695 |                 }
696 |                 else if(!strcmp(key, "analyzer")) {
697 |                     strncpy(g_config.analyzer, value,
698 |                             ARRAYSIZE(g_config.analyzer));
699 |                 }
700 |                 else if(!strcmp(key, "first-process")) {
701 |                     g_config.first_process = value[0] == '1';
702 |                 }
703 |                 else if(!strcmp(key, "startup-time")) {
704 |                     g_config.startup_time = atoi(value);
705 |                 }
706 |                 else if(!strcmp(key, "retaddr-check")) {
707 |                     g_config.retaddr_check = value[0] == '1';
708 |                 }
709 |                 else if(!strcmp(key, "host-ip")) {
710 |                     g_config.host_ip = inet_addr(value);
711 |                 }
712 |                 else if(!strcmp(key, "host-port")) {
713 |                     g_config.host_port = atoi(value);
714 |                 }
715 |             }
716 |         }
717 |         fclose(fp);
718 | 
719 |         int (*real_unlink)(const char*) =
720 |         (int (*)(const char*)) dlsym(RTLD_NEXT, "unlink");
721 |         //real_unlink(config_fname);
722 |     }
723 | }
724 | 
/*
 * Converts byte buffers to hex so we can see what the program is writing.
 * Renders up to the first 100 of "size" bytes, starting "offset" bytes into
 * "buf", as "\xNN" escapes (4 characters per byte).
 * Returns a malloc'd, NUL-terminated string (empty when there is nothing to
 * render); if the allocation fails an empty string literal is returned
 * instead, the same convention append_strings() uses.
 */
char* buffer_to_hex(const void *buf, size_t size, off_t offset) {
    size_t i;

    //sometimes files try to request a ridiculous amount to write, so we only do the first 100 bytes
    if (size > 100) {
        size = 100;
    }
    if (buf == NULL) {
        size = 0;
    }

    //each byte is converted to the form \x<hex>, so 4 chars each, plus the terminator
    char *result = malloc((size * 4) + 1);
    if (result == NULL) {
        return "";
    }

    const unsigned char *bytes = (const unsigned char *)buf;
    char *out = result;
    for (i = 0; i < size; i++) {
        snprintf(out, 5, "\\x%.2x", bytes[offset + (off_t)i]);
        out += 4;
    }
    *out = '\0'; //terminate even when no byte was rendered
    return result;
}
747 | 
748 | 
749 | 


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/cuckoohooks.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks.dylib


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/cuckoohooks.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Symbols for cuckoohooks.c
 3 |  */
 4 | 
 5 | #include <sys/syslimits.h>
 6 | #include <sys/_types.h>
 7 | #include <sys/_types/_sigset_t.h>
 8 | #include <sys/kernel_types.h>
 9 | #include <sys/proc_info.h>
10 | #ifndef _cuckoohooks_h
11 | #define _cuckoohooks_h
12 | #define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
13 | 
14 | struct open_nocancel_args {
15 | 	//char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
16 | 	//char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
17 | 	//char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
18 | };
19 | 
20 | int open(const char *path, int oflag,...);
21 | int	creat(const char *pathname, mode_t mode);
22 | int unlink(const char *path);
23 | int rename(const char *old, const char *new);
24 | ssize_t write(int fildes, const void *buf, size_t nbyte);
25 | ssize_t pwrite(int fildes, const void *buf, size_t nbyte, off_t offset);
26 | ssize_t writev(int fildes, const struct iovec *iov, int iovcnt);
27 | int ptrace(int request, pid_t pid, caddr_t addr, int data);
28 | pid_t vfork(void);
29 | pid_t fork(void);
30 | int posix_spawn(pid_t *restrict pid, const char *restrict path,
31 |             const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp,
32 |             char *const argv[restrict], char *const envp[restrict]);
33 | int posix_spawnp(pid_t *restrict pid, const char *restrict file,
34 |              const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp,
35 |              char *const argv[restrict], char *const envp[restrict]);
36 | //int execve(const char *path, char *const argv[], char *const envp[]);
37 | 
38 | void write_to_file(const char* str, const char* command);
39 | char* append_strings(const char* str1, const char* str2);
40 | int file_exists(const char* path);
41 | void copy_file(const char* src, const char* dest);
42 | char* timestamp();
43 | int in_logfolder(const char* path);
44 | char* buffer_to_hex(const void *buf, size_t size, off_t offset);
45 | 
46 | struct {
47 |     // name of the pipe to communicate with cuckoo
48 |     char pipe_name[PATH_MAX];
49 |     
50 |     // results directory, has to be hidden
51 |     char results[PATH_MAX];
52 |     
53 |     // analyzer directory, has to be hidden
54 |     char analyzer[PATH_MAX];
55 |     
56 |     // is this the first process or not?
57 |     int first_process;
58 |     
59 |     // how many milliseconds since startup
60 |     unsigned int startup_time;
61 |     
62 |     // do we want to enable the retaddr check?
63 |     int retaddr_check;
64 |     
65 |     // server ip and port
66 |     unsigned int host_ip;
67 |     unsigned short host_port;
68 | } g_config;
69 | 
70 | void read_config();
71 | 
72 | #endif
73 | 


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/cuckoohooks.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks.o


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/cuckoohooks_32.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks_32.dylib


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/cuckoohooks_64.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/dylib/cuckoohooks_64.dylib


--------------------------------------------------------------------------------
/analyzer/darwin/dylib/makefile:
--------------------------------------------------------------------------------
# Builds the hook library for i386 and x86_64 and merges both slices into
# the universal cuckoohooks.dylib.

# "all" is a command name, not a file: keep it working even if a file called
# "all" ever appears in this directory.
.PHONY: all

all: cuckoohooks.dylib

# Rebuild only when the sources changed. cuckoohooks.o is reused for both
# architectures, so after a build it holds the x86_64 object.
cuckoohooks.dylib: cuckoohooks.c cuckoohooks.h
	gcc -fno-common -O0 -g -c cuckoohooks.c -arch i386
	gcc -dynamiclib -o cuckoohooks_32.dylib cuckoohooks.o -arch i386
	gcc -fno-common -O0 -g -c cuckoohooks.c -arch x86_64
	gcc -dynamiclib -o cuckoohooks_64.dylib cuckoohooks.o -arch x86_64
	lipo -create cuckoohooks_32.dylib cuckoohooks_64.dylib -output cuckoohooks.dylib
7 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/lib/__init__.py


--------------------------------------------------------------------------------
/analyzer/darwin/lib/api/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/api/apitrace:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | # #!/usr/bin/sh
  3 | #
  4 | # dapptrace - trace user and library function usage.
  5 | #             Written using DTrace (Solaris 10 3/05).
  6 | #
  7 | # The default output traces user functions as they are called. Options
  8 | #  can be used to examine libraries and timestamps.
  9 | #
 10 | # 17-Jun-2005, ver 0.61         (early release! check for newer versions)
 11 | #
# USAGE: dapptrace [-aU] [-b bufsize] [-u lib1,lib2,...] { -p PID | command }
#
#          -p PID          # examine this PID
#          -a              # trace all libraries + user functions (same as -U)
#          -u lib1,lib2... # trace these libraries instead (comma-separated)
#          -U              # trace all libraries + user functions
#          -b bufsize      # dynamic variable buf size (default is "10m")
#          -h              # print usage and exit
#
#  This stripped-down version does not implement the timestamp, elapsed-time,
#  CPU-time, flow-indent and lwpid options of the original dapptrace; any
#  other option letter prints the usage message.
#  eg,
#       dapptrace df -h       # run and examine the "df -h" command
#       dapptrace -p 1871     # examine PID 1871
#       dapptrace -u libSystem* -p 1871   # trace one library in PID 1871
#
 29 | #
 30 | # The elapsed times are interesting, to help identify calls that take
 31 | #  some time to complete (during which the process may have context
 32 | #  switched off the CPU). 
 33 | #
 34 | # SEE ALSO: dappprof       # DTraceToolkit
 35 | #           dtruss         # DTraceToolkit
 36 | #           apptrace
 37 | #
 38 | # COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
 39 | #
 40 | # CDDL HEADER START
 41 | #
 42 | #  The contents of this file are subject to the terms of the
 43 | #  Common Development and Distribution License, Version 1.0 only
 44 | #  (the "License").  You may not use this file except in compliance
 45 | #  with the License.
 46 | #
 47 | #  You can obtain a copy of the license at Docs/cddl1.txt
 48 | #  or http://www.opensolaris.org/os/licensing.
 49 | #  See the License for the specific language governing permissions
 50 | #  and limitations under the License.
 51 | #
 52 | # CDDL HEADER END
 53 | #
 54 | # Author: Brendan Gregg  [Sydney, Australia]
 55 | #
 56 | # 16-May-2005   Brendan Gregg   Created this.
 57 | #
 58 | 
 59 | #
 60 | # NOTE: THIS DAPPTRACE HAS BEEN STRIPPED DOWN AND MODIFIED FOR PERFORMANCE REASONS
 61 | #
 62 | 
 63 | 
 64 | ##############################
 65 | # --- Process Arguments ---
 66 | #
 67 | 
 68 | ### Default variables
 69 | opt_pid=0; pid=0; opt_indent=0; opt_lib=0; lib=""
 70 | opt_liball=0
 71 | opt_command=0; command=""; opt_buf=0; buf="10m"
 72 | 
 73 | ### Process options
 74 | while getopts ab:cdhp:u:U name
 75 | do
 76 |         case $name in
 77 |     a)      opt_liball=1;;
 78 | 	b)	opt_buf=1; buf=$OPTARG ;;
 79 |         p)      opt_pid=1; pid=$OPTARG ;;
 80 |         u)      opt_lib=1; lib=$OPTARG ;;
 81 |         U)      opt_liball=1 ;;
 82 |         h|?)    cat <<-END >&2
 83 | 		USAGE: dapptrace [-acdeholFLU] [-u lib] { -p PID | command }
 84 | 
 85 | 		          -p PID          # examine this PID
 86 | 		          -a              # print all details
 87 | 		          -u lib1,lib2... # trace given libraries
 88 | 		          -U              # trace all libraries + user funcs
 89 | 		          -b bufsize      # dynamic variable buf size
 90 | 		   eg,
 91 | 		       dapptrace df -h       # run and examine "df -h"
 92 | 		       dapptrace -p 1871     # examine PID 1871
 93 | 		END
 94 | 		exit 1
 95 |         esac
 96 | done
 97 | shift `expr $OPTIND - 1`
 98 | 
 99 | ### Option logic
100 | if [ $opt_pid -eq 0 ]; then
101 | 	opt_command=1
102 | 	if [ "$*" = "" ]; then
103 | 		$0 -h
104 | 		exit
105 | 	fi
106 | 	command="$*"
107 | fi
108 | 
### Probe logic
# Builds the entry/return probe descriptions for the pid provider:
#   -a / -U : every function of every module
#   -u list : every function of each listed library (comma-separated)
#   default : functions of the program itself (a.out)
if [ $opt_liball -eq 1 ]; then
	probe_entry='pid$target:::entry'
	probe_return='pid$target:::return'
elif [ $opt_lib -eq 1 ]; then
	# start from empty lists so nothing inherited from the environment leaks in
	probe_entry=""
	probe_return=""

	# Split the library list on commas only. Globbing is switched off while
	# splitting so patterns such as "libSystem*" reach DTrace untouched
	# instead of being expanded against files in the current directory.
	old_ifs=$IFS
	IFS=","
	set -f
	for l in $lib
	do
		probe_entry="${probe_entry}pid\$target:${l}::entry,"
		probe_return="${probe_return}pid\$target:${l}::return,"
	done
	set +f
	# restore normal word splitting for the rest of the script
	IFS=$old_ifs

	# strip the extra trailing commas
	probe_entry=${probe_entry%,}
	probe_return=${probe_return%,}
else
	probe_entry='pid$target:a.out::entry'
	probe_return='pid$target:a.out::return'
fi
133 | 
134 | #################################
135 | # --- Main Program, DTrace ---
136 | #
137 | 
138 | ### Define D Script
139 | dtrace='
140 |  #pragma D option quiet
141 | 
142 |  /*
143 |   * Command line arguments
144 |   */
145 |  inline int OPT_command   = '$opt_command';
146 |  inline int OPT_liball    = '$opt_liball';
147 |  inline int OPT_pid       = '$opt_pid';
148 |  inline int PID           = '$pid';
149 |  inline string NAME       = "'$pname'";
150 | 
151 |  dtrace:::BEGIN 
152 |  {
153 | 	/* print header */
154 | 	printf("%-8s  ","PID/THRD");
155 | 	printf("CALL(args) \t\t = return\n");
156 | 
157 | 	/* indent depth */
158 | 	depth = 0;
159 |  }
160 | 
161 |  /*
162 |   * Save syscall entry info
163 |   */
164 |  '$probe_entry'
165 |  /depth == 0/
166 |  {
167 | 	/* set function depth */
168 | 	this->fdepth = ++fdepth[probefunc];
169 | 	depth += 2;
170 | 
171 | 	/* set start details */
172 | 	self->start[probefunc,this->fdepth] = timestamp;
173 | 
174 | 	/* print optional fields */
175 | 	printf("%5d/0x%x:  ",pid,tid);
176 | 	printf("%*s",depth,""); /* print indentation */
177 | 
178 | 	/* print main data */
179 | 	printf("-> ");
180 | 	printf("%s:",probemod); /* print the library name */
181 | 	printf("%s(0x%X, 0x%X, 0x%X)\t\t\n",probefunc,arg0,arg1,arg2);
182 | 
183 |  }
184 | 
185 |  /*
186 |   * Print return data
187 |   */
188 |  /* print 3 arg output - default */
189 |  '$probe_return'
190 |  /depth == 2 && self->start[probefunc,fdepth[probefunc]]/
191 |  {
192 | 	/* fetch function depth */
193 | 	this->fdepth = fdepth[probefunc];
194 | 
195 | 	/* print optional fields */
196 | 	printf("%5d/0x%x:  ",pid,tid);
197 | 	printf("%*s",depth,""); /* print indentation */
198 | 
199 | 	/* print main data */
200 | 	printf("<- ");
201 | 	printf("%s:",probemod); /* print library name */
202 | 	printf("%s = %d\n",probefunc,(int)arg0);
203 | 	depth -= 2;
204 | 	fdepth[probefunc]--;
205 |  }
206 | 
207 |  /* reset indent depth */
208 |  /* SOLARIS: profile:::tick-1sec */
209 |  profile:::tick-10Hz
210 |  {
211 | 	/* 
212 | 	 * some probes generated by the pid provider have entries
213 |  	 * but not returns. this is a kludge to fix that problem. this
214 | 	 * also explains fdepth[probefunc] rather than a single depth.
215 | 	 */
216 | 	depth = 0;
217 |  }
218 | 
219 |  dtrace:::END
220 | '
221 | 
### Run DTrace
# Either launch the command under DTrace (probes are evaluated at exec time)
# or attach to the already running PID. All output goes to stderr.
# The buffer size is quoted so it is passed as a single word whatever IFS
# happens to be at this point.
if [ $opt_command -eq 1 ]; then
	/usr/sbin/dtrace -Z -x "dynvarsize=$buf" -x evaltime=exec -n "$dtrace" \
	    -c "$command" >&2
else
	/usr/sbin/dtrace -Z -x "dynvarsize=$buf" -n "$dtrace" -p "$pid" >&2
fi
229 | 
230 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/api/process.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  3 | this work by or on behalf of the U.S. Government. 
  4 | NOTICE:
  5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  8 | 
  9 | Provides the dylib injection functionality and a wrapper for process execution.
 10 | The classes that extend this for different file types are found in darwin/modules/packages.
 11 | """
 12 | 
import logging
import os
import random
import signal
import subprocess
import sys
import time
from shutil import copy

import psutil

from lib.common.rand import random_string
from lib.common.constants import PATHS, PIPE, SHUTDOWN_MUTEX
from lib.core.config import Config #parses the analysis.conf configuration file
 25 | 
 26 | log = logging.getLogger(__name__)
 27 | 
 28 | #This is the all-important list of APIs to trace
 29 | #There is an overhead for each one you add, so choose them carefully. Some examples are below.
 30 | api_traces = ["libSystem*", #provides libc API as well as access to kernel methods and low-level system calls
 31 |               "CoreFoundation", #Provides primitives, data structures, etc.
 32 |               "Foundation", #data structure support
 33 |               "CoreServices", #access to things like Bonjour, Spotlight, AppleEvents, etc.
 34 |               "libgcc*", #gcc, obviously
 35 |               ]
 36 | 
 37 | def randomize_dylib(dylib_path):
 38 |     """Randomize dylib name, to prevent easy detection by malware.
 39 |     @return: new dylib path.
 40 |     """
 41 |     new_dylib_name = random_string(6) # generate a random name
 42 |     # make a path to the random name in the current working directory
 43 |     new_dylib_path = os.path.join(os.getcwd(), "dylib", "{0}.dylib".format(new_dylib_name))
 44 | 
 45 |     try:
 46 |         # copy the dylib file to the new path in the current working directory
 47 |         copy(dylib_path, new_dylib_path)
 48 |         return new_dylib_path
 49 |     except: #if this fails, just return the old path
 50 |         return dylib_path
 51 | 
 52 | class Process:
 53 |     """ OS X process """
 54 |     first_process = True
 55 |     cuckoohooks = "cuckoohooks.dylib"
 56 |     startup_time = 0
 57 | 
 58 |     def __init__(self, pid=0, h_process=None, thread_id=0, h_thread=0):
 59 |         """@param pid: PID.
 60 |         @param h_process: process handle.
 61 |         @param thread_id: thread id.
 62 |         @param h_thread: thread handle.
 63 |         """
 64 |         self.pid = pid
 65 |         self.h_process = h_process
 66 |         self.thread_id = thread_id
 67 |         self.h_thread = h_thread
 68 |         self.suspended = True #assume it is suspended to start with
 69 |         self.event_handle = None
 70 |         self.is_tracing = False
 71 | 
 72 |     def execute(self, path, args=None, suspended=False):
 73 |         """Execute sample process.
 74 |         @param path: sample path.
 75 |         @param args: process args.
 76 |         @param suspended: is suspended.
 77 |         @return: operation status.
 78 |         """
 79 |         #check to make sure the file is accessible
 80 |         if os.access(path, os.F_OK):
 81 |             log.info("File exists at path \"%s\"", path)
 82 | 
 83 |         #by default you can't execute in the /tmp directory, so have to change permissions
 84 |         i = 0
 85 |         while not os.access(path, os.X_OK) and i < 2:
 86 |             os.chmod(path, 0755)
 87 |             i += 1
 88 |         if not os.access(path, os.X_OK):
 89 |             log.error("No permissions to execute file at path \"%s\", "
 90 |                       "execution aborted", path)
 91 |             return False
 92 | 
 93 |         # fork a child process
 94 |         # Note: this could also be done with the subprocess or multiprocessing modules
 95 |         # but neither of them gave the independence I was looking for.
 96 |         try:
 97 |             newpid = os.fork()
 98 |         except OSError, e:
 99 |             log.error("Failed to execute process from path \"%s\" with "
100 |                       "arguments \"%s\" (Error: %s)", path, args, e)
101 |             return False
102 | 
103 |         # randomize the hooking library name
104 |         dylib = randomize_dylib(os.path.join("dylib", self.cuckoohooks))
105 | 
106 |         if newpid == 0: #if this is the child process
107 |             #set the environment variables for the syscall hook injection
108 |             new_environ = os.environ
109 |             new_environ['DYLD_FORCE_FLAT_NAMESPACE'] = '1'
110 |             new_environ['DYLD_INSERT_LIBRARIES'] = dylib
111 |             log.info("Child process with pid %d", os.getpid())
112 |             self.pid = os.getpid()
113 | 
114 |             Process.first_process = False
115 |             # set the sid to make this child process independent of the parent
116 |             os.setsid()
117 | 
118 |             # wait for traces to be initialized
119 |             app_log = os.path.join(PATHS["logs"], "api_calls_"+str(self.pid)+".log")
120 |             while not os.path.exists(app_log):
121 |                 time.sleep(0.3)
122 |             # execute the given executable
123 |             if args is None:
124 |                 os.execve(path, (path,), new_environ)
125 |             else:
126 |                 os.execve(path, args, new_environ)
127 | 
128 |             #exit when finished
129 |             os._exit(0)
130 |         else: #this is in the parent process
131 |             log.info("Parent process with pid %d", os.getpid())
132 |             #store the child process info
133 |             self.pid = newpid
134 |             self.h_process = psutil.Process(self.pid)
135 | 
136 |             self.start_trace()
137 | 
138 |             log.info("Successfully executed process from path \"%s\" with "
139 |                      "arguments \"%s\" with pid %d", path, args, self.pid)
140 | 
141 |             return True
142 | 
143 |     def start_trace(self):
144 |         """
145 |         Once a process has been started, write the library config file
146 |         and start the system call tracing.
147 |         @return: None
148 |         """
149 | 
150 |         # write configuration file for injected library
151 |         config_path = os.path.join(os.getenv("TMPDIR"), "%s.conf" % self.pid)
152 |         log.info("Writing configuration file at %s.", config_path)
153 |         with open(config_path, "w") as config:
154 |             cfg = Config("analysis.conf")
155 | 
156 |             # The first time we come up with a random startup-time.
157 |             if Process.first_process:
158 |                 # This adds 1 up to 30 times of 20 minutes to the startup
159 |                 # time of the process, therefore bypassing anti-vm checks
160 |                 # which check whether the VM has only been up for <10 minutes.
161 |                 Process.startup_time = random.randint(1, 30) * 20 * 60 * 1000
162 | 
163 |             config.write("host-ip={0}\n".format(cfg.ip))
164 |             config.write("host-port={0}\n".format(cfg.port))
165 |             config.write("pipe={0}\n".format(PIPE))
166 |             config.write("results={0}\n".format(PATHS["drop"]+"/"))
167 |             config.write("analyzer={0}\n".format(os.getcwd()))
168 |             config.write("first-process={0}\n".format(Process.first_process))
169 |             config.write("startup-time={0}\n".format(Process.startup_time))
170 |             config.write("shutdown-mutex={0}\n".format(SHUTDOWN_MUTEX))
171 | 
172 |             Process.first_process = False
173 | 
174 |         # Start system call tracing
175 |         # Dtruss traces system calls using Dtrace
176 |         pargs = ["dtruss", "-l", "-p", str(self.pid)]
177 |         truss_log = os.path.join(PATHS["logs"], "system_calls_"+str(self.pid)+".log")
178 |         results = open(truss_log, "a+")
179 |         try:
180 |             proc2 = subprocess.Popen(pargs, stdout=results, stderr=results)
181 |             log.info("Starting Dtruss on pid %d", self.pid)
182 |         except (OSError, ValueError):
183 |             log.exception("Failed to start system call monitor.")
184 |         results.close()
185 | 
186 |         # Wait for initialization lines to appear in log files
187 |         while os.path.getsize(truss_log) == 0:
188 |             time.sleep(0.5)
189 | 
190 |         # Dapptrace traces API calls using Dtrace. I used my own version modified for performance
191 |         # NOTE: This slows down the program A LOT if you use the -U option (tracks all libraries) instead of -u
192 |         os.chmod("lib/api/apitrace", 0755)
193 |         pargs = ["lib/api/apitrace", "-u", ",".join(api_traces), "-p", str(self.pid)]
194 |         app_log = os.path.join(PATHS["logs"], "api_calls_"+str(self.pid)+".log")
195 |         results2 = open(app_log, "a+")
196 |         try:
197 |             proc1 = subprocess.Popen(pargs, stdout=results2, stderr=results2)
198 |             log.info("Starting apitrace on pid %d", self.pid)
199 |         except (OSError, ValueError):
200 |             log.exception("Failed to start api call monitor.")
201 |         results2.close()
202 | 
203 |         # wait for initialization lines to appear in log files
204 |         while os.path.getsize(app_log) == 0:
205 |             time.sleep(0.5)
206 | 
207 | 
208 |         self.is_tracing = True
209 |         self.resume()
210 | 
211 |     def is_alive(self):
212 |         """Process is alive?
213 |         @return: process status.
214 |         """
215 |         exists = True
216 |         if not self.h_process:
217 |             exists = self.open()
218 | 
219 |         if not exists: #program has already exited
220 |             return False
221 | 
222 |         #make sure the process is both in the table and not a zombie (ie, terminated)
223 |         return self.h_process.is_running() and not (self.h_process.status() == psutil.STATUS_ZOMBIE)
224 | 
225 |     def get_filepath(self):
226 |         """Get process image file path.
227 |         @return: decoded file path.
228 |         """
229 |         if not self.h_process:
230 |             self.open()
231 | 
232 |         return self.h_process.name()
233 | 
234 | 
235 |     def exit_code(self):
236 |         """Get process exit code.
237 |         @return: exit code value.
238 |         """
239 |         if not self.h_process:
240 |             self.open()
241 | 
242 |         return os.waitpid(self.pid)
243 | 
244 |     def open(self):
245 |         """Open a process and/or thread.
246 |         @return: operation status.
247 |         """
248 |         ret = bool(self.pid or self.thread_id)
249 |         if self.pid and not self.h_process:
250 |             try:
251 |                 self.h_process = psutil.Process(self.pid)
252 |                 ret = True
253 |             except: #unable to get process
254 |                 ret = False
255 | 
256 |         return ret
257 | 
258 |     def get_parent_pid(self):
259 |         """Get the Parent Process ID."""
260 |         if not self.h_process:
261 |             self.open()
262 | 
263 |         return self.h_process.ppid()
264 | 
265 |     def terminate(self):
266 |         """Terminate process.
267 |         @return: operation status.
268 |         """
269 |         if self.h_process == 0:
270 |             self.open()
271 | 
272 |         pargs = ["kill", str(self.pid)]
273 |         count = 0 #sometimes this requires multiple tries
274 |         while self.h_process.status() == psutil.STATUS_RUNNING:
275 |             # Note: both self.h_process.terminate() and os.kill were unreliable for termination
276 |             log.info("Attempting to kill process " + str(self.pid) + ", attempt " + str(count))
277 |             proc = subprocess.Popen(pargs)
278 |             count +=1
279 |             if count > 5:
280 |                 break
281 |         if self.h_process.status() == psutil.STATUS_ZOMBIE or self.h_process.status() == psutil.STATUS_DEAD:
282 |             log.info("Successfully terminated process with pid %d", self.pid)
283 |             return True
284 |         else:
285 |             log.error("Failed to terminate process with pid %d", self.pid)
286 |             return False
287 | 
288 |     def resume(self):
289 |         """Resume a suspended thread.
290 |         @return: operation status.
291 |         """
292 |         if not self.suspended:
293 |             log.warning("The process with pid %d was not suspended, so it was not resumed"
294 |                         % self.pid)
295 |             return False
296 | 
297 |         if self.is_tracing: # only resume when Dtrace probes are in place
298 |             pargs = ["kill", "-SIGCONT", str(self.pid)]
299 |             count = 0 #sometimes this requires multiple tries
300 |             while self.h_process.status() == psutil.STATUS_STOPPED:
301 |                 # Note: both self.h_process.resume() and os.kill were unreliable for resuming
302 |                 log.info("Attempting to resume process " + str(self.pid) + ", attempt " + str(count))
303 |                 proc = subprocess.Popen(pargs)
304 |                 count +=1
305 |                 if count > 5:
306 |                     break
307 |             if self.h_process.status() == psutil.STATUS_RUNNING:
308 |                 log.info("Successfully resumed process with pid %d", self.pid)
309 |                 return True
310 |             else:
311 |                 log.error("Failed to resume process with pid %d", self.pid)
312 |                 return False
313 | 
if __name__ == "__main__":
    # This file only provides definitions; running it directly just prints
    # the message below.
    print("Why would you do that?")


--------------------------------------------------------------------------------
/analyzer/darwin/lib/api/screenshot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  3 | this work by or on behalf of the U.S. Government. 
  4 | NOTICE:
  5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  8 | """
  9 | 
 10 | import math
 11 | import sys
 12 | import logging
 13 | import Quartz.CoreGraphics as CG
 14 | from AppKit import *
 15 | from Quartz import NSURL, CGImageDestinationCreateWithURL, CGImageDestinationAddImage, CGImageDestinationFinalize
 16 | from LaunchServices import kUTTypePNG
 17 | import os
 18 | import tempfile
 19 | 
 20 | sys.path.append('../common')
 21 | 
 22 | try:
 23 |     import ImageChops
 24 |     import Image
 25 |     import ImageOps
 26 |     HAVE_PIL = True
 27 | except:
 28 |     HAVE_PIL = False
 29 | 
 30 | log = logging.getLogger(__name__)
 31 | 
 32 | class Screenshot:
 33 |     """Get screenshots."""
 34 | 
 35 |     def have_pil(self):
 36 |         """Is Python Image Library installed?
 37 |         @return: installed status.
 38 |         """
 39 |         return HAVE_PIL
 40 | 
 41 |     def equal(self, img1, img2):
 42 |         """Compares two screenshots using Root-Mean-Square Difference (RMS).
 43 |         @param img1: screenshot to compare.
 44 |         @param img2: screenshot to compare.
 45 |         @return: equal status.
 46 |         """
 47 |         if not HAVE_PIL:
 48 |             return None
 49 | 
 50 |         # To get a measure of how similar two images are, we use
 51 |         # root-mean-square (RMS). If the images are exactly identical,
 52 |         # this value is zero.
 53 |         diff = ImageChops.difference(img1, img2)
 54 |         h = diff.histogram()
 55 |         sq = (value * ((idx % 256)**2) for idx, value in enumerate(h))
 56 |         sum_of_squares = sum(sq)
 57 |         rms = math.sqrt(sum_of_squares/float(img1.size[0] * img1.size[1]))
 58 | 
 59 |         # Might need to tweak the threshold. I have set it sensitive enough so that it should
 60 |         # detect installer changes but not so sensitive that it triggers every second with Activity Monitor open
 61 |         return rms < 50
 62 | 
 63 |     def take(self):
 64 |         """Take a screenshot.
 65 |         Unfortunately, PIL's screenshot functions are Windows-only.
 66 |         Most OS X solutions in Python involve loading an entire GUI library, like gtk or wxPython,
 67 |         or using the built-in screencapture program and then reading the screenshot from disk.
 68 |         In the interest of speed, I have tried to avoid those things by using PyObjC.
 69 |         @return: screenshot or None.
 70 |         """
 71 | 
 72 |         #get the size of the screen
 73 |         rect = NSScreen.mainScreen().frame()
 74 |         size = (int(rect.size.width), int(rect.size.height))
 75 | 
 76 |         try:
 77 |             image = CG.CGWindowListCreateImage(rect, CG.kCGWindowListOptionOnScreenOnly, CG.kCGNullWindowID, CG.kCGWindowImageDefault)
 78 |         except:
 79 |             log.exception("Unable to take screenshot.")
 80 | 
 81 |         # write the image to a temporary file in memory to save I/O performance
 82 |         f = tempfile.NamedTemporaryFile(mode='rwb+', delete=False)
 83 |         url = NSURL.fileURLWithPath_(f.name)
 84 |         destination = CGImageDestinationCreateWithURL(url, kUTTypePNG, 1, None)
 85 |         CGImageDestinationAddImage(destination, image, None)
 86 |         CGImageDestinationFinalize(destination)
 87 |         f.flush()
 88 |         f.seek(0)
 89 |         f.close()
 90 | 
 91 |         # Convert CGImage to PIL image for comparisons
 92 |         pil_image = Image.open(f.name)
 93 | 
 94 |         os.unlink(f.name)
 95 | 
 96 |         return pil_image
 97 | 
 98 |     def image_fix(self, image):
 99 |         #invert the color
100 |         inverted_image = image
101 |         """
102 |         if image.mode == 'RGBA':
103 |             r, g, b, a = image.split()
104 |             rgb_image = Image.merge('RGB', (r,g,b))
105 |             inverted_image = ImageOps.invert(rgb_image)
106 |             r2, g2, b2 = inverted_image.split()
107 |             inverted_image = Image.merge('RGBA', (r2,g2,b2,a))
108 | 
109 |         else: #this nice simple method only works with RGB
110 |             inverted_image = ImageOps.invert(image)
111 |         """
112 | 
113 |         # Rotate and flip horizontally to the correct orientation
114 |         return inverted_image.rotate(180).transpose(Image.FLIP_LEFT_RIGHT)


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/lib/common/__init__.py


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/abstracts.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Extremely bare-bones copy of the Cuckoo Package-Process structure.
  3 | I have converted what I could for OS X and scrapped the rest.
  4 | """
  5 | 
  6 | import os
  7 | import os.path
  8 | import plistlib
  9 | import logging
 10 | 
 11 | from lib.api.process import Process
 12 | from lib.common.exceptions import CuckooPackageError
 13 | 
 14 | #get logger
 15 | log = logging.getLogger()
 16 | 
 17 | class Package(object):
 18 |     """Base abstact analysis package."""
 19 |     PATHS = []
 20 | 
 21 |     def __init__(self, options={}):
 22 |         """@param options: options dict."""
 23 |         self.options = options
 24 |         self.pids = []
 25 | 
 26 |     def set_pids(self, pids):
 27 |         """Update list of monitored PIDs in the package context.
 28 |         @param pids: list of pids.
 29 |         """
 30 |         self.pids = pids
 31 | 
 32 |     def start(self):
 33 |         """Run analysis packege.
 34 |         @param path: sample path.
 35 |         @raise NotImplementedError: this method is abstract.
 36 |         """
 37 |         raise NotImplementedError
 38 | 
 39 |     def check(self):
 40 |         """Check."""
 41 |         return True
 42 | 
 43 |     def _enum_paths(self):
 44 |         raise NotImplementedError
 45 | 
 46 |     def get_path(self, application):
 47 |         raise NotImplementedError
 48 | 
 49 |     def execute(self, path, args):
 50 | 
 51 |         p = Process()
 52 |         if not p.execute(path=path, args=args, suspended=True):
 53 |             raise CuckooPackageError("Unable to execute the initial process, "
 54 |                                      "analysis aborted.")
 55 |         return p.pid
 56 | 
 57 |     def finish(self):
 58 |         """Finish run.
 59 |         If specified to do so, this method dumps the memory of
 60 |         all running processes.
 61 |         """
 62 |         return True
 63 | 
 64 |     def getAppFilePath(self, file_path):
 65 |         # the reason we don't just do "open file" is because OS X has restrictions on open that make it hard to trace
 66 |         # and you can't get the started process' pid from it
 67 |         open = "/usr/bin/open"
 68 | 
 69 |         #find Info.plist
 70 |         plist = ""
 71 |         path = self.findFile(file_path, "Info.plist")
 72 | 
 73 |         if path == "":  #no Info.plist found, this is an illegally structured app
 74 |             log.info("No Info.plist found within .app file")
 75 |             return open
 76 |         else:
 77 |             plist = plistlib.readPlist(path)
 78 | 
 79 |         try:
 80 |             #get the name of the main executable of this app
 81 |             exec_file = plist["CFBundleExecutable"]
 82 |         except KeyError: #no executable was listed, this is an illegally structured app
 83 |             log.info("No main executable name found in Info.plist")
 84 |             return open
 85 | 
 86 |         #get the full path of the executable
 87 |         return self.findFile(file_path, exec_file)
 88 | 
 89 |     def findFile(self, path, name):
 90 |         result = ""
 91 |         for root, dirs, files in os.walk(path):
 92 |             for f in files:
 93 |                 if name in f:
 94 |                     return os.path.join(root, f)
 95 |         return result
 96 | 
 97 |     def findDir(self, path, name):
 98 |         result = ""
 99 |         for root, dirs, files in os.walk(path):
100 |             for d in dirs:
101 |                 if name in d:
102 |                     return os.path.join(root, d)
103 |         return result
104 | 
105 | 
class Auxiliary(object):
    """Empty base class for auxiliary modules."""
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/constants.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import os
 4 | from lib.common.rand import random_string
 5 | 
 6 | 
 7 | ROOT = os.path.join(os.getenv("HOME"), random_string(6, 10))
 8 | 
 9 | PATHS = {"root"   : ROOT,
10 |          "logs"   : os.path.join(ROOT, "logs"),
11 |          "files"  : os.path.join(ROOT, "files"),
12 |          "shots"  : os.path.join(ROOT, "shots"),
13 |          "memory" : os.path.join(ROOT, "memory"),
14 |          "drop"   : os.path.join(ROOT, "drop")}
15 | 
16 | PIPE = os.path.join(os.getenv("TMPDIR"), random_string(6, 10))
17 | SHUTDOWN_MUTEX = "Global/" + random_string(6, 10)
18 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/exceptions.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2010-2014 Cuckoo Foundation.
 2 | # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
 3 | # See the file 'docs/LICENSE' for copying permission.
 4 | # Note: This is unmodified from the Cuckoo Windows version
 5 | 
 6 | class CuckooError(Exception):
 7 |     pass
 8 | 
 9 | 
class CuckooPackageError(Exception):
    """Exception type for analysis package errors."""
12 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/hashing.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | BUFSIZE = 1024*1024
 4 | 
 5 | 
 6 | def hash_file(method, path):
 7 |     """Calculates an hash on a file by path.
 8 |     @param method: callable hashing method
 9 |     @param path: file path
10 |     @return: computed hash string
11 |     """
12 |     f = open(path, "rb")
13 |     h = method()
14 |     while True:
15 |         buf = f.read(BUFSIZE)
16 |         if not buf:
17 |             break
18 |         h.update(buf)
19 |     return h.hexdigest()
20 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/rand.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import string
 3 | 
 4 | def random_string(minimum, maximum=None):
 5 |     if maximum is None:
 6 |         maximum = minimum
 7 | 
 8 |     count = random.randint(minimum, maximum)
 9 |     return "".join(random.choice(string.ascii_letters) for x in xrange(count))
10 | 
def random_integer(digits):
    """Return a random integer between 10**(digits-1) and 10**digits - 1.
    @param digits: number of decimal digits of the result.
    @return: random integer, both bounds included.
    """
    return random.randint(10 ** (digits - 1), (10 ** digits) - 1)
15 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/common/results.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Required only minimal changes to work with OS X
 3 | """
 4 | 
 5 | import logging
 6 | import socket
 7 | 
 8 | from lib.core.config import Config
 9 | 
10 | log = logging.getLogger(__name__)
11 | 
12 | BUFSIZE = 1024*1024
13 | 
def upload_to_host(file_path, dump_path):
    """Send a local file through a NetlogFile connection in BUFSIZE chunks.

    Errors are logged and not raised; the file and the connection are
    closed in every case.
    @param file_path: path of the local file to read.
    @param dump_path: path handed to NetlogFile.
    """
    connection = None
    source = None
    try:
        connection = NetlogFile(dump_path)
        source = open(file_path, "rb")

        while True:
            chunk = source.read(BUFSIZE)
            if not chunk:
                break
            connection.send(chunk)
    except Exception as e:
        log.error("Exception uploading file to host: %s", e)
    finally:
        if source:
            source.close()
        if connection:
            connection.close()
31 | 
class NetlogConnection(object):
    """TCP connection to the host (ip and port read from analysis.conf).

    The protocol header given as proto is sent right after connecting.
    """

    def __init__(self, proto=""):
        """@param proto: header sent to the host after connecting."""
        config = Config(cfg="analysis.conf")
        self.hostip, self.hostport = config.ip, config.port
        self.sock, self.file = None, None
        self.proto = proto

    def connect(self):
        """Connect to the host and send the protocol header.

        Failures are ignored on purpose; self.sock and self.file are only
        replaced once the connection is fully set up.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((self.hostip, self.hostport))
            s.sendall(self.proto)
        except Exception:
            # Best effort, but do not leak the unused socket.
            s.close()
        else:
            self.sock = s
            self.file = s.makefile()

    def send(self, data, retry=True):
        """Send data, reconnecting once when the socket reports an error.
        @param data: data to send.
        @param retry: resend the data once after a reconnect.
        """
        try:
            self.sock.sendall(data)
        except socket.error:
            self.connect()
            if retry:
                self.send(data, retry=False)
        except Exception:
            # We really have nowhere to log this, if the netlog connection
            # does not work, we can assume that any logging won't work either.
            # So we just fail silently.
            self.close()

    def close(self):
        """Close the file object and the socket, ignoring errors."""
        # Close each resource on its own, so a failure (or a missing file
        # object) does not keep the socket open.
        for resource in (self.file, self.sock):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                pass
69 | 
class NetlogFile(NetlogConnection):
    """Netlog connection that opens with a FILE header for a file path."""

    def __init__(self, filepath):
        """Store the path and connect right away.
        @param filepath: path named in the FILE header.
        """
        self.filepath = filepath
        header = "FILE\n{0}\n".format(self.filepath)
        NetlogConnection.__init__(self, proto=header)
        self.connect()
75 | 
class NetlogHandler(logging.Handler, NetlogConnection):
    """Logging handler that sends formatted records over a netlog connection."""

    def __init__(self):
        """Initialise both base classes with a LOG header and connect."""
        logging.Handler.__init__(self)
        NetlogConnection.__init__(self, proto="LOG\n")
        self.connect()

    def emit(self, record):
        """Format the record and send it followed by a newline.
        @param record: log record to send.
        """
        line = "{0}\n".format(self.format(record))
        self.send(line)
85 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/lib/core/__init__.py


--------------------------------------------------------------------------------
/analyzer/darwin/lib/core/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2010-2014 Cuckoo Foundation.
 2 | # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
 3 | # See the file 'docs/LICENSE' for copying permission.
 4 | # Note: This is unmodified from the Cuckoo Windows version
 5 | 
 6 | import ConfigParser
 7 | 
 8 | class Config:
 9 |     def __init__(self, cfg):
10 |         """@param cfg: configuration file."""
11 |         config = ConfigParser.ConfigParser(allow_no_value=True)
12 |         config.read(cfg)
13 | 
14 |         for section in config.sections():
15 |             for name, raw_value in config.items(section):
16 |                 if name == "file_name":
17 |                     value = config.get(section, name)
18 |                 else:
19 |                     try:
20 |                         value = config.getboolean(section, name)
21 |                     except ValueError:
22 |                         try:
23 |                             value = config.getint(section, name)
24 |                         except ValueError:
25 |                             value = config.get(section, name)
26 |                 setattr(self, name, value)
27 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/core/packages.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
 3 | this work by or on behalf of the U.S. Government. 
 4 | NOTICE:
 5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
 8 | """
 9 | 
def choose_package(file_type, file_name):
    """Pick the analysis package from the file type and the file extension.
    @param file_type: file type description.
    @param file_name: file name; its extension is compared case-insensitively.
    @return: package name, or None when no file type is given.
    """
    if not file_type:
        return None

    lowered = file_name.lower()

    # The checks are ordered: the first one that matches wins.
    if "Mach-O" in file_type:
        return "macho"

    if "PDF" in file_type or lowered.endswith(".pdf"):
        return "pdf"

    is_word_type = "Microsoft Word" in file_type or "Microsoft Office Word" in file_type
    if is_word_type or lowered.endswith((".docx", ".doc")):
        return "doc"

    if "Rich Text Format" in file_type or lowered.endswith(".rtf"):
        return "rtf"
    if "property list" in file_type or lowered.endswith(".plist"):
        return "rtf"

    if "HTML" in file_type or lowered.endswith((".htm", ".html")):
        return "html"

    if lowered.endswith(".jar"):
        return "jar"

    if "Zip" in file_type or lowered.endswith(".zip"):
        return "zip"

    if lowered.endswith(".py") or "Python script" in file_type:
        return "python"

    return "generic"
42 | 


--------------------------------------------------------------------------------
/analyzer/darwin/lib/core/startup.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import os
 4 | import logging
 5 | 
 6 | from lib.common.constants import PATHS
 7 | from lib.common.results import NetlogHandler
 8 | 
 9 | log = logging.getLogger()
10 | 
def create_folders():
    """Create every folder listed in PATHS that does not exist yet.

    A folder that cannot be created is logged and skipped.
    """
    for folder in PATHS.values():
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                log.error("Unable to create folder %s", folder)
21 | 
def init_logging():
    """Initialize logger.

    Attaches a stream handler, a netlog handler and a file handler
    (analyzer.log inside PATHS["logs"]) and sets the level to DEBUG.
    """
    verbose_format = logging.Formatter("%(asctime)s [%(name)s] %(levelname)s: %(message)s")

    console = logging.StreamHandler()
    console.setFormatter(verbose_format)
    log.addHandler(console)

    netlog = NetlogHandler()
    netlog.setFormatter(verbose_format)
    log.addHandler(netlog)

    log.setLevel(logging.DEBUG)

    # create the analyzer log file (emptied if it already exists)
    logfile_path = os.path.join(PATHS["logs"], 'analyzer.log')
    logfile = open(logfile_path, 'w')
    logfile.write("")
    logfile.close()

    # use the file for logging, with a shorter record format
    file_handler = logging.FileHandler(logfile_path)
    file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    log.addHandler(file_handler)
44 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/auxiliary/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/mac-sandbox/cb3ac3f31e97843309d4c8cc81bf703c70132c53/analyzer/darwin/modules/auxiliary/__init__.py


--------------------------------------------------------------------------------
/analyzer/darwin/modules/auxiliary/human.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
 3 | this work by or on behalf of the U.S. Government. 
 4 | NOTICE:
 5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
 8 | """
 9 | 
10 | import random
11 | import logging
12 | import time
13 | from threading import Thread
14 | import Quartz.CoreGraphics as CG
15 | from AppKit import *
16 | 
17 | from lib.common.abstracts import Auxiliary
18 | 
19 | log = logging.getLogger(__name__)
20 | 
class Human(Auxiliary, Thread):
    """Human after all"""

    def __init__(self):
        """Prepare the thread and remember the size of the main screen."""
        Thread.__init__(self)
        self.do_run = True
        # get the size of the screen
        rect = NSScreen.mainScreen().frame()
        self.width = int(rect.size.width)
        self.height = int(rect.size.height)

    def stop(self):
        """Make run() leave its loop after the current iteration."""
        self.do_run = False

    def run(self):
        """Move and click the mouse once per second until stop() is called."""
        while self.do_run:
            self.move_mouse()
            self.click_mouse()
            # sleep for one second
            time.sleep(1)

    def move_mouse(self):
        """Move the mouse to a random place on screen.
        This moves at computer-speed, so any malware checking for movement speed will be alerted.
        """
        x = random.randint(0, self.width)
        y = random.randint(0, self.height)
        # create the event
        move = CG.CGEventCreateMouseEvent(None, CG.kCGEventMouseMoved, CG.CGPointMake(x, y), CG.kCGMouseButtonLeft)
        # send the event
        CG.CGEventPost(CG.kCGHIDEventTap, move)

    def click_mouse(self):
        """Left-click at the horizontal middle of the screen, 250 points down."""
        point = CG.CGPointMake(self.width // 2, 250)
        # Every event passes None as its source, like move_mouse() does.
        # Move mouse to top-middle position.
        move = CG.CGEventCreateMouseEvent(None, CG.kCGEventMouseMoved, point, CG.kCGMouseButtonLeft)
        # Mouse down.
        down = CG.CGEventCreateMouseEvent(None, CG.kCGEventLeftMouseDown, point, CG.kCGMouseButtonLeft)
        # Mouse up.
        up = CG.CGEventCreateMouseEvent(None, CG.kCGEventLeftMouseUp, point, CG.kCGMouseButtonLeft)

        # send the events
        CG.CGEventPost(CG.kCGHIDEventTap, move)
        CG.CGEventPost(CG.kCGHIDEventTap, down)
        time.sleep(0.05)
        CG.CGEventPost(CG.kCGHIDEventTap, up)
68 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/auxiliary/screenshots.py:
--------------------------------------------------------------------------------
  1 | """
Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  4 | this work by or on behalf of the U.S. Government. 
  5 | NOTICE:
  6 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  7 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  8 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  9 | """
 10 | 
 11 | import time
 12 | import logging
 13 | import StringIO
 14 | from threading import Thread
 15 | import Quartz.CoreGraphics as CG
 16 | 
 17 | from lib.common.abstracts import Auxiliary
 18 | from lib.common.results import NetlogFile
 19 | from lib.api.screenshot import Screenshot
 20 | 
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Value handed to time.sleep() between two screenshot attempts (seconds).
SHOT_DELAY = 1
 23 | 
 24 | class Screenshots(Auxiliary, Thread):
 25 |     """Take screenshots."""
 26 | 
 27 |     def __init__(self):
 28 |         Thread.__init__(self)
 29 |         self.do_run = True
 30 | 
 31 |     def stop(self):
 32 |         """Stop screenshotting."""
 33 |         self.do_run = False
 34 | 
 35 |     def run(self):
 36 |         """Run screenshotting.
 37 |         @return: operation status.
 38 |         """
 39 |         if not Screenshot().have_pil():
 40 |             log.warning("Python Image Library is not installed, "
 41 |                         "screenshots are disabled")
 42 |             return False
 43 | 
 44 |         img_counter = 0
 45 |         img_last = None
 46 | 
 47 |         while self.do_run:
 48 |             time.sleep(SHOT_DELAY)
 49 |             try:
 50 |                 img_current = Screenshot().take()
 51 |             except IOError as e:
 52 |                 log.error("Cannot take screenshot: %s", e)
 53 |                 continue
 54 | 
 55 |             if img_last:
 56 |                 if Screenshot().equal(img_last, img_current):
 57 |                     continue
 58 |             img_counter += 1
 59 | 
 60 |             #send a return keystroke for installers
 61 |             self.sendKey(0x24)
 62 | 
 63 |             try:
 64 |                 # workaround as PIL can't write to the socket file object :(
 65 |                 tmpio = StringIO.StringIO()
 66 |                 img_current.save(tmpio, format="PNG")
 67 |                 tmpio.seek(0)
 68 |             except:
 69 |                 log.exception("Unable to write screenshot to disk.")
 70 | 
 71 |             # now upload to host from the StringIO
 72 |             nf = NetlogFile("shots/%s.png" % str(img_counter).rjust(4, "0"))
 73 | 
 74 |             for chunk in tmpio:
 75 |                 nf.sock.sendall(chunk)
 76 | 
 77 |             nf.close()
 78 | 
 79 |             img_last = img_current
 80 | 
 81 |         return True
 82 | 
 83 |     """
 84 |     Send a keyboard event to the system at large using the Quartz Event Service
 85 |     https://developer.apple.com/library/mac/documentation/Carbon/Reference/QuartzEventServicesRef/Reference/reference.html
 86 |     0x24 is Return/Enter (more keys at http://webnnel.googlecode.com/svn/trunk/lib/Carbon.framework/Versions/A/Frameworks/HIToolbox.framework/Versions/A/Headers/Events.h)
 87 |     Modifiers: (defined in http://www.opensource.apple.com/source/IOHIDFamily/IOHIDFamily-308/IOHIDSystem/IOKit/hidsystem/IOLLEvent.h)
 88 |     NX_ALPHASHIFTMASK	0x00010000
 89 |     NX_SHIFTMASK		0x00020000
 90 |     NX_CONTROLMASK		0x00040000
 91 |     NX_ALTERNATEMASK	0x00080000
 92 |     NX_COMMANDMASK		0x00100000
 93 |     NX_NUMERICPADMASK	0x00200000
 94 |     NX_HELPMASK		    0x00400000
 95 |     NX_SECONDARYFNMASK	0x00800000
 96 |     """
 97 |     def sendKey(self, key, modifiers=0x0):
 98 | 
 99 |         source = CG.CGEventSourceCreate(CG.kCGEventSourceStateCombinedSessionState)
100 | 
101 |         keyDown = CG.CGEventCreateKeyboardEvent(source, key, True)
102 |         CG.CGEventSetFlags(keyDown, modifiers)
103 |         keyUp = CG.CGEventCreateKeyboardEvent(source, key, False)
104 | 
105 |         CG.CGEventPost(CG.kCGAnnotatedSessionEventTap, keyDown)
106 |         CG.CGEventPost(CG.kCGAnnotatedSessionEventTap, keyUp)
107 | 
108 |         #Apparently these lines are not needed on newer versions of PyObjC and cause a segfault
109 |         #CG.CFRelease(keyUp)
110 |         #CG.CFRelease(keyDown)
111 |         #CG.CFRelease(source)
112 | 
113 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/doc.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | from lib.common.abstracts import Package
  4 | import os
  5 | import os.path
  6 | import plistlib
  7 | import logging
  8 | import subprocess
  9 | 
 10 | from AppKit import *
 11 | import Foundation
 12 | from PyObjCTools import AppHelper
 13 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
 16 | 
 17 | class OpenDoc(NSObject):
 18 | 
 19 |     def setPath(self, p, test, app):
 20 |         self.path = p
 21 |         self.loop = test
 22 |         self.app = app
 23 | 
 24 |     def run_(self, event):
 25 |         if "Word" in str(event.userInfo()["NSApplicationName"]):
 26 |             #open the file in question
 27 |             subprocess.call(["/usr/bin/open", "-a", self.app, self.path])
 28 |             #remove the event observer
 29 |             ws = NSWorkspace.sharedWorkspace()
 30 |             nc = ws.notificationCenter()
 31 |             nc.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None)
 32 |             self.loop.stop()
 33 | 
 34 | class Doc(Package):
 35 |     """Word analysis package.
 36 |     Note that this tends to be a bit picky - if the document is too old for the version of Word
 37 |     on the VM, it may not open properly"""
 38 | 
 39 |     def stop(self):
 40 |         self.is_open = True
 41 | 
 42 |     def start(self, path):
 43 |         (word, app) = self.get_path("Microsoft Office")
 44 | 
 45 |         # There is no nice programmatic way to open a file in Word on OS X (other than open -a)
 46 |         # There is on Windows, not here. No command line arguments to work with at all
 47 |         #create the observer watching for application launch events
 48 |         ws = NSWorkspace.sharedWorkspace()
 49 |         nc = ws.notificationCenter()
 50 |         op = OpenDoc.new()
 51 |         op.setPath(path, self, app)
 52 |         nc.addObserver_selector_name_object_(op, 'run:', NSWorkspaceDidLaunchApplicationNotification, None)
 53 |         #start the Preview process
 54 |         pid = self.execute(word, (word,))
 55 | 
 56 |         #Wait until the process is open
 57 |         self.is_open = False
 58 |         runLoop = NSRunLoop.currentRunLoop()
 59 |         date = NSDate.dateWithTimeIntervalSinceNow_(1.0)
 60 |         while not self.is_open:
 61 |             date = NSDate.dateWithTimeIntervalSinceNow_(1.0)
 62 |             NSRunLoop.runUntilDate_(runLoop, date)
 63 | 
 64 |         #return the pid of Word
 65 |         return pid
 66 | 
 67 |         #Old, less reliable bash method
 68 |         #args = "\"" + word + "\" & sleep 5 && echo \'tell application \""+app+"\" to open \""+path+"\"\' | /usr/bin/osascript"
 69 |         #return self.execute(bash, (bash, "-c",  "%s" % args))
 70 | 
 71 |     def get_path(self, name):
 72 |         #attempt to find Microsoft Word
 73 |         word_dir = ""
 74 |         dir = self.findDir("/Applications/", name)
 75 |         word_dir = os.path.join(dir, "Microsoft Word.app")
 76 | 
 77 |         if word_dir != "":
 78 |             return (self.getAppFilePath(word_dir), "Microsoft Word")
 79 | 
 80 |         #If we can't find Word, try to find Pages
 81 |         word_dir = self.findDir("/Applications/", "Pages")
 82 | 
 83 |         if word_dir != "":
 84 |             return (self.getAppFilePath(word_dir), "Pages")
 85 | 
 86 |         #If no Pages, open the darn thing in TextEdit
 87 |         return (self.getAppFilePath("/Applications/TextEdit.app/"), "TextEdit")
 88 | 
 89 |     def getAppFilePath(self, file_path):
 90 |         # the reason we don't just do "open file.app" is because OS X has restrictions on open that make it hard to trace
 91 |         # and you can't get the started process' pid from it
 92 |         open = "/usr/bin/open"
 93 | 
 94 |         #find Info.plist
 95 |         plist = ""
 96 |         path = self.findFile(file_path, "Info.plist")
 97 | 
 98 |         if path == "":  #no Info.plist found, this is an illegally structured app
 99 |             log.info("No Info.plist found within .app file")
100 |             return open
101 |         else:
102 |             plist = plistlib.readPlist(path)
103 | 
104 |         try:
105 |             #get the name of the main executable of this app
106 |             exec_file = plist["CFBundleExecutable"]
107 |         except KeyError: #no executable was listed, this is an illegally structured app
108 |             log.info("No main executable name found in Info.plist")
109 |             return open
110 | 
111 |         #get the full path of the executable
112 |         return self.findFile(file_path, exec_file)
113 | 
114 |     def findDir(self, path, name):
115 |         result = ""
116 |         for root, dirs, files in os.walk(path):
117 |             for d in dirs:
118 |                 if name in d:
119 |                     return os.path.join(root, d)
120 |         return result
121 | 
122 |     def findFile(self, path, name):
123 |         result = ""
124 |         for root, dirs, files in os.walk(path):
125 |             for f in files:
126 |                 if name in f:
127 |                     return os.path.join(root, f)
128 |         return result
129 | 
130 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/generic.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This is the package of last resort - Apple's "open" utility is very good
 3 | at determining what to use to open a file. Unfortunately, we cannot get the pid
 4 | of the new process from open, which means the sandbox will probably prematurely quit once
 5 | the open process is finished.
 6 | """
 7 | 
 8 | from lib.common.abstracts import Package
 9 | 
class Generic(Package):
    """Generic analysis package."""

    def start(self, path):
        """Hand the sample to /usr/bin/open and return execute()'s result."""
        opener = "/usr/bin/open"
        argv = (opener, path)
        return self.execute(opener, argv)
17 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/html.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import shutil
 4 | import logging
 5 | import os
 6 | import os.path
 7 | import plistlib
 8 | 
 9 | from lib.common.abstracts import Package
10 | 
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
12 | 
class HTML(Package):
    """HTML file analysis package."""

    def start(self, path):
        """Open the sample in Safari and return execute()'s result.

        A sample whose name does not end in .htm/.html is first copied to
        "<path>.html": without a proper extension it might be opened as a
        text file and never be rendered.
        """
        browser = self.getAppFilePath("/Applications/Safari.app")

        has_html_ext = path.endswith((".htm", ".html"))
        if not has_html_ext:
            renamed = path + ".html"
            shutil.copy(path, renamed)
            path = renamed
            log.info("Submitted file is missing extension, adding .html")

        argv = (browser, path)
        return self.execute(browser, argv)
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/jar.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import logging
 3 | import os.path
 4 | 
 5 | from lib.common.abstracts import Package
 6 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
 9 | 
class Jar(Package):
    """Java analysis package."""

    def start(self, path):
        """Run the jar with /usr/bin/java.

        Uses "-cp <path> <class>" when the "class" option is set and
        "-jar <path>" otherwise. Returns None when Java is not installed.
        """
        # standard location of the java launcher on the guest
        java = "/usr/bin/java"
        if not os.path.isfile(java):
            log.error("Cannot run jar file: No Java installed on the guest system!")
            return None

        main_class = self.options.get("class")
        if main_class:
            argv = (java, "-cp", path, main_class)
        else:
            argv = (java, "-jar", path)
        return self.execute(java, argv)
29 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/macho.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This runs Mach-O executables and FAT files, provided they are the correct architecture for the VM in use.
 3 | """
 4 | 
 5 | from lib.common.abstracts import Package
 6 | 
 7 | class MachO(Package):
 8 | 
 9 |     def start(self, path):
10 |         args = self.options.get("arguments")
11 |         if args:
12 |             return self.execute(path, args)
13 |         return self.execute(path, (path,))
14 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/pdf.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import os
 4 | import os.path
 5 | import plistlib
 6 | import subprocess
 7 | import logging
 8 | import time
 9 | 
10 | from AppKit import *
11 | import Foundation
12 | from PyObjCTools import AppHelper
13 | 
14 | from lib.common.abstracts import Package
15 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
18 | 
class OpenPDF(NSObject):
    """Application-launch observer that opens the PDF once Preview is up."""

    def setPath(self, p, test):
        """Store the document path and the object to notify via stop()."""
        self.path, self.loop = p, test

    def run_(self, event):
        """Handle a launch notification (registered as the 'run:' selector)."""
        launched = str(event.userInfo()["NSApplicationName"])
        if launched != "Preview":
            return

        # Preview is running: let "open" hand it the document.
        subprocess.call(["/usr/bin/open", "-a", "Preview", self.path])
        # This observer has done its job; unregister it and notify the owner.
        center = NSWorkspace.sharedWorkspace().notificationCenter()
        center.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None)
        self.loop.stop()
34 | 
35 | 
class PDF(Package):
    """PDF analysis package."""

    def stop(self):
        """Mark the document as opened; ends the wait loop in start()."""
        self.is_open = True

    def start(self, path):
        """Launch Preview, have "open" load the PDF into it, return the pid."""
        # Preview is the default PDF application.
        app = self.getAppFilePath("/Applications/Preview.app")

        # Downloaded PDFs often carry a "quarantine" attribute until first
        # opened, which causes permission issues; loosen the mode and clear
        # the extended attributes.
        os.chmod(path, 0o777)
        subprocess.call(["xattr", "-c", path])

        # Apple's sandbox refuses to open the document in a scripted fashion
        # unless it was already opened once by signed software or the user,
        # so the signed "open" utility is used from a launch observer.
        observer = OpenPDF.new()
        observer.setPath(path, self)
        center = NSWorkspace.sharedWorkspace().notificationCenter()
        center.addObserver_selector_name_object_(observer, 'run:', NSWorkspaceDidLaunchApplicationNotification, None)

        # Start Preview itself (without the document).
        pid = self.execute(app, (app,))

        # Pump the run loop in one-second slices until the observer calls stop().
        self.is_open = False
        run_loop = NSRunLoop.currentRunLoop()
        while not self.is_open:
            run_loop.runUntilDate_(NSDate.dateWithTimeIntervalSinceNow_(1.0))

        # pid of Preview
        return pid
83 | 
84 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/python.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | import logging
 4 | import os
 5 | 
 6 | from lib.common.abstracts import Package
 7 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
10 | 
class Python(Package):
    """Python analysis package."""

    def start(self, path):
        """Run the script with /usr/bin/python.

        The "arguments" option, when non-empty, is appended as one extra
        argument. Returns None when the interpreter is missing.
        """
        # default location of the interpreter on OS X
        interpreter = "/usr/bin/python"
        if not os.path.isfile(interpreter):
            log.error("Cannot run python file: Python not found in /usr/bin")
            return None

        argv = (interpreter, path)
        extra = self.options.get("arguments", "")
        if extra:
            argv += (extra,)
        return self.execute(interpreter, argv)
26 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/rtf.py:
--------------------------------------------------------------------------------
 1 | from lib.common.abstracts import Package
 2 | import os
 3 | import os.path
 4 | import plistlib
 5 | import logging
 6 | import subprocess
 7 | 
 8 | from AppKit import *
 9 | import Foundation
10 | from PyObjCTools import AppHelper
11 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
14 | 
class OpenFile(NSObject):
    """Application-launch observer that opens the file once TextEdit is up."""

    def setPath(self, p, test):
        """Store the document path and the object to notify via stop()."""
        self.path, self.loop = p, test

    def run_(self, event):
        """Handle a launch notification (registered as the 'run:' selector)."""
        launched = str(event.userInfo()["NSApplicationName"])
        if launched != "TextEdit":
            return

        # TextEdit is running: let "open -e" hand it the document.
        subprocess.call(["/usr/bin/open", "-e", self.path])
        # This observer has done its job; unregister it and notify the owner.
        center = NSWorkspace.sharedWorkspace().notificationCenter()
        center.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None)
        self.loop.stop()
30 | 
class RTF(Package):
    """ Plist and RTF analysis package
    """

    def stop(self):
        """Mark the document as opened; ends the wait loop in start()."""
        self.is_open = True

    def start(self, path):
        """Launch TextEdit, have "open" load the file into it, return the pid."""
        textedit = self.getAppFilePath("/Applications/TextEdit.app/")

        # fix what permissions issues we can
        os.chmod(path, 0o777)
        subprocess.call(["xattr", "-c", path])

        # Apple's sandbox refuses to open the document in a scripted fashion
        # unless it was already opened once by signed software or the user,
        # so the signed "open" utility is used from a launch observer.
        observer = OpenFile.new()
        observer.setPath(path, self)
        center = NSWorkspace.sharedWorkspace().notificationCenter()
        center.addObserver_selector_name_object_(observer, 'run:', NSWorkspaceDidLaunchApplicationNotification, None)

        # Start TextEdit itself (without the document).
        pid = self.execute(textedit, (textedit,))

        # Pump the run loop in one-second slices until the observer calls stop().
        self.is_open = False
        run_loop = NSRunLoop.currentRunLoop()
        while not self.is_open:
            run_loop.runUntilDate_(NSDate.dateWithTimeIntervalSinceNow_(1.0))

        # pid of TextEdit
        return pid


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/safari.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | import os.path
 4 | import plistlib
 5 | import logging
 6 | import subprocess
 7 | 
 8 | from AppKit import *
 9 | import Foundation
10 | from PyObjCTools import AppHelper
11 | 
12 | from lib.common.abstracts import Package
13 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
16 | 
class OpenURL(NSObject):
    """Application-launch observer that opens the URL once Safari is up."""

    def setPath(self, p, test):
        """Store the URL and the object to notify via stop()."""
        self.path, self.loop = p, test

    def run_(self, event):
        """Handle a launch notification (registered as the 'run:' selector)."""
        launched = str(event.userInfo()["NSApplicationName"])
        if launched != "Safari":
            return

        # Safari is running: let "open" hand it the URL.
        subprocess.call(["/usr/bin/open", "-a", "Safari", self.path])
        # This observer has done its job; unregister it and notify the owner.
        center = NSWorkspace.sharedWorkspace().notificationCenter()
        center.removeObserver_name_object_(self, NSWorkspaceDidLaunchApplicationNotification, None)
        self.loop.stop()
32 | 
class Safari(Package):
    """Safari analysis package."""

    def stop(self):
        """Mark the URL as opened; ends the wait loop in start()."""
        self.is_open = True

    def start(self, url):
        """Launch Safari, have "open" load the URL into it, return the pid."""
        safari = self.getAppFilePath("/Applications/Safari.app")

        # "<safari> <url>" is not used: Safari treats the argument as a file
        # path and looks for it on the local machine. Instead an observer
        # waits for Safari to launch and then opens the URL.
        observer = OpenURL.new()
        observer.setPath(url, self)
        center = NSWorkspace.sharedWorkspace().notificationCenter()
        center.addObserver_selector_name_object_(observer, 'run:', NSWorkspaceDidLaunchApplicationNotification, None)

        # Start Safari itself (without the URL).
        pid = self.execute(safari, (safari,))

        # Pump the run loop in one-second slices until the observer calls stop().
        self.is_open = False
        run_loop = NSRunLoop.currentRunLoop()
        while not self.is_open:
            run_loop.runUntilDate_(NSDate.dateWithTimeIntervalSinceNow_(1.0))

        # pid of Safari
        return pid
66 | 


--------------------------------------------------------------------------------
/analyzer/darwin/modules/packages/zip.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | import os.path
  4 | import stat
  5 | import logging
  6 | import plistlib
  7 | from zipfile import ZipFile, BadZipfile
  8 | 
  9 | from lib.common.abstracts import Package
 10 | from lib.common.exceptions import CuckooPackageError
 11 | 
# Root logger (getLogger() is called without a name).
log = logging.getLogger()
 14 | 
 15 | class Zip(Package):
 16 |     """Zip analysis package."""
 17 | 
 18 |     def start(self, path):
 19 |         root = os.environ["TMPDIR"]
 20 |         password = self.options.get("password")
 21 | 
 22 |         with ZipFile(path, "r") as archive:
 23 |             zipinfos = archive.infolist()
 24 |             try:
 25 |                 archive.extractall(path=root, pwd=password)
 26 |             except BadZipfile as e:
 27 |                 raise CuckooPackageError("Invalid Zip file")
 28 |             except RuntimeError:
 29 |                 try:
 30 |                     archive.extractall(path=root, pwd="infected")
 31 |                 except RuntimeError as e:
 32 |                     raise CuckooPackageError("Unable to extract Zip file: "
 33 |                                              "{0}".format(e))
 34 | 
 35 |         file_name = self.options.get("file")
 36 |         # If no file name is provided via option, take the first file.
 37 |         if not file_name:
 38 |             # No name provided try to find a better name.
 39 |             if len(zipinfos):
 40 |                 # Take the first one.
 41 |                 file_name = zipinfos[0].filename
 42 |             else:
 43 |                 raise CuckooPackageError("Empty ZIP archive")
 44 | 
 45 |         file_path = os.path.join(root, file_name)
 46 |         #Have to make the file(s) executable
 47 |         os.chmod(file_path, 0o777 | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
 48 |         if os.path.isdir(file_path):
 49 |             for root, dirs, files in os.walk(file_path):
 50 |                 for d in files:
 51 |                     os.chmod(os.path.join(root, d), 0o777 | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
 52 |         #Handle .app files separately
 53 |         if file_name.endswith(".app") or file_name.endswith(".app/"):
 54 |             (exec_path, fpath) = self.getAppFilePath(file_path)
 55 |             args = self.options.get("arguments")
 56 |             if args is None:
 57 |                 return self.execute(exec_path, (exec_path, "%s" % fpath))
 58 |             return self.execute(exec_path, (exec_path, "%s %s" % (fpath, args)))
 59 | 
 60 |         return self.execute(file_path, (file_path, self.options.get("arguments")))
 61 | 
 62 |     def getAppFilePath(self, file_path):
 63 |         # the reason we don't just do "open file.app" is because OS X has restrictions on open that make it hard to trace
 64 |         open = "/usr/bin/open"
 65 | 
 66 |         #find Info.plist
 67 |         plist = ""
 68 |         path = self.findFile(file_path, "Info.plist")
 69 |         for root, dirs, files in os.walk(file_path):
 70 |             for f in files:
 71 |                 if f == "Info.plist":
 72 |                     #read the plist file
 73 |                     plist = plistlib.readPlist(os.path.join(root, f))
 74 |                     break
 75 | 
 76 |         if path == "":  #no Info.plist found, this is an illegally structured app
 77 |             log.info("No Info.plist found within .app file")
 78 |             return (open, file_path)
 79 |         else:
 80 |             plist = plistlib.readPlist(path)
 81 | 
 82 |         try:
 83 |             #get the name of the main executable of this app
 84 |             exec_file = plist["CFBundleExecutable"]
 85 |         except KeyError: #no executable was listed, this is an illegally structured app
 86 |             log.info("No main executable name found in Info.plist")
 87 |             return (open, file_path)
 88 | 
 89 |         #get the full path of the executable
 90 |         exec_path = self.findFile(file_path, exec_file)
 91 |         return (exec_path, "")
 92 | 
 93 | 
 94 |     def findDir(self, path, name):
 95 |         result = ""
 96 |         for root, dirs, files in os.walk(path):
 97 |             for d in dirs:
 98 |                 if name in d:
 99 |                     return os.path.join(root, d)
100 |         return result
101 | 
102 |     def findFile(self, path, name):
103 |         result = ""
104 |         for root, dirs, files in os.walk(path):
105 |             for f in files:
106 |                 if name in f:
107 |                     return os.path.join(root, f)
108 |         return result
109 | 
110 | 


--------------------------------------------------------------------------------
/modules/processing/behavior_osx.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
 3 | this work by or on behalf of the U.S. Government. 
 4 | NOTICE:
 5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
 8 | 
 9 | The purpose of this module is simply to pull data from the logs of the darwin analyzer into a format that
10 | the signature and JSON module can use (aka a giant structure of dictionaries and lists).
11 | The existing behavior module only works for the Windows analyzer results.
12 | This module pulls data from the file activity logs, the processes log, and the api call logs.
13 | """
14 | 
15 | import os
16 | import os.path
17 | 
18 | from lib.cuckoo.common.abstracts import Processing
19 | from lib.cuckoo.common.exceptions import CuckooProcessingError
20 | 
class BehaviorOSX(Processing):
    """Pull results from log files"""

    def run(self):
        """Collect the darwin analyzer logs found under self.logs_path.

        @return: dict with "api_calls" (dict mapping log file stem to its
                 lines), "processes" (lines of the processes log, only when
                 such a file exists) and one entry per "file_" log, keyed by
                 the file stem.
        """
        self.key = "behavior_osx"

        result = {"api_calls": {}}  # dictionary of call lists to return

        # loop through to find the files of interest
        for root, dirs, files in os.walk(self.logs_path):
            for f in files:
                name = os.path.join(root, f)
                stem = os.path.splitext(f)[0]
                if "api_calls" in f:  # api calls go in a dictionary of lists
                    result["api_calls"][stem] = self.readFile(name)
                elif "processes" in f:  # processes are just a list
                    result["processes"] = self.readFile(name)
                elif "file_" in f:  # file activity logs are read without skipping a header
                    result[stem] = self.readFile(name, skip=False)

        return result

    def readFile(self, fname, skip=True):
        """
        Turns a newline-separated file into a list
        :param skip: When true, this means you skip the first line of the file as headers
        :param fname: The name of the file to process
        :return: a list of the file's lines, line endings included
        """
        # "with" closes the handle even when reading raises.
        with open(fname, "r") as f:
            if skip:  # skip the first line of the file
                f.readline()
            return f.readlines()
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/modules/processing/filter_syscall.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
 3 | this work by or on behalf of the U.S. Government. 
 4 | NOTICE:
 5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
 6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
 7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
 8 | """
 9 | 
10 | import os
11 | import os.path
12 | 
13 | from lib.cuckoo.common.abstracts import Processing
14 | from lib.cuckoo.common.exceptions import CuckooProcessingError
15 | 
# Marker substrings tested by FilterSyscall.filterLine(): a system call log
# line that contains any of them is treated as a line to filter out.
filters = ["FILE_ACTIVITY:", "FILE_CREATE:", "FILE_DELETE:", "FILE_WRITE:", "PROCESS:"]
17 | 
class FilterSyscall(Processing):
    """Filter out syscalls caused by system call hooks."""

    def filterLine(self, line):
        """Tell whether a log line contains one of the hook markers.

        @param line: one line of a system call log.
        @return: True if any entry of the module-level `filters` list occurs
                 in the line, False otherwise.
        """
        return any(marker in line for marker in filters)

    def filterFile(self, fname):
        """Strip hook-generated entries from one system call log, in place.

        For every line holding a marker, that line and the line after it are
        dropped; once more than two lines have been read, the two lines kept
        just before it are dropped as well (together the 4 commands that make
        up the pipe write). The file is then rewritten with what is left.

        @param fname: path of the system call log to filter.
        @return: list of the lines that were kept.
        """
        output = []

        with open(fname, 'r') as data:
            i = 0  # number of lines consumed so far
            line = data.readline()
            while line:
                if self.filterLine(line):
                    if i > 2:
                        # slice deletion tolerates fewer than two kept lines,
                        # which happens when markers follow each other closely
                        del output[-2:]
                    # skip the line that follows the marker
                    data.readline()
                    i += 1
                else:
                    output.append(line)
                line = data.readline()
                i += 1

        #delete the source file
        os.remove(fname)
        #rewrite the system call log
        with open(fname, 'w+') as out:
            out.writelines(output)

        return output

    def run(self):
        """Filter and extract syscall logs
        @return: dictionary of list of syscalls, keyed by log file path.
        """
        self.key = "filter_syscall"
        call_logs = [] #list of all the syscall file logs
        result = {} #dictionary of call lists to return

        #find all the syscall logs, if any
        for root, dirs, files in os.walk(self.logs_path):
            for f in files:
                if "system_calls" in f:
                    call_logs.append(os.path.join(root, f))

        #for each file, look for calls caused by the hook library
        for c in call_logs:
            result[c] = self.filterFile(c)

        return result
79 | 


--------------------------------------------------------------------------------
/modules/processing/macho_data.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  3 | this work by or on behalf of the U.S. Government. 
  4 | NOTICE:
  5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  8 | 
  9 | This file contains the dictionary structures for whatever human-readable parsing we need that wasn't done by macholib.
 10 | A lot of the constants were determined using the MachOView program.
 11 | http://sourceforge.net/projects/machoview/
 12 | '''
 13 | 
 14 | ### CPU SUBTYPES ###
 15 | 
 16 | CPU_SUBTYPE_ANY = {
 17 |     -1:     'MULTIPLE',
 18 |     0:      'LITTLE_ENDIAN',
 19 |     1:      'BIG_ENDIAN',
 20 | }
 21 | 
 22 | CPU_SUBTYPE_HIGH = {
 23 |     2147483648: 'LIB64',
 24 |     4278190080: 'MASK',
 25 | }
 26 | 
 27 | CPU_SUBTYPE_ARM = {
 28 |     0:     'ARM_ALL',
 29 |     5:     'ARM_V4T',
 30 |     6:     'ARM_v6',
 31 |     7:     'ARM_V5TEJ',
 32 |     9:     'ARM_V7',
 33 |     8:     'ARM_XSCALE',
 34 | }
 35 | 
 36 | CPU_SUBTYPE_HPPA = {
 37 |     0:     'HPPA_7100 OR HPPA_ALL',
 38 |     1:     'HPPA_7100LC',
 39 | }
 40 | 
 41 | CPU_SUBTYPE_I860 = {
 42 |     0:     'I860_ALL',
 43 |     1:     'I860_860',
 44 | }
 45 | 
 46 | CPU_SUBTYPE_I386 = {
 47 |     3:     'I386_ALL',
 48 |     5:     'PENT',
 49 |     8:     'PENTIUM_3',
 50 |     9:     'PENTIUM_M',
 51 |     10:    'PENTIUM_4',
 52 |     24:    'PENTIUM_3_M',
 53 |     22:    'PENTPRO',
 54 |     26:    'PENTIUM_4_M',
 55 |     40:    'PENTIUM_3_XEON',
 56 |     54:    'PENTII_M3',
 57 |     86:    'PENTII_M5',
 58 |     3:      '386',
 59 |     4:      '486',
 60 |     5:      '586',
 61 |     11:     'ITANIUM',
 62 |     12:     'XEON',
 63 |     15:     'INTEL_FAMILY_MAX',
 64 |     27:     'ITANIUM_2',
 65 |     28:     'XEON_MP',
 66 |     103:    'CELERON',
 67 |     119:    'CELERON_MOBILE',
 68 | }
 69 | 
 70 | CPU_SUBTYPE_MC680x0 = {
 71 |     1:     'MC680x0_ALL',
 72 |     2:     'MC68040',
 73 |     3:     'MC68030_ONLY',
 74 | }
 75 | 
 76 | CPU_SUBTYPE_MC88000 = {
 77 |     0:     'MC88000_ALL',
 78 |     1:     'MC88100',
 79 |     2:     'MC88110',
 80 | 
 81 | }
 82 | 
 83 | CPU_SUBTYPE_MIPS = {
 84 |     0:     'MIPS_ALL',
 85 |     1:     'MIPS_R2300',
 86 |     2:     'MIPS_R2600',
 87 |     3:     'MIPS_R2800',
 88 |     4:     'MIPS_R2000a',
 89 |     5:     'MIPS_R2000',
 90 |     6:     'MIPS_R3000a',
 91 |     7:     'MIPS_R3000',
 92 | }
 93 | 
 94 | CPU_SUBTYPE_MC98000 = {
 95 |     0:     'MC98000_ALL',
 96 |     1:     'MC98601',
 97 | }
 98 | 
 99 | CPU_SUBTYPE_POWERPC = {
100 |     0:     'POWERPC_ALL',
101 |     1:     'POWERPC_601',
102 |     2:     'POWERPC_602',
103 |     3:     'POWERPC_603',
104 |     4:     'POWERPC_603e',
105 |     5:     'POWERPC_603ev',
106 |     6:     'POWERPC_604',
107 |     7:     'POWERPC_604e',
108 |     8:     'POWERPC_620',
109 |     9:     'POWERPC_750',
110 |     10:    'POWERPC_7400',
111 |     11:    'POWERPC_7450',
112 |     100:   'POWERPC_970',
113 | }
114 | 
115 | CPU_SUBTYPE_SPARC = {
116 |     0:     'SPARC_ALL',
117 | }
118 | 
119 | CPU_SUBTYPE_VAX = {
120 |     0:     'VAX_ALL',
121 |     1:     'VAX780',
122 |     2:     'VAX785',
123 |     3:     'VAX750',
124 |     4:     'VAX730',
125 |     5:     'UVAXI',
126 |     6:     'UVAXII',
127 |     7:     'VAX8200',
128 |     8:     'VAX8500',
129 |     9:     'VAX8600',
130 |     10:    'VAX8650',
131 |     11:    'VAX8800',
132 |     12:    'UVAXIII',
133 | }
134 | 
135 | CPU_SUBTYPE_X86 = {
136 |     3:     'X86_ALL',
137 |     4:     'X86_ARCH1',
138 | }
139 | 
140 | CPU_SUBTYPE_X86_64 = {
141 |     3:     'X86_64_ALL',
142 | }
143 | 
144 | ### File Types ###
145 | 
# Maps the Mach-O header's filetype value to its MH_* name.
FILE_TYPE = {
    1: 'MH_OBJECT',         # relocatable object file
    2: 'MH_EXECUTE',        # demand paged executable file
    3: 'MH_FVMLIB',         # fixed VM shared library file
    4: 'MH_CORE',           # core file
    5: 'MH_PRELOAD',        # preloaded executable file
    6: 'MH_DYLIB',          # dynamically bound shared library
    7: 'MH_DYLINKER',       # dynamic link editor
    8: 'MH_BUNDLE',         # dynamically bound bundle file
    9: 'MH_DYLIB_STUB',     # shared library stub for static linking only
    10: 'MH_DSYM',          # companion file with only debug sections
    11: 'MH_KTEXT_BUNDLE',  # x86_64 ktexts
}
159 | 
160 | ### Mach-O Header Flags ###
161 | 
# Maps each single-bit value of the Mach-O header's flags field to its MH_*
# name. Keys are written as full 32-bit hex values so the bit position of
# every flag is easy to compare.
MACHO_FLAGS = {
    # the object file has no undefined references
    0x00000001: 'MH_NOUNDEFS',
    # the object file is the output of an incremental link against a base file
    0x00000002: 'MH_INCRLINK',
    # the object file is input for the dynamic linker
    0x00000004: 'MH_DYLDLINK',
    # the object file's undefined references are bound by the dynamic linker when loaded
    0x00000008: 'MH_BINDATLOAD',
    # the file has its dynamic undefined references prebound
    0x00000010: 'MH_PREBOUND',
    # the file has its read-only and read-write segments split
    0x00000020: 'MH_SPLIT_SEGS',
    # the shared library init routine is to be run lazily via catching memory
    # faults to its writeable segments (obsolete)
    0x00000040: 'MH_LAZY_INIT',
    # the image is using two-level name space bindings
    0x00000080: 'MH_TWOLEVEL',
    # the executable is forcing all images to use flat name space bindings
    0x00000100: 'MH_FORCE_FLAT',
    # this umbrella guarantees no multiple definitions of symbols in its sub-images
    0x00000200: 'MH_NOMULTIDEFS',
    # do not have dyld notify the prebinding agent about this executable
    0x00000400: 'MH_NOFIXPREBINDING',
    # the binary is not prebound but can have its prebinding redone;
    # only used when MH_PREBOUND is not set
    0x00000800: 'MH_PREBINDABLE',
    # this binary binds to all two-level namespace modules of its dependent
    # libraries; only used when MH_PREBINDABLE and MH_TWOLEVEL are both set
    0x00001000: 'MH_ALLMODSBOUND',
    # safe to divide up the sections into sub-sections via symbols for dead code stripping
    0x00002000: 'MH_SUBSECTIONS_VIA_SYMBOLS',
    # the binary has been canonicalized via the unprebind operation
    0x00004000: 'MH_CANONICAL',
    # the final linked image contains external weak symbols
    0x00008000: 'MH_WEAK_DEFINES',
    # the final linked image uses weak symbols
    0x00010000: 'MH_BINDS_TO_WEAK',
    # all stacks in the task will be given stack execution privilege;
    # only used in MH_EXECUTE filetypes
    0x00020000: 'MH_ALLOW_STACK_EXECUTION',
    # the binary declares it is safe for use in processes with uid zero
    0x00040000: 'MH_ROOT_SAFE',
    # the binary declares it is safe for use in processes when issetugid() is true
    0x00080000: 'MH_SETUID_SAFE',
    # on a dylib: the static linker does not need to examine dependent dylibs
    # to see if any are re-exported
    0x00100000: 'MH_NO_REEXPORTED_DYLIBS',
    # the OS will load the main executable at a random address;
    # only used in MH_EXECUTE filetypes
    0x00200000: 'MH_PIE',
    # only for use on dylibs: when linking against a dylib that has this bit
    # set, the static linker will automatically not create a LC_LOAD_DYLIB
    # load command to the dylib if no symbols are being referenced from it
    0x00400000: 'MH_DEAD_STRIPPABLE_DYLIB',
    # contains a section of type S_THREAD_LOCAL_VARIABLES
    0x00800000: 'MH_HAS_TLV_DESCRIPTORS',
    # the OS will run the main executable with a non-executable heap even on
    # platforms (e.g. i386) that don't require it; only used in MH_EXECUTE filetypes
    0x01000000: 'MH_NO_HEAP_EXECUTION',
}
191 | 
192 | ### Mach-O Load Commands ###
193 | 
# Maps a load command's cmd value to its LC_* name.
LOAD_CMDS = {
    # After MacOS X 10.1 when a new load command is added that is required to be
    # understood by the dynamic linker for the image to execute properly the
    # LC_REQ_DYLD bit will be or'ed into the load command constant.
    # (The name used to end in a stray space, which showed up in composed
    # names such as "LC_REQ_DYLD , LC_MAIN".)
    0x80000000: 'LC_REQ_DYLD',

    0x1: 'LC_SEGMENT',  # segment of this file to be mapped
    0x2: 'LC_SYMTAB',  # link-edit stab symbol table info
    0x3: 'LC_SYMSEG',  # link-edit gdb symbol table info (obsolete)
    0x4: 'LC_THREAD',  # thread
    0x5: 'LC_UNIXTHREAD',  # unix thread (includes a stack), replaced by LC_MAIN for OS X 10.8+
    0x6: 'LC_LOADFVMLIB',  # load a specified fixed VM shared library
    0x7: 'LC_IDFVMLIB',  # fixed VM shared library identification
    0x8: 'LC_IDENT',  # object identification info (obsolete)
    0x9: 'LC_FVMFILE',  # fixed VM file inclusion (internal use)
    0xA: 'LC_PREPAGE',  # prepage command (internal use)
    0xB: 'LC_DYSYMTAB',  # dynamic link-edit symbol table info
    0xC: 'LC_LOAD_DYLIB',  # load a dynamically linked shared library
    0xD: 'LC_ID_DYLIB',  # dynamically linked shared library identification
    0xE: 'LC_LOAD_DYLINKER',  # load a dynamic linker
    0xF: 'LC_ID_DYLINKER',  # dynamic linker identification
    0x10: 'LC_PREBOUND_DYLIB',  # modules prebound for a dynamically linked shared library
    0x11: 'LC_ROUTINES',  # image routines
    0x12: 'LC_SUB_FRAMEWORK',  # sub framework
    0x13: 'LC_SUB_UMBRELLA',  # sub umbrella
    0x14: 'LC_SUB_CLIENT',  # sub client
    0x15: 'LC_SUB_LIBRARY',  # sub library
    0x16: 'LC_TWOLEVEL_HINTS',  # two-level namespace lookup hints
    0x17: 'LC_PREBIND_CKSUM',  # prebind checksum
    0x18: 'LC_LOAD_WEAK_DYLIB',
    0x19: 'LC_SEGMENT_64',  # 64-bit segment of this file to be mapped
    0x1a: 'LC_ROUTINES_64',  # 64-bit image routines
    0x1b: 'LC_UUID',  # the uuid
    0x1c: 'LC_RPATH',  # runpath additions
    0x1d: 'LC_CODE_SIGNATURE',  # location of code signature
    0x1e: 'LC_SEGMENT_SPLIT_INFO',  # location of info to split segments
    0x1f: 'LC_REEXPORT_DYLIB',  # load and re-export dylib
    0x20: 'LC_LAZY_LOAD_DYLIB',  # delay load of dylib until first use
    0x21: 'LC_ENCRYPTION_INFO',  # encrypted segment information
    0x22: 'LC_DYLD_INFO',  # compressed dyld information
    0x80000022: 'LC_DYLD_INFO_ONLY',  # (0x22|LC_REQ_DYLD) - compressed dyld information only
    0x23: 'LC_LOAD_UPWARD_DYLIB',  # load upward dylib
    0x24: 'LC_VERSION_MIN_MACOSX',  # build for MacOSX min OS version
    0x25: 'LC_VERSION_MIN_IPHONEOS',  # build for iPhoneOS min OS version
    0x26: 'LC_FUNCTION_STARTS',  # compressed table of function start addresses
    0x27: 'LC_DYLD_ENVIRONMENT',  # string for dyld to treat like environment variable
    0x28: 'LC_MAIN',  # replacement for LC_UNIXTHREAD
    0x29: 'LC_DATA_IN_CODE',  # table of non-instructions in __text
    0x2a: 'LC_SOURCE_VERSION',  # source version used to build binary
    0x2b: 'LC_DYLIB_CODE_SIGN_DRS',  # Code signing DRs copied from linked dylibs
    0x2c: 'LC_ENCRYPTION_INFO_64',  # 64-bit encrypted segment information
    0x2d: 'LC_LINKER_OPTION',  # linker options in MH_OBJECT files
}
247 | 
248 | ### Section Flags ###
249 | 
# Section type, stored in the lowest byte of a section's flags field.
# Each section has exactly one type.
SECTION_TYPES = {
    0x00: 'S_REGULAR',  # regular section
    0x01: 'S_ZEROFILL',  # zero fill on demand section
    0x02: 'S_CSTRING_LITERALS',  # section with only literal C strings
    0x03: 'S_4BYTE_LITERALS',  # section with only 4 byte literals
    0x04: 'S_8BYTE_LITERALS',  # section with only 8 byte literals
    0x05: 'S_LITERAL_POINTERS',  # section with only pointers to literals
    0x06: 'S_NON_LAZY_SYMBOL_POINTERS',  # section with only non-lazy symbol pointers
    0x07: 'S_LAZY_SYMBOL_POINTERS',  # section with only lazy symbol pointers
    0x08: 'S_SYMBOL_STUBS',  # section with only symbol stubs, byte size of stub in the reserved2 field
    0x09: 'S_MOD_INIT_FUNC_POINTERS',  # section with only function pointers for initialization
    0x0a: 'S_MOD_TERM_FUNC_POINTERS',  # section with only function pointers for termination
    0x0b: 'S_COALESCED',  # section contains symbols that are to be coalesced
    0x0c: 'S_GB_ZEROFILL',  # zero fill on demand section (that can be larger than 4 gigabytes)
    0x0d: 'S_INTERPOSING',  # section with only pairs of function pointers for interposing
    0x0e: 'S_16BYTE_LITERALS',  # section with only 16 byte literals
    0x0f: 'S_DTRACE_DOF',  # section contains DTrace Object Format
    0x10: 'S_LAZY_DYLIB_SYMBOL_POINTERS',  # section with only lazy symbol pointers to lazy loaded dylibs
    # types for thread local variables (TLVs)
    0x11: 'S_THREAD_LOCAL_REGULAR',  # template of initial values for TLVs
    0x12: 'S_THREAD_LOCAL_ZEROFILL',  # template of initial values for TLVs
    0x13: 'S_THREAD_LOCAL_VARIABLES',  # TLV descriptors
    0x14: 'S_THREAD_LOCAL_VARIABLE_POINTERS',  # pointers to TLV descriptors
    0x15: 'S_THREAD_LOCAL_INIT_FUNCTION_POINTERS',  # functions to call to initialize TLV values
}
275 | 
# Section attributes, stored in the high 3 bytes of a section's flags field.
# A section can have several attributes at once.
SECTION_ATTR = {
    # User-settable attributes - highest byte
    0x80000000: 'S_ATTR_PURE_INSTRUCTIONS',  # section contains only true machine instructions
    0x40000000: 'S_ATTR_NO_TOC',  # section contains coalesced symbols that are not to be in a ranlib table of contents
    0x20000000: 'S_ATTR_STRIP_STATIC_SYMS',  # ok to strip static symbols in this section in files with the MH_DYLDLINK flag
    0x10000000: 'S_ATTR_NO_DEAD_STRIP',  # no dead stripping
    0x08000000: 'S_ATTR_LIVE_SUPPORT',  # blocks are live if they reference live blocks
    0x04000000: 'S_ATTR_SELF_MODIFYING_CODE',  # used with i386 code stubs written on by dyld
    0x02000000: 'S_ATTR_DEBUG',  # a debug section
    # System-settable attributes - next two bytes
    0x00000400: 'S_ATTR_SOME_INSTRUCTIONS',  # section contains some machine instructions
    0x00000200: 'S_ATTR_EXT_RELOC',  # section has external relocation entries
    0x00000100: 'S_ATTR_LOC_RELOC',  # section has local relocation entries
}
290 | 
291 | ### Virtual Memory Protection Flags - see mach/vm_prot.h ###
292 | 
# Maps a virtual memory protection value to its VM_PROT_* name.
# 0x10 has two names (VM_PROT_COPY and VM_PROT_WANTS_COPY). A dict can hold
# only one value per key, and the literal used to list both, so the first was
# silently discarded. Only the effective name is listed now.
VM_PROT = {
    0x00: 'VM_PROT_NONE',
    0x01: 'VM_PROT_READ',  # read permission
    0x02: 'VM_PROT_WRITE',  # write permission
    0x04: 'VM_PROT_EXECUTE',  # execute permission
    0x08: 'VM_PROT_NO_CHANGE',  # technically invalid, only used by memory_object_lock_request
    # only used by memory_object_data_request; the same value is also
    # VM_PROT_COPY (when the caller cannot obtain write permission, this can
    # be used to make a working copy)
    0x10: 'VM_PROT_WANTS_COPY',

    (0x01 | 0x02): 'VM_PROT_DEFAULT (rw)',  # read and write permissions, the default for new virtual memory
    (0x01 | 0x02 | 0x04): 'VM_PROT_ALL (rwe)',  # max possible permissions, used for parameter checking
}


--------------------------------------------------------------------------------
/modules/processing/static_macho.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (2014) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive license for use of 
  3 | this work by or on behalf of the U.S. Government. 
  4 | NOTICE:
  5 | For five (5) years from  the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, and perform publicly and display publicly, by or on behalf of the Government. There is provision for the possible extension of the term of this license. Subsequent to that period or any extension granted, the United States Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable worldwide license in this data to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. The specific term of the license can be identified by inquiry made to Sandia Corporation or DOE.
  6 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
  7 | Any licensee of this software has the obligation and responsibility to abide by the applicable export control laws, regulations, and general prohibitions relating to the export of technical data. Failure to obtain an export control license or other authority from the Government may result in criminal liability under U.S. laws.
  8 | 
  9 | This script extracts the metadata, imports, and whatever else we can get statically from Mach-O and FAT files.
 10 | 
 11 | # Depends on python-magic (and libmagic) and macholib
 12 | # Macholib: https://pypi.python.org/pypi/macholib/
 13 | # Python-magic: https://github.com/ahupp/python-magic
 14 | # Libmagic Instructions: http://www.brambraakman.com/blog/comments/installing_libmagic_in_mac_os_x_for_python-magic/
 15 | 
 16 | """
 17 | 
 18 | import struct
 19 | from zipfile import ZipFile, BadZipfile
 20 | import os
 21 | import os.path
 22 | import plistlib
 23 | import logging
 24 | import shutil
 25 | import subprocess
 26 | 
 27 | #the magic library is used to identify the file type, since we can only handle certain kinds
 28 | try:
 29 |     import magic
 30 |     HAVE_MAGIC = True
 31 | except ImportError:
 32 |     HAVE_MAGIC = False
 33 | # The macholib library is used to handle the byte-parsing of a lot of the file structures
 34 | try:
 35 |     #import the parsing stuff from the macholib library
 36 |     from macholib.MachO import MachO
 37 |     #import the header constants
 38 |     from macholib.mach_o import *
 39 |     HAVE_MACHO = True
 40 | except ImportError:
 41 |     HAVE_MACHO = False
 42 | 
 43 | '''Cuckoo libraries'''
 44 | from lib.cuckoo.common.abstracts import Processing #the framework for all processing modules
 45 | from lib.cuckoo.common.exceptions import CuckooProcessingError #error to throw if something goes wrong
 46 | import modules.processing.macho_data as data #custom library of human-readable field conversions
 47 | 
 48 | #get logger
# getLogger() called without a name returns the root logger.
log = logging.getLogger()
 50 | 
 51 | 
 52 | class MachO_Parse:
 53 |     """Mach-O and FAT file static analysis"""
 54 | 
 55 |     def __init__(self, file_path):
 56 |          ### Class Variables ###
 57 |         self.file_name = file_path #a single file to parse
 58 | 
 59 |     def parse(self):
 60 |         """Parse the file's static attributes.
 61 |         @return: analysis results dict or None.
 62 |         """
 63 |         results = {}
 64 | 
 65 |         # get the signature info via the codesign utility
 66 |         args = ["codesign","-dvvvv", self.file_name]
 67 |         proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 68 |         output, error_output = proc.communicate()
 69 |         if proc.returncode: #error, probably file not signed
 70 |             results["signature"] = error_output
 71 |         else:
 72 |             results["signature"] = output
 73 | 
 74 |         #get the file object
 75 |         file_object = open(self.file_name, 'rb')
 76 | 
 77 | 
 78 |         #Use the macho library to parse out some structures
 79 |         pFile = MachO(self.file_name)
 80 | 
 81 |         #if this is a fat file, it will have multiple Mach-O objects inside it
 82 |         results["FAT_header"] = self.parseFATHeader(file_object, pFile)
 83 | 
 84 |         #parse all the Mach-O headers
 85 |         i = 1
 86 |         for h in pFile.headers:
 87 |             results["MachO_header" + str(i)] = self.parseMachOHeader(h, file_object)
 88 |             i +=1
 89 | 
 90 |         #close the file
 91 |         file_object.close()
 92 | 
 93 |         #return the dict of results
 94 |         return results
 95 | 
 96 |     def parseMachOHeader(self, header, file_object):
 97 |         results = {}
 98 |         m = header.MH_MAGIC
 99 |         #get down to the actual header info
100 |         h = header.header
101 | 
102 |         ### get human-readable strings ###
103 |         cpu_type = CPU_TYPE_NAMES.get(h.cputype, h.cputype)
104 |         results["cpu_type"] = cpu_type
105 |         #this needs a mask due to a couple high-bit types like lib64
106 |         hex_stype = (h.cpusubtype + (1 << 32)) % (1 << 32)  #because some numbers turn out negative when read
107 |         cpu_stype = self.getCPUSubtype(cpu_type, hex_stype & ~0xff000000)
108 |         #test for the high-bit ones
109 |         try:
110 |             s = self.getCPUSubtype('high', hex_stype & 0xff000000)
111 |             cpu_stype += ", " + s
112 |         except TypeError: #meaning no matches
113 |             pass
114 |         results["cpu_subtype"] = cpu_stype
115 |         #get the file type - library, executable, etc.
116 |         results["ftype"] = data.FILE_TYPE.get(int(h.filetype))
117 |         #get the list of flags
118 |         results["flags"] = self.getFlags(h.flags)
119 | 
120 | 
121 |         #reserved field only exists in 64-bit headers, so set to None if 32-bit
122 |         res = 'NULL'
123 |         if hasattr(h, 'reserved'):
124 |             res = h.reserved
125 |         results["reserved"] = res
126 | 
127 |         #parse the load commands
128 |         (results["load_commands"], results["sections"]) = self.parseLoadCommands(header.commands)
129 | 
130 |         #get a stand-alone list of the dynamically linked libraries, just for convenience
131 |         libs = []
132 |         for lib in header.walkRelocatables():
133 |             libs.append(lib[2])
134 |         results["DyLinkedLibs"] = libs
135 | 
136 |         #get the imports/exports from the symbol table
137 |         sym_cmd = header.getSymbolTableCommand() #get the symbol table load command from the header
138 |         dyn_cmd = header.getDynamicSymbolTableCommand() #get the dynamic symbol table load command from the header
139 |         (results["DefExtSymbols"], results["UndefExtSymbols"]) = self.parseSymbolTable(sym_cmd, dyn_cmd, file_object, header)
140 | 
141 | 
142 |         return results
143 | 
144 |     def parseLoadCommands(self, commands):
145 |         results = [] #list of all load commands to return
146 |         sections = []
147 | 
148 |         # Each command is a tuple with 3 entries
149 |         i = 0
150 |         for cmd in commands: #list of load commands for one MachO header
151 |             c = {}
152 |             #the first entry is a load_command structure, made up of the command type and its size
153 |             #get the human-readable command name
154 |             cmd_name = data.LOAD_CMDS.get(cmd[0].cmd)
155 |             #test for the high-bit LC_REQ_DYLD
156 |             if cmd_name is None:
157 |                 s1 = data.LOAD_CMDS.get(cmd[0].cmd & 0xff000000) #check the high bit separately
158 |                 cmd_name = s1
159 |                 s2 = data.LOAD_CMDS.get(cmd[0].cmd & 0x00ffffff) #check the low bits
160 |                 cmd_name += ", " + s2
161 |             c["cmd_name"] = cmd_name
162 |             c["cmd_size"] = int(cmd[0].cmdsize)
163 | 
164 |             #the second entry in the tuple is the actual content of the command, which varies depending on the command
165 |             # Since we can't predict the content without an excessively long switch statement, we just pull out
166 |             # the structure attributes directly as a dict. It's not elegant but it works.
167 |             c["cmd_content"] = cmd[1].__dict__["_objects_"]
168 | 
169 |             #sometimes some of the dict objects will create JSON errors
170 |             for key in c["cmd_content"]:
171 |                 if isinstance(c["cmd_content"][key], str): #these strings are ASCII, and sometimes they don't play well with UTF-8
172 |                     c["cmd_content"][key] = c["cmd_content"][key].decode('utf-8', 'ignore').strip('\u0000')
173 |                 if isinstance(c["cmd_content"][key], mach_version_helper): #these are Python objects JSON can't handle
174 |                     c["cmd_content"][key] = c["cmd_content"][key].__dict__["_objects_"]
175 | 
176 | 
177 |             # the third thing in the tuple is a string used by the command (usually a library/framework name)
178 |             if "LC_SEGMENT" in c["cmd_name"]: #unless its a segment, then the sections need to be parsed
179 |                 #get human-readable memory flags for the segment
180 |                 c["cmd_content"]["maxprot"] = self.getMemProt(c["cmd_content"]["maxprot"])
181 |                 c["cmd_content"]["initprot"] = self.getMemProt(c["cmd_content"]["initprot"])
182 |                 #parse the sections in the segment
183 |                 for sec in cmd[2]:
184 |                     sec2 = sec.__dict__["_objects_"]
185 | 
186 |                     #parse the flags of sections into human-readable text
187 |                     # There is a type flag and one or more attribute flags in the 4-byte field
188 |                     sec2["flags"] = self.parseSectionFlags(sec2["flags"])
189 | 
190 |                     #add the section to the list
191 |                     sections.append(sec.__dict__["_objects_"])
192 |             else:
193 |                 c["strings"] = cmd[2]
194 |                 if isinstance(c["strings"], str): #these strings are ASCII, and sometimes they don't play well with UTF-8
195 |                     c["strings"] = c["strings"].decode('utf-8', 'ignore')
196 | 
197 |             results.append(c) #add the command to the list
198 |             i += 1
199 | 
200 | 
201 |         return (results, sections)
202 | 
203 |     def getMemProt(self, flags):
204 |         #check to see if it has a single dict value
205 |         if not data.VM_PROT.get(flags) is None:
206 |             return data.VM_PROT.get(flags)
207 |         else:
208 |             f = '' #string to hold the flags
209 |             for i in range(0, 31): #flags are each one bit, so check each bit in 4 bytes
210 |                 mask = 1 << i
211 |                 flag = flags & mask
212 |                 if flag in data.VM_PROT and flag != 0:
213 |                     if f != '':
214 |                         f += ", "
215 |                     f += data.VM_PROT.get(flag)
216 |             return f
217 | 
218 |     def parseSectionFlags(self, flags):
219 |         f = '' #variable to store all the flags in
220 | 
221 |         #get the type - stored in the lowest byte
222 |         type = data.SECTION_TYPES.get(flags & 0x000000ff)
223 |         if not type is None:
224 |             f += type
225 | 
226 |         #get the user-settable attributes - highest byte
227 |         a1 = data.SECTION_ATTR.get(flags & 0xff000000)
228 |         if not a1 is None:
229 |             f += ", " + a1
230 | 
231 |         #get the system-settable attributes - middle two bytes
232 |         a2 = data.SECTION_ATTR.get(flags & 0x00ffff00)
233 |         if not a2 is None:
234 |             f += ", " + a2
235 | 
236 |         return f
237 | 
238 | 
239 |     def parseSymbolTable(self, sym_cmd, dyn_cmd, file_object, header):
240 |         if dyn_cmd is None or sym_cmd is None:
241 |             return
242 |         try:
243 |             offset = header.offset
244 |             endian = header.endian
245 |             # The symbol table is actually made up of several partitions. These partitions and their offsets
246 |             # are listed in the LC_DYSYMTAB load command.
247 |             symbols = []
248 |             # the human-readable string of the symbol table are actually stored in the strings table, so get those
249 |             #go to the beginning of the strings table, offset from the beginning of the Mach-O object
250 |             file_object.seek(0)
251 |             file_object.seek(sym_cmd.stroff+offset)
252 |             #file_object.seek(sym_cmd.stroff, offset) #for some reason this throws an IOError
253 |             strs = file_object.read(sym_cmd.strsize) #read in the entire string table
254 |             #each string is null (00) terminated, so you can split on that
255 |             # however the indexes to the string table are byte offsets, so this is not necessary really
256 |             # strings = strs.split('\x00')
257 | 
258 |             # go to the beginning of the symbol table
259 |             file_object.seek(0)
260 |             file_object.seek(sym_cmd.symoff+offset)
261 |             undef = [] #undefined external symbols
262 |             defined = [] #defined external symbols
263 |             #seek to the beginning index of the defined external symbols
264 |             for i in xrange(dyn_cmd.iextdefsym):
265 |                 file_object.read(12)
266 |                 #if this is a 64-bit object file, there will be an extra 4 blank bytes
267 |                 if isinstance(header.header, mach_header_64):
268 |                     file_object.read(4)
269 | 
270 |             #read the number of defined external symbols specified in LC_DYSYMTAB
271 |             for i in xrange(dyn_cmd.nextdefsym):
272 |                 # get the index to the strings table - this is 4 bytes long
273 |                 t = file_object.read(4)
274 |                 # the endian of the Mach-O object is in the header
275 |                 index = struct.unpack(endian+'L', t)[0]
276 |                 #indirect.append(''.join('%02x' % ord(byte) for byte in t))
277 |                 file_object.read(8) #skip the rest of the symbol table entry - 8 bytes total
278 |                 #if this is a 64-bit object file, there will be an extra 4 blank bytes
279 |                 if isinstance(header.header, mach_header_64):
280 |                     file_object.read(4)
281 | 
282 |                 if index == 0: # a null string has an index of 0
283 |                     defined.append('NULL')
284 |                 else: #get the human-readable string at the index
285 |                     str = ''
286 |                     b = strs[index]
287 |                     i = 0
288 |                     while (b != b'\x00'):
289 |                         str = str + b
290 |                         i += 1
291 |                         b = strs[index+i]
292 |                     defined.append(str)
293 | 
294 |             #read the number of undefined external symbols specified in LC_DYSYMTAB
295 |             for i in xrange(dyn_cmd.nundefsym):
296 |                 # get the index to the strings table - this is 4 bytes long
297 |                 t = file_object.read(4)
298 |                 # the endian of the Mach-O object is in the header
299 |                 index = struct.unpack(endian+'L', t)[0]
300 |                 #indirect.append(''.join('%02x' % ord(byte) for byte in t))
301 |                 file_object.read(8) #skip the rest of the symbol table entry - 8 bytes total
302 |                 #if this is a 64-bit object file, there will be an extra 4 blank bytes
303 |                 if isinstance(header.header, mach_header_64):
304 |                     file_object.read(4)
305 | 
306 |                 if index == 0: # a null string has an index of 0
307 |                     undef.append('NULL')
308 |                 else: #get the human-readable string at the index
309 |                     str = ''
310 |                     b = strs[index]
311 |                     i = 0
312 |                     while (b != b'\x00'):
313 |                         str = str + b
314 |                         i += 1
315 |                         b = strs[index+i]
316 |                     undef.append(str)
317 |         except:
318 |             defined = "Error: malformed symbol table"
319 |             undef = []
320 | 
321 |         return (defined, undef)
322 | 
323 |     def getFlags(self, flags):
324 |         f = '' #string to hold the flags
325 |         for i in range(0, 31): #flags are each one bit, so check each bit in 4 bytes
326 |             mask = 1 << i
327 |             flag = flags & mask
328 |             if flag in data.MACHO_FLAGS:
329 |                 if f != '':
330 |                     f += ", "
331 |                 f += data.MACHO_FLAGS.get(flag)
332 |         return f
333 | 
334 |     def parseFATHeader(self, f, pFile):
335 |         results = {}
336 |         #If this is a FAT file, it will have an extra header
337 |         if not (pFile.fat is None):
338 | 
339 |             #insert the main FAT header fields
340 |             results["Magic"] = pFile.fat.magic
341 |             results["n_arch"] = pFile.fat.nfat_arch
342 | 
343 |             #seek past the first couple FAT header fields (2 fields, 4 bytes each)
344 |             f.seek(8)
345 |             #parse the sub-file object structures (fat_arch structures)
346 |             archs = [fat_arch.from_fileobj(f) for i in range(pFile.fat.nfat_arch)]
347 |             a_results = {}
348 |             for a in archs:
349 |                 ar = {}
350 |                 #get human-readable names
351 |                 cpu_type = CPU_TYPE_NAMES.get(a.cputype, a.cputype)
352 |                 cpu_stype = self.getCPUSubtype(cpu_type, a.cpusubtype)
353 | 
354 |                 ar["cpu_subtype"] = cpu_stype
355 |                 ar["offset"] = a.offset
356 |                 ar["size"] = a.size
357 |                 ar["alignment"] = a.align
358 |                 a_results[cpu_type] = ar
359 | 
360 |             results["archs"] = a_results
361 | 
362 |         return results
363 | 
364 |     '''Get the human-readable cpu subtype.
365 |     This is a bit complicate because there seems to be no defined mapping for cpu_type to cpu_subtype, so I had to guess for some.
366 |     ctype = human-readable cpu_type
367 |     stype = cpu_subtype '''
368 |     def getCPUSubtype(self, ctype, stype):
369 |         if 'ARM' in ctype:
370 |             return data.CPU_SUBTYPE_ARM.get(stype)
371 |         elif 'HPPA' in ctype:
372 |             return data.CPU_SUBTYPE_HPPA.get(stype)
373 |         elif 'i860' in ctype:
374 |             return data.CPU_SUBTYPE_I860.get(stype)
375 |         elif 'i386' in ctype:
376 |             return data.CPU_SUBTYPE_I386.get(stype)
377 |         elif 'MC68' in ctype:
378 |             return data.CPU_SUBTYPE_MC680x0.get(stype)
379 |         elif 'MC88' in ctype:
380 |             return data.CPU_SUBTYPE_MC88000.get(stype)
381 |         elif 'MC98' in ctype:
382 |             return data.CPU_SUBTYPE_MC98000.get(stype)
383 |         elif 'MIPS' in ctype:
384 |             return data.CPU_SUBTYPE_MIPS.get(stype)
385 |         elif 'PowerPC' in ctype:
386 |             return data.CPU_SUBTYPE_POWERPC.get(stype)
387 |         elif 'SPARC' in ctype:
388 |             return data.CPU_SUBTYPE_SPARC.get(stype)
389 |         elif 'VAX' in ctype:
390 |             return data.CPU_SUBTYPE_VAX.get(stype)
391 |         elif 'x86_64' in ctype:
392 |             return data.CPU_SUBTYPE_X86_64.get(stype)
393 |         elif 'x86' in ctype:
394 |             return data.CPU_SUBTYPE_X86.get(stype)
395 |         elif 'high' in ctype:
396 |             data.CPU_SUBTYPE_HIGH.get(stype)
397 |         else:
398 |             return data.CPU_SUBTYPE_ANY.get(stype)
399 | 
400 | 
class StaticMac(Processing):
    """
    The class that is actually called by Cuckoo when the processing modules are run.
    It collects the results from the MachO class, which does all the real work.
    """

    def run(self):
        """
        Run the analysis.
        @return: results dict (empty when nothing was analysed).
        """
        #This is the name of the subcontainer Cuckoo will use for the returned data
        self.key = "static_macho"
        static_macho = {} #the dictionary to store the results in

        if self.task["category"] != "file": #only files are analysed, not URLs
            return static_macho
        if not (HAVE_MACHO and HAVE_MAGIC): #the proper libraries are missing
            return static_macho
        if self.file_path is None:
            return static_macho

        kind = magic.from_file(self.file_path) #get the file type
        #Note FAT files are listed as Mach-O with multiple architectures
        if kind is not None and "Mach-O" in kind:
            static_macho = MachO_Parse(self.file_path).parse()
        elif kind is not None and "Zip" in kind: #could be an app file
            log.info(".zip file found, checking for executables inside")
            static_macho = self.handleZip()
        else:
            log.info("File is not Mach-O or FAT file, quitting module")

        return static_macho

    def handleZip(self):
        """
        .app files contain Mach-O files, but they have to be submitted as zips.
        This attempts to analyze the main executable if it is a .app file.
        @return: results dict of the executable named by the bundle's
                 Info.plist, or an empty dict when the archive is empty,
                 holds no usable .app bundle or cannot be read.
        """
        static_macho = {} #the dictionary to store the results in

        try:
            #the archive is opened inside the try so a broken zip is reported
            #here instead of escaping as BadZipfile
            with ZipFile(self.file_path, "r") as archive:
                names = archive.namelist()
                if not names: #this is an empty zip file
                    return static_macho

                exec_file = self._findBundleExecutable(archive, names)
                if not exec_file:
                    return static_macho

                for name in names:
                    if os.path.basename(name) == exec_file:
                        static_macho = self._parseZipMember(archive, name)
        except BadZipfile:
            log.error("Unable to open zip file")
        except RuntimeError:
            log.error("Unable to read zip file contents")

        #also reached on success, so the parsed results are actually returned
        return static_macho

    def _findBundleExecutable(self, archive, names):
        """
        Look up CFBundleExecutable in the Info.plist of the first .app entry.
        @return: the executable name, or "" when there is no .app entry or
                 its Info.plist is missing, unreadable or lacks the key.
        """
        from xml.parsers.expat import ExpatError

        for name in names:
            if not name.endswith((".app", ".app/")):
                continue
            #the entry may be listed without its trailing slash
            bundle = name if name.endswith("/") else name + "/"
            try:
                plist_file = archive.open(bundle + "Contents/Info.plist")
                try:
                    plist = plistlib.readPlist(plist_file)
                finally:
                    plist_file.close()
                return plist["CFBundleExecutable"]
            except (KeyError, ExpatError):
                log.info("Malformed .app file " + name + ", aborting static analysis")
                return "" #only the first .app entry is considered
        return ""

    def _parseZipMember(self, archive, member):
        """
        Extract one archive member to a private temporary directory, parse it
        and remove the temporary copy again.
        @return: whatever MachO_Parse(...).parse() returns for the member.
        """
        import shutil
        import tempfile

        #a fresh directory avoids a predictable path in the shared temp
        #location and does not depend on TMPDIR being set
        workdir = tempfile.mkdtemp(prefix="static_macho_")
        try:
            outpath = os.path.join(workdir, os.path.basename(member))
            with open(outpath, "wb") as out: #binary mode: this is an executable
                out.write(archive.read(member))
            os.chmod(outpath, 0o664) #readable/writable, never executable
            return MachO_Parse(outpath).parse()
        finally:
            #delete the extracted copy even when parsing fails
            shutil.rmtree(workdir, ignore_errors=True)
483 | 
484 | 


--------------------------------------------------------------------------------