Package lib :: Package cuckoo :: Package common :: Module netlog
[hide private]
[frames] | no frames]

Source Code for Module lib.cuckoo.common.netlog

  1  # Copyright (C) 2010-2015 Cuckoo Foundation. 
  2  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  3  # See the file 'docs/LICENSE' for copying permission. 
  4   
  5  import logging 
  6  import struct 
  7  import datetime 
  8  import string 
  9   
 10  try: 
 11      import bson 
 12      HAVE_BSON = True 
 13  except ImportError: 
 14      HAVE_BSON = False 
 15  else: 
 16      # The BSON module provided by pymongo works through its "BSON" class. 
 17      if hasattr(bson, "BSON"): 
 18          bson_decode = lambda d: bson.BSON(d).decode() 
 19      # The BSON module provided by "pip install bson" works through the 
 20      # "loads" function (just like pickle etc.) 
 21      elif hasattr(bson, "loads"): 
 22          bson_decode = lambda d: bson.loads(d) 
 23      else: 
 24          HAVE_BSON = False 
 25   
 26  from lib.cuckoo.common.defines import REG_SZ, REG_EXPAND_SZ 
 27  from lib.cuckoo.common.defines import REG_DWORD_BIG_ENDIAN 
 28  from lib.cuckoo.common.defines import REG_DWORD_LITTLE_ENDIAN 
 29  from lib.cuckoo.common.exceptions import CuckooResultError 
 30  from lib.cuckoo.common.logtbl import table as LOGTBL 
 31  from lib.cuckoo.common.utils import get_filename_from_path 
 32   
 33  log = logging.getLogger(__name__) 
 34   
 35   
 36  # TODO: should probably prettify this. 
def expand_format(fs):
    """Expand single-digit repeat counts in a format specifier string.

    A digit is consumed together with the character that follows it and is
    replaced by that character repeated ``digit`` times, e.g. ``"2s3i"``
    becomes ``"ssiii"``. Every other character is copied unchanged.

    A digit as the very last character has nothing to repeat and raises
    IndexError.
    """
    pieces = []
    pos = 0
    total = len(fs)
    while pos < total:
        ch = fs[pos]
        if ch in string.digits:
            # Digit plus the character it applies to: two input characters.
            pieces.append(fs[pos + 1] * int(ch))
            pos += 2
        else:
            pieces.append(ch)
            pos += 1
    return "".join(pieces)


###############################################################################
# Custom Cuckoomon "Netlog" protocol - by skier and rep.
# Kind of deprecated, more generic BSON protocol below.
###############################################################################

class NetlogParser(object):
    """Parser for the binary Cuckoomon "Netlog" protocol.

    Raw bytes are pulled from ``handler.read(n)``; decoded messages are
    reported through ``handler.log_process``, ``handler.log_thread`` and
    ``handler.log_call``.
    """

    def __init__(self, handler):
        self.handler = handler

        # Format specifier character -> method that reads that argument.
        self.formatmap = {
            "s": self.read_string,
            "S": self.read_string,
            "u": self.read_string,
            "U": self.read_string,
            "b": self.read_buffer,
            "B": self.read_buffer,
            "i": self.read_int32,
            "l": self.read_int32,
            "L": self.read_int32,
            "p": self.read_ptr,
            "P": self.read_ptr,
            "o": self.read_string,
            "O": self.read_string,
            "a": self.read_argv,
            "A": self.read_argv,
            "r": self.read_registry,
            "R": self.read_registry,
        }

    def close(self):
        """No-op; this parser holds no resources of its own."""
        pass

    def read_next_message(self):
        """Read one message from the handler and dispatch it.

        Returns True when the message was handled and False when it could
        not be parsed (the failure is logged).
        """
        apiindex, status = struct.unpack("BB", self.handler.read(2))
        returnval, tid, timediff = struct.unpack("III", self.handler.read(12))
        context = apiindex, status, returnval, tid, timediff

        if apiindex == 0:
            return self._read_process_message(context)

        if apiindex == 1:
            # New thread message.
            pid = self.read_int32()
            self.handler.log_thread(context, pid)
            return True

        return self._read_api_call(context)

    def _read_process_message(self, context):
        """Parse a new-process message and pass it to handler.log_process."""
        timelow = self.read_int32()
        timehigh = self.read_int32()
        # FILETIME is 100-nanoseconds from 1601 :/
        vmtimeunix = (timelow + (timehigh << 32))
        vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
        try:
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
        except Exception:
            log.critical("vmtime in new-process-messsage out of range "
                         "(protocol out of sync?)")
            return False

        pid = self.read_int32()
        ppid = self.read_int32()

        try:
            modulepath = self.read_string()
            procname = get_filename_from_path(modulepath)
        except Exception:
            log.exception("Exception in netlog protocol, stopping parser.")
            return False

        if len(procname) > 255:
            log.critical("Huge process name (>255), assuming netlog "
                         "protocol out of sync.")
            log.debug("Process name: %s", repr(procname))
            return False

        self.handler.log_process(context, vmtime, pid, ppid,
                                 modulepath, procname)
        return True

    def _read_api_call(self, context):
        """Parse the arguments of an API call and pass it to handler.log_call."""
        apiindex, tid = context[0], context[3]
        try:
            apiname, modulename, parseinfo = LOGTBL[apiindex]
        except IndexError:
            log.debug("Netlog LOGTBL lookup error for API index {0} "
                      "(pid={1}, tid={2})".format(apiindex, None, tid))
            return False

        # parseinfo is (format string, argument name, argument name, ...).
        formatspecifiers = expand_format(parseinfo[0])
        argnames = parseinfo[1:]
        arguments = []
        for pos, fs in enumerate(formatspecifiers):
            argname = argnames[pos]
            fn = self.formatmap.get(fs)
            if fn is None:
                log.warning("No handler for format specifier {0} on "
                            "apitype {1}".format(fs, apiname))
                continue

            try:
                r = fn()
            except Exception:
                log.exception("Exception in netlog protocol, "
                              "stopping parser.")
                return False

            arguments.append((argname, r))

        self.handler.log_call(context, apiname, modulename, arguments)
        return True

    def read_int32(self):
        """Read an unsigned 32bit integer from the handler."""
        return struct.unpack("I", self.handler.read(4))[0]

    def read_ptr(self):
        """Read a 32bit pointer and return it formatted as "0x%08x"."""
        value = self.read_int32()
        return "0x%08x" % value

    def read_string(self):
        """Read a length-prefixed string from the handler.

        The prefix is two unsigned 32bit integers: the number of bytes that
        follow and the original (maximum) length. A truncation marker is
        appended when the original was longer than what was sent.

        Raises CuckooResultError when the announced length exceeds 0x10000.
        """
        length, maxlength = struct.unpack("II", self.handler.read(8))
        # The length is unpacked as unsigned, so only the upper bound can
        # be violated.
        if length > 0x10000:
            log.critical("read_string length weirdness "
                         "length: %d maxlength: %d", length, maxlength)
            raise CuckooResultError("read_string length failure, "
                                    "protocol broken?")

        s = self.handler.read(length)
        if maxlength > length:
            s += "... (truncated)"
        return s

    def read_buffer(self):
        """Read a length-prefixed memory buffer from the handler.

        Uses the same two-integer prefix as read_string, but without an
        upper bound on the length. A truncation marker is appended when the
        original buffer was longer than what was sent.
        """
        length, maxlength = struct.unpack("II", self.handler.read(8))
        buf = self.handler.read(length)
        if maxlength > length:
            buf += " ... (truncated)"
        return buf

    def read_registry(self):
        """Read logged registry data from the handler.

        DWORD values are returned as integers and (expandable) strings as
        strings; for any other registry type a placeholder string is
        returned and no further bytes are read.
        """
        typ = self.read_int32()
        if typ in (REG_DWORD_BIG_ENDIAN, REG_DWORD_LITTLE_ENDIAN):
            return self.read_int32()
        if typ in (REG_SZ, REG_EXPAND_SZ):
            return self.read_string()
        return "(unable to dump buffer content)"

    def read_list(self, fn):
        """Read a 32bit count followed by that many items read with _fn_."""
        count = self.read_int32()
        ret = []
        # Count down rather than iterate a range so that nothing sized by
        # the received count is allocated up front.
        while count > 0:
            ret.append(fn())
            count -= 1
        return ret

    def read_argv(self):
        """Read a list of strings (see read_list and read_string)."""
        return self.read_list(self.read_string)


###############################################################################
# Generic BSON based protocol - by rep
# Allows all kinds of languages / sources to generate input for Cuckoo,
# thus we can reuse report generation / signatures for other API trace sources.
###############################################################################

def _pointer_converter(v):
    """Format a pointer argument as "0x%08x" after the default conversion."""
    return "0x%08x" % default_converter(v)


# Converters selected by the type specifier that accompanies an argument
# name; specifiers without an entry fall back to default_converter.
TYPECONVERTERS = {
    "p": _pointer_converter,
}

# 20 Mb max message length.
MAX_MESSAGE_LENGTH = 20 * 1024 * 1024

try:
    _INTEGER_TYPES = (int, long)
except NameError:
    # Interpreters without a separate "long" type only have "int".
    _INTEGER_TYPES = (int,)


def default_converter(v):
    """Return _v_ with negative integers mapped into the unsigned 32bit range.

    Fix signed ints (bson is kind of limited there): a negative int gets
    0x100000000 added to it. Values of any other type, including subclasses
    of the integer types, are returned unchanged.
    """
    # Exact type match on purpose: integer subclasses are left untouched.
    if type(v) in _INTEGER_TYPES and v < 0:
        return v + 0x100000000
    return v
235
def check_names_for_typeinfo(arginfo):
    """Split argument info into argument names and value converters.

    Each entry of _arginfo_ is either a plain name or a list/tuple whose
    first item is the name and whose second item is a type specifier.
    Returns ``(argnames, converters)`` with one converter per entry: the
    TYPECONVERTERS entry for the specifier when there is one, otherwise
    default_converter (an unknown specifier is logged at debug level).
    """
    def has_typeinfo(entry):
        return type(entry) in (list, tuple)

    argnames = []
    for entry in arginfo:
        argnames.append(entry[0] if has_typeinfo(entry) else entry)

    converters = []
    for entry in arginfo:
        if not has_typeinfo(entry):
            converters.append(default_converter)
            continue

        converter = TYPECONVERTERS.get(entry[1], None)
        if not converter:
            log.debug("Analyzer sent unknown format "
                      "specifier '{0}'".format(entry[1]))
            converter = default_converter
        converters.append(converter)

    return argnames, converters
252 253
class BsonParser(object):
    """Parser for the generic BSON based API trace protocol.

    Every message is a BSON document read from ``handler.read(n)``. "info"
    messages describe an API (name, argument info, category) for an index;
    later call messages with that index are decoded using that description
    and reported through ``handler.log_call``, ``handler.log_process`` or
    ``handler.log_thread``.
    """

    def __init__(self, handler):
        self.handler = handler
        # API index -> (name, arginfo, argnames, converters, category),
        # filled in by "info" messages.
        self.infomap = {}

        if not HAVE_BSON:
            log.critical("Starting BsonParser, but bson is not available! (install with `pip install bson`)")

    def close(self):
        """No-op; this parser holds no resources of its own."""
        pass

    def read_next_message(self):
        """Read one BSON message from the handler and dispatch it.

        Returns False when the message is too large, too small or cannot be
        decoded, and True otherwise (including for messages that are only
        logged and skipped).
        """
        data = self.handler.read(4)
        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return False

        # The length prefix counts its own four bytes plus at least the
        # terminating NUL byte of the document, so anything below 5 cannot
        # be valid. Bail out before asking the handler for a zero or
        # negative number of bytes.
        if blen < 5:
            log.warning("BSON message length {0} is too small, "
                        "stopping handler.".format(blen))
            return False

        data += self.handler.read(blen - 4)

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return False

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)
        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        # Same layout as the Netlog context: index, status, return value,
        # thread id, time. Status and return value are filled in later for
        # regular API calls.
        context = [index, 1, 0, tid, time]

        if mtype == "info":
            self._register_api_info(index, dec)

        elif mtype == "debug":
            log.info("Debug message from monitor: "
                     "{0}".format(dec.get("msg", "")))

        elif mtype == "new_process":
            # new_process message from VMI monitor.
            vmtime = datetime.datetime.fromtimestamp(dec.get("starttime", 0))
            procname = dec.get("name", "NONAME")
            ppid = 0
            modulepath = "DUMMY"

            self.handler.log_process(context, vmtime, None, ppid,
                                     modulepath, procname)

        else:
            self._handle_api_call(context, index, dec)

        return True

    def _register_api_info(self, index, dec):
        """Store the API description of an "info" message under _index_."""
        # API call index info message, explaining the argument names, etc.
        name = dec.get("name", "NONAME")
        arginfo = dec.get("args", [])
        category = dec.get("category")

        # Bson dumps that were generated before cuckoomon exported the
        # "category" field have to get the category using the old method.
        if not category:
            # Take the category of the first LOGTBL entry with this api
            # name, otherwise the default string "unknown".
            matches = [entry for entry in LOGTBL if entry[0] == name]
            category = matches[0][1] if matches else "unknown"

        argnames, converters = check_names_for_typeinfo(arginfo)
        self.infomap[index] = name, arginfo, argnames, converters, category

    def _handle_api_call(self, context, index, dec):
        """Decode a call message and report it to the handler.

        Messages with an unknown index or an unexpected number of arguments
        are logged and skipped.
        """
        if index not in self.infomap:
            log.warning("Got API with unknown index - monitor needs "
                        "to explain first: {0}".format(dec))
            return

        apiname, arginfo, argnames, converters, category = self.infomap[index]
        args = dec.get("args", [])

        if len(args) != len(argnames):
            log.warning("Inconsistent arg count (compared to arg names) "
                        "on {2}: {0} names {1}".format(dec, argnames,
                                                       apiname))
            return

        argdict = dict((name, convert(value)) for name, convert, value
                       in zip(argnames, converters, args))

        if apiname == "__process__":
            # Special new process message from cuckoomon.
            timelow = argdict["TimeLow"]
            timehigh = argdict["TimeHigh"]
            # FILETIME is 100-nanoseconds from 1601 :/
            vmtimeunix = (timelow + (timehigh << 32))
            vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
            vmtime = datetime.datetime.fromtimestamp(vmtimeunix)

            pid = argdict["ProcessIdentifier"]
            ppid = argdict["ParentProcessIdentifier"]
            modulepath = argdict["ModulePath"]
            procname = get_filename_from_path(modulepath)

            self.handler.log_process(context, vmtime, pid, ppid,
                                     modulepath, procname)
            return

        if apiname == "__thread__":
            pid = argdict["ProcessIdentifier"]
            self.handler.log_thread(context, pid)
            return

        # NOTE: "__anomaly__" messages get no special treatment and are
        # reported as regular calls below.
        context[1] = argdict.pop("is_success", 1)
        context[2] = argdict.pop("retval", 0)
        # Build real lists so the concatenation does not depend on what
        # kind of object dict.items() returns.
        arguments = list(argdict.items()) + list(dec.get("aux", {}).items())

        self.handler.log_call(context, apiname, category, arguments)
379