1
2
3
4
5 import os
6 import re
7 import struct
8 import socket
9 import logging
10 from urlparse import urlunparse
11
12 from lib.cuckoo.common.abstracts import Processing
13 from lib.cuckoo.common.config import Config
14 from lib.cuckoo.common.dns import resolve
15 from lib.cuckoo.common.irc import ircMessage
16 from lib.cuckoo.common.objects import File
17 from lib.cuckoo.common.utils import convert_to_printable
18 from lib.cuckoo.common.exceptions import CuckooProcessingError
19
20 try:
21 import dpkt
22 IS_DPKT = True
23 except ImportError:
24 IS_DPKT = False
25
26
27
28
29 import heapq
30 from tempfile import gettempdir
31 from itertools import islice
32 from collections import namedtuple
33
34 TMPD = gettempdir()
35 Keyed = namedtuple("Keyed", ["key", "obj"])
36 Packet = namedtuple("Packet", ["raw", "ts"])
37
39 """Reads network data from PCAP file."""
40
def __init__(self, filepath):
    """Prepare an empty analysis state for a single PCAP file.

    @param filepath: path to PCAP file
    """
    self.filepath = filepath

    # Result dictionary handed back by run().
    self.results = {}

    # Host and domain bookkeeping.
    self.hosts = list()
    self.unique_hosts = list()
    self.unique_domains = list()

    # Flow records plus the sets used to recognise already-seen flows.
    self.tcp_connections = list()
    self.tcp_connections_seen = set()
    self.udp_connections = list()
    self.udp_connections_seen = set()

    # Per-protocol findings.
    self.icmp_requests = list()
    self.http_requests = dict()
    self.dns_requests = dict()
    self.dns_answers = set()
    self.smtp_requests = list()
    self.smtp_flow = dict()
    self.irc_requests = list()
74
def _dns_gethostbyname(self, name):
    """Resolve a hostname if the processing configuration allows it.

    @param name: hostname.
    @return: whatever resolve(name) returns when DNS resolution is enabled,
             otherwise an empty string.
    """
    if not Config().processing.resolve_dns:
        return ""
    return resolve(name)
85
def _is_private_ip(self, ip):
    """Check if the IP belongs to private network blocks.

    The address is parsed once; anything that is not a valid dotted IPv4
    string (including IPv6 addresses and non-string values) is reported
    as not private, exactly as before.

    @param ip: IP address to verify.
    @return: boolean representing whether the IP belongs or not to
             a private network block.
    """
    networks = (
        "0.0.0.0/8",
        "10.0.0.0/8",
        "100.64.0.0/10",
        "127.0.0.0/8",
        "169.254.0.0/16",
        "172.16.0.0/12",
        "192.0.0.0/24",
        "192.0.2.0/24",
        "192.88.99.0/24",
        "192.168.0.0/16",
        "198.18.0.0/15",
        "198.51.100.0/24",
        "203.0.113.0/24",
        "240.0.0.0/4",
        "255.255.255.255/32",
        "224.0.0.0/4",
    )

    # Parse the candidate a single time instead of once per network, and
    # only swallow the errors an unparsable address can actually produce.
    try:
        ipaddr = struct.unpack(">I", socket.inet_aton(ip))[0]
    except (socket.error, struct.error, TypeError, ValueError):
        return False

    for network in networks:
        netaddr, bits = network.split("/")
        network_low = struct.unpack(">I", socket.inet_aton(netaddr))[0]
        # Set every host bit to obtain the last address of the block.
        network_high = network_low | ((1 << (32 - int(bits))) - 1)
        if network_low <= ipaddr <= network_high:
            return True

    return False
126
def _add_hosts(self, connection):
    """Add IPs to unique list.

    A source address is only remembered in self.hosts when it is private.
    A destination address is always remembered, and additionally added to
    self.unique_hosts the first time it is seen if it is not private.

    This stays best-effort: a failure never aborts packet processing, but
    it is now logged at debug level instead of vanishing, and
    KeyboardInterrupt/SystemExit are no longer swallowed.

    @param connection: connection data
    """
    try:
        if connection["src"] not in self.hosts:
            ip = convert_to_printable(connection["src"])
            if ip not in self.hosts and self._is_private_ip(ip):
                self.hosts.append(ip)

        if connection["dst"] not in self.hosts:
            ip = convert_to_printable(connection["dst"])
            if ip not in self.hosts:
                self.hosts.append(ip)
                if not self._is_private_ip(ip):
                    self.unique_hosts.append(ip)
    except Exception as e:
        logging.getLogger("Processing.Pcap").debug(
            "Unable to register hosts of a connection: %s", e)
155
def _tcp_dissect(self, conn, data):
    """Hand a TCP payload to the HTTP, SMTP and IRC dissectors.

    @param conn: connection.
    @param data: payload data.
    """
    looks_like_http = self._check_http(data)
    if looks_like_http:
        self._add_http(data, conn["dport"])

    dport = conn["dport"]

    # SMTP is recognised by destination port only.
    if dport == 25:
        self._reassemble_smtp(conn, data)

    # Port 21 is never inspected for IRC.
    if dport == 21:
        return
    if self._check_irc(data):
        self._add_irc(data)
169
def _udp_dissect(self, conn, data):
    """Hand a UDP payload to the DNS dissector when a DNS port is involved.

    @param conn: connection.
    @param data: payload data.
    """
    # Ports 53 and 5353 are checked on both ends of the flow.
    on_dns_port = any(conn[side] == port
                      for port in (53, 5353)
                      for side in ("dport", "sport"))
    if not on_dns_port:
        return

    if self._check_dns(data):
        self._add_dns(data)
179
def _check_icmp(self, icmp_data):
    """Checks for ICMP traffic.

    @param icmp_data: ICMP data flow.
    @return: True when icmp_data is a dpkt ICMP object with a non-empty
             payload, False otherwise (including on any error).
    """
    try:
        return isinstance(icmp_data, dpkt.icmp.ICMP) and \
            len(icmp_data.data) > 0
    except Exception:
        # Still a best-effort check, but no longer traps
        # KeyboardInterrupt/SystemExit like the former bare except did.
        return False
189
def _icmp_dissect(self, conn, data):
    """Runs all ICMP dissectors.

    Records one entry in self.icmp_requests for every ICMP packet that
    passes _check_icmp and does not originate from the result server.

    @param conn: connection.
    @param data: payload data.
    """
    if not self._check_icmp(data):
        return

    # Packets sent by the result server itself are ignored.
    if conn["src"] == Config().resultserver.ip:
        return

    entry = {
        "src": conn["src"],
        "dst": conn["dst"],
        "type": data.type,
    }

    # Not every ICMP message carries a nested payload; fall back to an
    # empty string when it cannot be extracted.
    try:
        entry["data"] = convert_to_printable(data.data.data)
    except Exception:
        entry["data"] = ""

    self.icmp_requests.append(entry)
214
def _check_dns(self, udpdata):
    """Checks for DNS traffic.

    @param udpdata: UDP data flow.
    @return: True when dpkt can parse the payload as a DNS message.
    """
    try:
        dpkt.dns.DNS(udpdata)
    except Exception:
        # Any parsing failure means "not DNS"; unlike the former bare
        # except this lets KeyboardInterrupt/SystemExit through.
        return False

    return True
225
def _add_dns(self, udpdata):
    """Adds a DNS data flow.

    Parses the message, registers the queried name as a domain and merges
    the answers into self.dns_requests, keyed by (type, request).

    Changes compared to the previous implementation:
    - query types outside the known table no longer raise KeyError; they
      are reported as "TYPE<number>";
    - answers of unsupported record types (or with undecodable addresses)
      are skipped instead of being stored as empty dicts;
    - answers of repeated responses are de-duplicated per request and
      self.dns_answers is kept up to date.

    @param udpdata: UDP data flow.
    @return: False when the message has no question section, else True.
    """
    dns = dpkt.dns.DNS(udpdata)

    try:
        question = dns.qd[0]
    except IndexError:
        return False

    type_names = {
        dpkt.dns.DNS_A: "A",
        dpkt.dns.DNS_AAAA: "AAAA",
        dpkt.dns.DNS_CNAME: "CNAME",
        dpkt.dns.DNS_MX: "MX",
        dpkt.dns.DNS_PTR: "PTR",
        dpkt.dns.DNS_NS: "NS",
        dpkt.dns.DNS_SOA: "SOA",
        dpkt.dns.DNS_HINFO: "HINFO",
        dpkt.dns.DNS_TXT: "TXT",
        dpkt.dns.DNS_SRV: "SRV",
    }

    q_type = question.type
    query = {
        "request": question.name,
        # Unknown types use the generic "TYPEnnn" spelling.
        "type": type_names.get(q_type, "TYPE%s" % q_type),
        "answers": [],
    }

    for answer in dns.an:
        a_type = answer.type
        try:
            if a_type == dpkt.dns.DNS_A:
                data = socket.inet_ntoa(answer.rdata)
            elif a_type == dpkt.dns.DNS_AAAA:
                data = socket.inet_ntop(socket.AF_INET6, answer.rdata)
            elif a_type == dpkt.dns.DNS_CNAME:
                data = answer.cname
            elif a_type == dpkt.dns.DNS_MX:
                data = answer.mxname
            elif a_type == dpkt.dns.DNS_PTR:
                data = answer.ptrname
            elif a_type == dpkt.dns.DNS_NS:
                data = answer.nsname
            elif a_type == dpkt.dns.DNS_SOA:
                data = ",".join([answer.mname,
                                 answer.rname,
                                 str(answer.serial),
                                 str(answer.refresh),
                                 str(answer.retry),
                                 str(answer.expire),
                                 str(answer.minimum)])
            elif a_type in (dpkt.dns.DNS_HINFO, dpkt.dns.DNS_TXT):
                data = " ".join(answer.text)
            else:
                # Record type we do not know how to render.
                continue
        except (socket.error, ValueError):
            # Malformed address record.
            continue

        query["answers"].append({"type": type_names[a_type], "data": data})

    self._add_domain(query["request"])

    reqtuple = (query["type"], query["request"])
    stored = self.dns_requests.setdefault(reqtuple, query)
    if stored is not query:
        # Same question seen before: only append answers it lacks.
        known = set((i["type"], i["data"]) for i in stored["answers"])
        for ans in query["answers"]:
            key = (ans["type"], ans["data"])
            if key not in known:
                known.add(key)
                stored["answers"].append(ans)

    self.dns_answers.update((i["type"], i["data"])
                            for i in query["answers"])

    return True
325
def _add_domain(self, domain):
    """Add a domain to unique list.

    Domains matching one of the ignore patterns, and domains already
    present in self.unique_domains, are left out. New domains are stored
    together with the address returned by _dns_gethostbyname.

    @param domain: domain name.
    """
    ignored_patterns = (
        r".*\.windows\.com$",
        r".*\.in\-addr\.arpa$",
    )

    # re keeps its own cache of compiled patterns, so there is no need to
    # rebuild a list of compiled objects on every call.
    if any(re.match(pattern, domain) for pattern in ignored_patterns):
        return

    if any(entry["domain"] == domain for entry in self.unique_domains):
        return

    self.unique_domains.append({"domain": domain,
                                "ip": self._dns_gethostbyname(domain)})
346
def _check_http(self, tcpdata):
    """Tell whether a TCP payload looks like an HTTP request.

    A payload that only parses partially still counts as HTTP as long as
    the parser managed to fill in the method, the version or the URI.

    @param tcpdata: TCP data flow.
    """
    try:
        r = dpkt.http.Request()
        r.method, r.version, r.uri = None, None, None
        r.unpack(tcpdata)
    except dpkt.dpkt.UnpackError:
        parsed_fields = (r.method, r.version, r.uri)
        return any(field is not None for field in parsed_fields)

    return True
362
def _add_http(self, tcpdata, dport):
    """Record an HTTP request, counting repeats of an identical payload.

    @param tcpdata: TCP data flow.
    @param dport: destination port.
    @return: True when the request was counted or stored, False when the
             entry could not be built.
    """
    known = self.http_requests.get(tcpdata)
    if known is not None:
        known["count"] += 1
        return True

    try:
        http = dpkt.http.Request()
        http.unpack(tcpdata)
    except dpkt.dpkt.UnpackError:
        # Whatever was parsed before the failure is still used below.
        pass

    try:
        headers = http.headers

        host = ""
        if "host" in headers:
            host = convert_to_printable(headers["host"])

        # Spell out the port in the URL unless it is the default one or
        # the Host header already carries a port.
        netloc = host
        if dport != 80 and ":" not in netloc:
            netloc = netloc + ":" + str(dport)

        url = urlunparse(("http", netloc, http.uri, None, None, None))

        entry = {
            "count": 1,
            "host": host,
            "port": dport,
            "data": convert_to_printable(tcpdata),
            "uri": convert_to_printable(url),
            "body": convert_to_printable(http.body),
            "path": convert_to_printable(http.uri),
        }

        if "user-agent" in headers:
            entry["user-agent"] = convert_to_printable(headers["user-agent"])

        entry["version"] = convert_to_printable(http.version)
        entry["method"] = convert_to_printable(http.method)

        self.http_requests[tcpdata] = entry
    except Exception:
        return False

    return True
414
def _reassemble_smtp(self, conn, data):
    """Append a payload to the SMTP stream kept for its destination.

    @param conn: connection dict.
    @param data: raw data.
    """
    dst = conn["dst"]
    try:
        self.smtp_flow[dst] += data
    except KeyError:
        # First payload seen for this destination.
        self.smtp_flow[dst] = data
424
def _process_smtp(self):
    """Process SMTP flow.

    Every reassembled stream in self.smtp_flow that starts with an EHLO or
    HELO greeting is appended to self.smtp_requests as a dict with the
    destination ("dst") and the raw stream ("raw").
    """
    # items() behaves the same here and, unlike iteritems(), exists on
    # both Python 2 and Python 3 dictionaries.
    for conn, data in self.smtp_flow.items():
        if data.startswith(("EHLO", "HELO")):
            self.smtp_requests.append({"dst": conn, "raw": data})
431
def _check_irc(self, tcpdata):
    """Ask the IRC helper whether a TCP payload contains IRC traffic.

    @param tcpdata: tcp data flow
    @return: result of ircMessage.isthereIRC, or False when the helper
             cannot be instantiated.
    """
    try:
        detector = ircMessage()
    except Exception:
        return False
    else:
        return detector.isthereIRC(tcpdata)
443
def _add_irc(self, tcpdata):
    """Adds an IRC communication.

    Client messages, plus server messages filtered with the "266" filter
    list, are appended to self.irc_requests. If either extraction fails
    nothing is added.

    (The previous docstring also documented a destination port parameter;
    the visible caller passes only the payload, so it is not listed.)

    @param tcpdata: TCP data in flow
    @return: True on success, False when the IRC helper raised.
    """
    try:
        reqc = ircMessage()
        reqs = ircMessage()
        filters_sc = ["266"]
        client_msgs = reqc.getClientMessages(tcpdata)
        server_msgs = reqs.getServerMessagesFilter(tcpdata, filters_sc)
        # Rebind rather than extend in place, as before, so a list object
        # handed out earlier is never mutated behind its holder's back.
        self.irc_requests = self.irc_requests + client_msgs + server_msgs
    except Exception:
        return False

    return True
462
def run(self):
    """Process PCAP.

    Walks every packet of the capture, feeds it to the protocol
    dissectors and collects the findings in self.results.

    Fixes compared to the previous implementation:
    - the capture file is closed on every exit path, including when dpkt
      rejects the file;
    - the recorded packet offset is refreshed after *every* packet, also
      after packets that were skipped or failed, so later flows no longer
      inherit a stale offset;
    - the first timestamp is detected with an explicit None check.

    @return: dict with network analysis data.
    """
    log = logging.getLogger("Processing.Pcap")

    if not IS_DPKT:
        log.error("Python DPKT is not installed, aborting PCAP analysis.")
        return self.results

    if not os.path.exists(self.filepath):
        log.warning("The PCAP file does not exist at path \"%s\".",
                    self.filepath)
        return self.results

    if os.path.getsize(self.filepath) == 0:
        log.error("The PCAP file at path \"%s\" is empty.", self.filepath)
        return self.results

    try:
        fobj = open(self.filepath, "rb")
    except (IOError, OSError):
        log.error("Unable to open %s", self.filepath)
        return self.results

    with fobj:
        try:
            pcap = dpkt.pcap.Reader(fobj)
        except dpkt.dpkt.NeedData:
            log.error("Unable to read PCAP file at path \"%s\".",
                      self.filepath)
            return self.results
        except ValueError:
            log.error("Unable to read PCAP file at path \"%s\". File is "
                      "corrupted or wrong format.", self.filepath)
            return self.results

        linktype = pcap.datalink()

        # File position at which the packet about to be read starts.
        offset = fobj.tell()
        first_ts = None
        for ts, buf in pcap:
            if first_ts is None:
                first_ts = ts

            try:
                self._dissect_packet(buf, linktype, offset, ts - first_ts)
            except (AttributeError, dpkt.dpkt.NeedData):
                # Truncated or otherwise unparsable packet: skip it.
                pass
            except Exception as e:
                log.exception("Failed to process packet: %s", e)
            finally:
                offset = fobj.tell()

    # Post processors for reconstructed flows.
    self._process_smtp()

    # Build results dict.
    self.results["hosts"] = self.unique_hosts
    self.results["domains"] = self.unique_domains
    self.results["tcp"] = [conn_from_flowtuple(i) for i in self.tcp_connections]
    self.results["udp"] = [conn_from_flowtuple(i) for i in self.udp_connections]
    self.results["icmp"] = self.icmp_requests
    self.results["http"] = list(self.http_requests.values())
    self.results["dns"] = list(self.dns_requests.values())
    self.results["smtp"] = self.smtp_requests
    self.results["irc"] = self.irc_requests

    return self.results

def _dissect_packet(self, buf, linktype, offset, relts):
    """Dissect one captured packet and update the analysis state.

    @param buf: raw packet bytes as read from the capture.
    @param linktype: link type reported by the capture reader.
    @param offset: file offset at which this packet starts.
    @param relts: packet timestamp relative to the first packet.
    """
    ip = iplayer_from_raw(buf, linktype)

    if isinstance(ip, dpkt.ip.IP):
        connection = {"src": socket.inet_ntoa(ip.src),
                      "dst": socket.inet_ntoa(ip.dst)}
    elif isinstance(ip, dpkt.ip6.IP6):
        connection = {"src": socket.inet_ntop(socket.AF_INET6, ip.src),
                      "dst": socket.inet_ntop(socket.AF_INET6, ip.dst)}
    else:
        # Not an IP packet, nothing to dissect.
        return

    self._add_hosts(connection)

    if ip.p == dpkt.ip.IP_PROTO_TCP:
        tcp = ip.data
        if not isinstance(tcp, dpkt.tcp.TCP):
            tcp = dpkt.tcp.TCP(tcp)

        if len(tcp.data) > 0:
            connection["sport"] = tcp.sport
            connection["dport"] = tcp.dport
            self._tcp_dissect(connection, tcp.data)
            self._track_flow(self.tcp_connections,
                             self.tcp_connections_seen,
                             connection, offset, relts)

    elif ip.p == dpkt.ip.IP_PROTO_UDP:
        udp = ip.data
        if not isinstance(udp, dpkt.udp.UDP):
            udp = dpkt.udp.UDP(udp)

        if len(udp.data) > 0:
            connection["sport"] = udp.sport
            connection["dport"] = udp.dport
            self._udp_dissect(connection, udp.data)
            self._track_flow(self.udp_connections,
                             self.udp_connections_seen,
                             connection, offset, relts)

    elif ip.p == dpkt.ip.IP_PROTO_ICMP:
        icmp = ip.data
        if not isinstance(icmp, dpkt.icmp.ICMP):
            icmp = dpkt.icmp.ICMP(icmp)

        self._icmp_dissect(connection, icmp)

def _track_flow(self, flows, seen, connection, offset, relts):
    """Record a flow the first time it is seen in either direction.

    @param flows: list receiving the flow tuple.
    @param seen: set of (src, sport, dst, dport) tuples already recorded.
    @param connection: dict with src, sport, dst and dport.
    @param offset: file offset of the packet that opened the flow.
    @param relts: timestamp of that packet relative to the first packet.
    """
    src, sport = connection["src"], connection["sport"]
    dst, dport = connection["dst"], connection["dport"]

    if (src, sport, dst, dport) in seen or (dst, dport, src, sport) in seen:
        return

    flows.append((src, sport, dst, dport, offset, relts))
    seen.add((src, sport, dst, dport))
584
586 """Network analysis."""
587
def run(self):
    """Run the PCAP analysis and attach file hashes to its results.

    Fixes a NameError on the unsorted path: the capture path lives on the
    instance (self.pcap_path); a bare pcap_path name is not defined here.

    @return: dict returned by Pcap.run(), extended with "pcap_sha256"
             and "sorted_pcap_sha256" for the files that exist on disk.
    """
    self.key = "network"

    sorted_path = self.pcap_path.replace("dump.", "dump_sorted.")
    if Config().processing.sort_pcap:
        sort_pcap(self.pcap_path, sorted_path)
        results = Pcap(sorted_path).run()
    else:
        results = Pcap(self.pcap_path).run()

    # Save PCAP file hashes to the results.
    if os.path.exists(self.pcap_path):
        results["pcap_sha256"] = File(self.pcap_path).get_sha256()
    if os.path.exists(sorted_path):
        results["sorted_pcap_sha256"] = File(sorted_path).get_sha256()

    return results
605
def iplayer_from_raw(raw, linktype=1):
    """Converts a raw packet to a dpkt packet regarding of link type.

    For raw-IP captures the IP version nibble of the first byte is now
    inspected, so IPv6 packets are parsed as IPv6 instead of being forced
    through the IPv4 parser.

    @param raw: raw packet
    @param linktype: integer describing link type as expected by dpkt
    @return: the payload of the Ethernet frame (link type 1) or the parsed
             IP packet (link type 101).
    @raise CuckooProcessingError: for any other link type.
    """
    if linktype == 1:  # Ethernet
        return dpkt.ethernet.Ethernet(raw).data

    if linktype == 101:  # Raw IP, no link-layer header
        # bytearray() yields an int on both Python 2 str and Python 3
        # bytes input.
        if raw and bytearray(raw[:1])[0] >> 4 == 6:
            return dpkt.ip6.IP6(raw)
        return dpkt.ip.IP(raw)

    raise CuckooProcessingError("unknown PCAP linktype")
619
def conn_from_flowtuple(ft):
    """Turn a six-element flow tuple into a JSON-friendly dictionary.

    @param ft: (src, sport, dst, dport, offset, relative timestamp)
    @return: dict with keys src, sport, dst, dport, offset and time.
    """
    sip, sport, dip, dport, offset, relts = ft
    return dict(src=sip, sport=sport,
                dst=dip, dport=dport,
                offset=offset, time=relts)
624
625
626
def batch_sort(input_iterator, output_path, buffer_size=32000, output_class=None):
    """batch sort helper with temporary files, supports sorting large stuff

    The input is consumed in chunks of buffer_size elements. Each chunk is
    sorted in memory and written to its own temporary file; the chunk
    files are then merged into output_path with heapq.merge.

    Chunk files now live in a private directory created for this call, so
    concurrent sorts can no longer overwrite each other's "000000",
    "000001", ... files, and the directory is always removed afterwards,
    even when closing a chunk fails.

    @param input_iterator: iterable of sortable elements exposing .obj
    @param output_path: path of the merged output
    @param buffer_size: number of elements sorted in memory at once
    @param output_class: callable taking a path and returning an object
                         with write(), close() and iteration support;
                         defaults to the class of input_iterator
    """
    import shutil
    from tempfile import mkdtemp

    if not output_class:
        output_class = input_iterator.__class__

    chunk_dir = mkdtemp(prefix="batch_sort_")
    chunks = []
    try:
        while True:
            current_chunk = sorted(islice(input_iterator, buffer_size))
            if not current_chunk:
                break

            chunk_path = os.path.join(chunk_dir, "%06i" % len(chunks))
            output_chunk = output_class(chunk_path)
            chunks.append(output_chunk)

            for elem in current_chunk:
                output_chunk.write(elem.obj)
            output_chunk.close()

        output_file = output_class(output_path)
        try:
            for elem in heapq.merge(*chunks):
                output_file.write(elem.obj)
        finally:
            output_file.close()
    finally:
        for chunk in chunks:
            try:
                chunk.close()
            except Exception:
                # A chunk that cannot be closed must not prevent cleanup.
                pass
        shutil.rmtree(chunk_dir, ignore_errors=True)
657
658
class SortCap(object):
    """SortCap is a wrapper around the packet lib (dpkt) that allows us to sort pcaps
    together with the batch_sort function above.

    An instance is either written to (write) or iterated over; iteration
    yields Keyed items whose key is (flow tuple, timestamp, counter), so
    sorting groups packets by flow and keeps them in capture order.
    """

    def __init__(self, path, linktype=1):
        """
        @param path: path of the pcap file to read or write
        @param linktype: link type used when writing; replaced by the
                         file's own link type once reading starts
        """
        self.name = path
        self.linktype = linktype
        self.fd = None
        # Underlying file object, kept so it can always be closed.
        self._fobj = None
        self.ctr = 0
        self.conns = set()

    def write(self, p):
        """Append a packet, opening the output file on first use.

        @param p: object with .raw (packet bytes) and .ts (timestamp)
        """
        if self.fd is None:
            self._fobj = open(self.name, "wb")
            self.fd = dpkt.pcap.Writer(self._fobj, linktype=self.linktype)
        self.fd.writepkt(p.raw, p.ts)

    def __iter__(self):
        """Open the file for reading on first use and return self."""
        if self.fd is None:
            self._fobj = open(self.name, "rb")
            self.fd = dpkt.pcap.Reader(self._fobj)
            self.fditer = iter(self.fd)
            self.linktype = self.fd.datalink()
        return self

    def close(self):
        """Close the underlying file; safe to call repeatedly.

        Previously this raised AttributeError when nothing had been opened
        or the object was already closed, and relied on the dpkt
        reader/writer object to close the file.
        """
        if self._fobj is not None:
            self._fobj.close()
            self._fobj = None
        self.fd = None

    def next(self):
        """Return the next packet wrapped in a Keyed sort item.

        StopIteration from the underlying reader ends the iteration.
        """
        ts, raw = next(self.fditer)
        self.ctr += 1

        rpkt = Packet(raw, ts)

        sip, dip, sport, dport, proto = flowtuple_from_raw(raw, self.linktype)

        # Check the other direction of the same flow, so both directions
        # share one key.
        if (dip, sip, dport, sport, proto) in self.conns:
            flowtuple = (dip, sip, dport, sport, proto)
        else:
            flowtuple = (sip, dip, sport, dport, proto)

        self.conns.add(flowtuple)
        return Keyed((flowtuple, ts, self.ctr), rpkt)

    # Python 3 spelling of the iterator protocol.
    __next__ = next
704
def sort_pcap(inpath, outpath):
    """Sort the pcap at inpath into outpath using SortCap and batch_sort.

    @param inpath: path of the capture to sort
    @param outpath: path of the sorted capture to create
    @return: always 0
    """
    source = SortCap(inpath)

    def make_writer(path):
        # Evaluated lazily: by the time chunks are created the source has
        # been opened and carries the link type of the input file.
        return SortCap(path, linktype=source.linktype)

    batch_sort(source, outpath, output_class=make_writer)
    return 0
710
def flowtuple_from_raw(raw, linktype=1):
    """Parse a packet from a pcap just enough to gain a flow description tuple

    Fixes compared to the previous implementation:
    - a TCP/UDP packet whose payload dpkt left unparsed (ip.data is then
      raw bytes rather than a TCP/UDP object) no longer raises
      AttributeError; its ports are reported as 0;
    - IPv6 packets get a real flow tuple instead of sharing the catch-all
      tuple used for non-IP traffic.

    @param raw: raw packet
    @param linktype: integer describing link type as expected by dpkt
    @return: (src ip, dst ip, src port, dst port, protocol); non-IP
             packets yield (0, 0, 0, 0, -1).
    """
    ip = iplayer_from_raw(raw, linktype)

    if isinstance(ip, dpkt.ip.IP):
        sip, dip = socket.inet_ntoa(ip.src), socket.inet_ntoa(ip.dst)
        proto = ip.p
    elif isinstance(ip, dpkt.ip6.IP6):
        sip = socket.inet_ntop(socket.AF_INET6, ip.src)
        dip = socket.inet_ntop(socket.AF_INET6, ip.dst)
        # NOTE(review): dpkt is expected to expose the payload protocol as
        # .p on IPv6 packets too; fall back to the next-header field.
        proto = getattr(ip, "p", getattr(ip, "nxt", -1))
    else:
        return (0, 0, 0, 0, -1)

    sport, dport = 0, 0
    if proto == dpkt.ip.IP_PROTO_TCP or proto == dpkt.ip.IP_PROTO_UDP:
        l4 = ip.data
        sport = getattr(l4, "sport", 0)
        dport = getattr(l4, "dport", 0)

    return (sip, dip, sport, dport, proto)
731
def payload_from_raw(raw, linktype=1):
    """Get the payload from a packet, the data below TCP/UDP basically

    @param raw: raw packet
    @param linktype: integer describing link type as expected by dpkt
    @return: ip.data.data of the parsed packet, or an empty string when
             the packet has no such nested payload.
    """
    ip = iplayer_from_raw(raw, linktype)
    try:
        return ip.data.data
    except AttributeError:
        # A missing attribute is the only failure a plain attribute chain
        # can produce; anything else should not be hidden.
        return ""
738
def next_connection_packets(piter, linktype=1):
    """Yield the leading packets of an iterator that share one flow.

    The flow is defined by the first packet; iteration stops at the first
    packet that belongs to neither direction of that flow.

    @param piter: iterator of (timestamp, raw packet) pairs
    @param linktype: integer describing link type as expected by dpkt
    """
    first_ft = None

    for ts, raw in piter:
        ft = flowtuple_from_raw(raw, linktype)
        if not first_ft:
            first_ft = ft

        sip, dip, sport, dport, proto = ft
        reverse_ft = (dip, sip, dport, sport, proto)

        same_direction = first_ft == ft
        if not same_direction and first_ft != reverse_ft:
            break

        payload = payload_from_raw(raw, linktype)
        yield {
            "src": sip,
            "dst": dip,
            "sport": sport,
            "dport": dport,
            "raw": payload.encode("base64"),
            "direction": same_direction,
        }
755
def packets_for_stream(fobj, offset):
    """Open a PCAP, seek to a packet offset, then get all packets belonging to the same connection

    @param fobj: file object of the capture, opened for binary reading
    @param offset: file offset of the first packet of the connection
    """
    pcap = dpkt.pcap.Reader(fobj)
    pcapiter = iter(pcap)

    # Start the reader's iterator before repositioning the file.
    # NOTE(review): presumably needed because dpkt positions the file
    # itself when iteration starts -- confirm against the dpkt version.
    # next() works on Python 2 and 3; an empty capture simply ends the
    # generator instead of leaking StopIteration out of it.
    try:
        next(pcapiter)
    except StopIteration:
        return

    fobj.seek(offset)
    for p in next_connection_packets(pcapiter, linktype=pcap.datalink()):
        yield p
765