
Source Code for Module modules.reporting.mongodb

# Copyright (C) 2010-2015 Cuckoo Foundation.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import os

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError
from lib.cuckoo.common.exceptions import CuckooReportError
from lib.cuckoo.common.objects import File

try:
    from pymongo import MongoClient
    from pymongo.errors import ConnectionFailure
    from gridfs import GridFS
    from gridfs.errors import FileExists
    HAVE_MONGO = True
except ImportError:
    HAVE_MONGO = False
class MongoDB(Report):
    """Stores report in MongoDB."""

    # Mongo schema version, used for data migration.
    SCHEMA_VERSION = "1"

    def connect(self):
        """Connects to Mongo database, loads options and sets connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)
        db = self.options.get("db", "cuckoo")

        try:
            self.conn = MongoClient(host, port)
            self.db = self.conn[db]
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be an integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS.
        @param file_obj: file object to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        # Deduplicate by SHA256: if the file is already in GridFS, reuse it.
        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]
        else:
            new = self.fs.new_file(filename=filename,
                                   contentType=file_obj.get_content_type(),
                                   sha256=file_obj.get_sha256())
            for chunk in file_obj.get_chunks():
                new.write(chunk)
            try:
                new.close()
            except FileExists:
                # Another analysis stored the same file concurrently; the
                # unique sha256 index rejected the duplicate, so return the
                # id of the copy that won the race.
                to_find = {"sha256": file_obj.get_sha256()}
                return self.db.fs.files.find_one(to_find)["_id"]
            else:
                return new._id

    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to
        # run it only one time at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError("Mongo schema version not expected, "
                                        "check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #   Returns the name of the created index if an index is actually
        #   created.
        #   Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to
        # run it only one time at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if "network" not in report:
            report["network"] = {}

        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        # Store the process memory dump files in GridFS and reference them
        # back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(self.analysis_path, "memory",
                                            "{0}.dmp".format(procmem["pid"]))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update({"procmem_id": procmem_id})

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with oversized reports exceeding MongoDB's document size
        # limit. It also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed
                    # then store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"],
                                     "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
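
The module writes the report document to the analysis collection, while the sample, PCAPs, memory dumps, screenshots and dropped files go into GridFS and are referenced from the report by ObjectId. Below is a minimal sketch, not part of the module, of reading a stored report back and extracting its PCAP; it assumes the default host, port and db options used by connect() above, and the filter-less find_one() is purely illustrative:

    import gridfs
    from pymongo import MongoClient

    conn = MongoClient("127.0.0.1", 27017)
    db = conn["cuckoo"]
    fs = gridfs.GridFS(db)

    # Fetch one analysis report; a real consumer would filter on fields
    # of its own report documents.
    report = db.analysis.find_one()

    # network.pcap_id references the dump.pcap stored in GridFS by run().
    pcap_id = report["network"].get("pcap_id")
    if pcap_id is not None:
        with open("dump.pcap", "wb") as out:
            out.write(fs.get(pcap_id).read())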
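
Because run() replaces each process's calls list with a list of ObjectIds pointing at chunks of at most 100 calls in the calls collection, a consumer can page through the behavior log one chunk at a time instead of loading it whole. Continuing the sketch above, under the same assumptions:

    # Walk the chunked API calls of each process, one chunk at a time.
    for process in report.get("behavior", {}).get("processes", []):
        for chunk_id in process["calls"]:
            chunk = db.calls.find_one({"_id": chunk_id})
            print("pid %s: %d calls" % (chunk["pid"], len(chunk["calls"])))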