1
2
3
4
5 import binascii
6 import hashlib
7 import logging
8 import os
9 import subprocess
10
11 from lib.cuckoo.common.constants import CUCKOO_ROOT
12
# Optional third-party dependencies. Each one is probed once at import time
# and the outcome recorded in a HAVE_* flag, so the rest of the module can
# degrade gracefully instead of failing when a package is not installed.
try:
    import magic
    HAVE_MAGIC = True
except ImportError:
    HAVE_MAGIC = False

try:
    import pydeep
    HAVE_PYDEEP = True
except ImportError:
    HAVE_PYDEEP = False

try:
    import yara
    HAVE_YARA = True
except ImportError:
    HAVE_YARA = False

log = logging.getLogger(__name__)

# Size in bytes of each read when a file is processed in chunks.
FILE_CHUNK_SIZE = 16 * 1024
class Dict(dict):
    """Cuckoo custom dict.

    A ``dict`` whose entries can also be written and deleted through
    attribute syntax: ``d.key = value`` stores ``d["key"]`` and
    ``del d.key`` removes it.
    """

    def __getattr__(self, key):
        """Return the value stored under ``key``.

        NOTE(review): this accessor is reconstructed. It is assumed to
        yield None for a missing key rather than raise AttributeError;
        confirm against callers.
        """
        return self.get(key, None)

    # Attribute assignment and deletion are routed straight to the item
    # protocol of the underlying dict.
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
43
class URL(object):
    """URL base object."""

    def __init__(self, url):
        """Store the URL this object represents.

        @param url: URL
        """
        self.url = url
50
class File(object):
    """Basic file object class with all useful utilities.

    Wraps a path on disk and exposes its name, size, content, checksums,
    fuzzy hash, libmagic description and Yara matches. Content and hashes
    are computed on first access and cached on the instance.
    """

    # Class-level flags so that the "optional dependency is missing"
    # warning is logged only once, not once per File instance.
    notified_yara = False
    notified_pydeep = False

    def __init__(self, file_path):
        """@param file_path: file path."""
        self.file_path = file_path

        # Caches, filled in lazily by file_data / calc_hashes().
        self._file_data = None
        self._crc32 = None
        self._md5 = None
        self._sha1 = None
        self._sha256 = None
        self._sha512 = None

    def get_name(self):
        """Get file name.
        @return: file name.
        """
        return os.path.basename(self.file_path)

    def valid(self):
        """Check that the path is an existing, non-empty regular file.
        @return: True if the file can be analyzed, False otherwise.
        """
        # os.path.isfile() is already False for a path that does not exist.
        return (os.path.isfile(self.file_path) and
                os.path.getsize(self.file_path) != 0)

    def get_data(self):
        """Read file contents.
        @return: data.
        """
        return self.file_data

    def get_chunks(self):
        """Read file contents in chunks (generator).
        @return: generator yielding blocks of up to FILE_CHUNK_SIZE bytes.
        """
        with open(self.file_path, "rb") as fd:
            while True:
                chunk = fd.read(FILE_CHUNK_SIZE)
                if not chunk:
                    break
                yield chunk

    def calc_hashes(self):
        """Calculate all possible hashes for this file.

        The file is streamed once and every digest is updated from the same
        chunk, so the content is never held in memory as a whole.
        """
        crc = 0
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        sha512 = hashlib.sha512()

        for chunk in self.get_chunks():
            crc = binascii.crc32(chunk, crc)
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
            sha512.update(chunk)

        # crc32() may return a negative number on Python 2; masking to 32
        # bits yields the same eight upper-case hex digits on every version.
        self._crc32 = "%08X" % (crc & 0xffffffff)
        self._md5 = md5.hexdigest()
        self._sha1 = sha1.hexdigest()
        self._sha256 = sha256.hexdigest()
        self._sha512 = sha512.hexdigest()

    @property
    def file_data(self):
        """Whole file content, read on first access and cached afterwards.
        @return: data.
        """
        # Compare against None (not truthiness) so that an empty file is
        # cached as well instead of being re-read on every access.
        if self._file_data is None:
            with open(self.file_path, "rb") as fd:
                self._file_data = fd.read()
        return self._file_data

    def get_size(self):
        """Get file size.
        @return: file size.
        """
        return os.path.getsize(self.file_path)

    def get_crc32(self):
        """Get CRC32.
        @return: CRC32.
        """
        if self._crc32 is None:
            self.calc_hashes()
        return self._crc32

    def get_md5(self):
        """Get MD5.
        @return: MD5.
        """
        if self._md5 is None:
            self.calc_hashes()
        return self._md5

    def get_sha1(self):
        """Get SHA1.
        @return: SHA1.
        """
        if self._sha1 is None:
            self.calc_hashes()
        return self._sha1

    def get_sha256(self):
        """Get SHA256.
        @return: SHA256.
        """
        if self._sha256 is None:
            self.calc_hashes()
        return self._sha256

    def get_sha512(self):
        """Get SHA512.
        @return: SHA512.
        """
        if self._sha512 is None:
            self.calc_hashes()
        return self._sha512

    def get_ssdeep(self):
        """Get SSDEEP.
        @return: SSDEEP, or None if pydeep is missing or hashing fails.
        """
        if not HAVE_PYDEEP:
            if not File.notified_pydeep:
                File.notified_pydeep = True
                log.warning("Unable to import pydeep (install with `pip install pydeep`)")
            return None

        try:
            return pydeep.hash_file(self.file_path)
        except Exception:
            return None

    def _magic_lookup(self, mime):
        """Query the ``magic`` module for this file.

        The libmagic-style API (``magic.open``) is tried first; if anything
        in that path fails, the ``magic.from_file`` API is tried instead.

        @param mime: True to request the MIME form, False for the
                     descriptive form.
        @return: result of the lookup, or None if both APIs failed.
        """
        file_type = None
        ms = None
        try:
            ms = magic.open(magic.MAGIC_MIME if mime else magic.MAGIC_NONE)
            ms.load()
            file_type = ms.file(self.file_path)
        except Exception:
            try:
                if mime:
                    file_type = magic.from_file(self.file_path, mime=True)
                else:
                    file_type = magic.from_file(self.file_path)
            except Exception:
                # Best effort: the caller falls back to the `file` command.
                pass
        finally:
            # ms stays None when magic.open() itself failed.
            if ms is not None:
                try:
                    ms.close()
                except Exception:
                    pass

        return file_type

    def _run_file_command(self, *options):
        """Run the ``file`` utility in brief mode on this file.

        @param options: extra command line options placed before the path.
        @return: stripped output of the command, or None if it could not
                 be executed.
        """
        command = ["file", "-b"] + list(options) + [self.file_path]
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE)
            # communicate() drains stdout and waits for the child, so the
            # pipe is closed and no zombie process is left behind.
            output = proc.communicate()[0]
        except Exception as e:
            log.debug("Unable to run the file command: %s", e)
            return None

        return output.strip()

    def get_type(self):
        """Get MIME file type.
        @return: file type.
        """
        file_type = None
        if HAVE_MAGIC:
            file_type = self._magic_lookup(mime=False)

        if file_type is None:
            file_type = self._run_file_command()

        return file_type

    def get_content_type(self):
        """Get MIME content file type (example: image/jpeg).
        @return: file content type.
        """
        file_type = None
        if HAVE_MAGIC:
            file_type = self._magic_lookup(mime=True)

        if file_type is None:
            file_type = self._run_file_command("--mime-type")

        return file_type

    def get_yara(self, rulepath=None):
        """Get Yara signatures matches.
        @param rulepath: path to the Yara rules file. When omitted, the
                         index under CUCKOO_ROOT is used.
                         NOTE(review): the default location is
                         reconstructed -- confirm it matches the deployed
                         rules layout.
        @return: matched Yara signatures (always a list, possibly empty).
        """
        if rulepath is None:
            rulepath = os.path.join(CUCKOO_ROOT, "data", "yara",
                                    "index_binary.yar")

        matches = []

        if not HAVE_YARA:
            if not File.notified_yara:
                File.notified_yara = True
                log.warning("Unable to import yara (please compile from sources)")
            return matches

        # Empty files are not scanned.
        if os.path.getsize(self.file_path) == 0:
            return matches

        if not os.path.exists(rulepath):
            log.warning("The specified rule file at %s doesn't exist, skip",
                        rulepath)
            # Return the (empty) list rather than None so callers always
            # receive the same type.
            return matches

        try:
            rules = yara.compile(rulepath, error_on_warning=True)

            for match in rules.match(self.file_path):
                strings = []
                for s in match.strings:
                    # Element 2 of each entry is the matched data. On
                    # Python 2, encoding a byte string implicitly decodes
                    # it as ASCII first, so non-text data raises
                    # UnicodeDecodeError and is rendered as a Yara-style
                    # hex string instead.
                    try:
                        new = s[2].encode("utf-8")
                    except UnicodeDecodeError:
                        hexed = s[2].lstrip("uU").encode("hex").upper()
                        hexed = " ".join(hexed[i:i + 2]
                                         for i in range(0, len(hexed), 2))
                        new = "{ %s }" % hexed

                    if new not in strings:
                        strings.append(new)

                matches.append({"name": match.rule,
                                "meta": match.meta,
                                "strings": strings})
        except Exception as e:
            log.warning("Unable to match Yara signatures: %s", e)

        return matches

    def get_all(self):
        """Get all information available.
        @return: information dict.
        """
        infos = {}
        infos["name"] = self.get_name()
        infos["path"] = self.file_path
        infos["size"] = self.get_size()
        infos["crc32"] = self.get_crc32()
        infos["md5"] = self.get_md5()
        infos["sha1"] = self.get_sha1()
        infos["sha256"] = self.get_sha256()
        infos["sha512"] = self.get_sha512()
        infos["ssdeep"] = self.get_ssdeep()
        infos["type"] = self.get_type()
        infos["yara"] = self.get_yara()

        return infos
303