Package lib :: Package cuckoo :: Package core :: Module database
[hide private]
[frames | no frames]

Source Code for Module lib.cuckoo.core.database

   1  # Copyright (C) 2010-2015 Cuckoo Foundation. 
   2  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
   3  # See the file 'docs/LICENSE' for copying permission. 
   4   
   5  import os 
   6  import json 
   7  import logging 
   8  from datetime import datetime 
   9   
  10  from lib.cuckoo.common.config import Config 
  11  from lib.cuckoo.common.constants import CUCKOO_ROOT 
  12  from lib.cuckoo.common.exceptions import CuckooDatabaseError 
  13  from lib.cuckoo.common.exceptions import CuckooOperationalError 
  14  from lib.cuckoo.common.exceptions import CuckooDependencyError 
  15  from lib.cuckoo.common.objects import File, URL 
  16  from lib.cuckoo.common.utils import create_folder, Singleton, classlock, SuperLock 
  17   
  18  try: 
  19      from sqlalchemy import create_engine, Column 
  20      from sqlalchemy import Integer, String, Boolean, DateTime, Enum 
  21      from sqlalchemy import ForeignKey, Text, Index, Table 
  22      from sqlalchemy.ext.declarative import declarative_base 
  23      from sqlalchemy.exc import SQLAlchemyError, IntegrityError 
  24      from sqlalchemy.orm import sessionmaker, relationship, joinedload, backref 
  25      Base = declarative_base() 
  26  except ImportError: 
  27      raise CuckooDependencyError("Unable to import sqlalchemy " 
  28                                  "(install with `pip install sqlalchemy`)") 
  29   
  30  log = logging.getLogger(__name__) 
  31   
  32  SCHEMA_VERSION = "495d5a6edef3" 
  33  TASK_PENDING = "pending" 
  34  TASK_RUNNING = "running" 
  35  TASK_COMPLETED = "completed" 
  36  TASK_RECOVERED = "recovered" 
  37  TASK_REPORTED = "reported" 
  38  TASK_FAILED_ANALYSIS = "failed_analysis" 
  39  TASK_FAILED_PROCESSING = "failed_processing" 
  40  TASK_FAILED_REPORTING = "failed_reporting" 
  41   
  42  # Secondary table used in association Machine - Tag. 
  43  machines_tags = Table( 
  44      "machines_tags", Base.metadata, 
  45      Column("machine_id", Integer, ForeignKey("machines.id")), 
  46      Column("tag_id", Integer, ForeignKey("tags.id")) 
  47  ) 
  48   
  49  # Secondary table used in association Task - Tag. 
  50  tasks_tags = Table( 
  51      "tasks_tags", Base.metadata, 
  52      Column("task_id", Integer, ForeignKey("tasks.id")), 
  53      Column("tag_id", Integer, ForeignKey("tags.id")) 
  54  ) 
class Machine(Base):
    """Virtual machine configured for use as an analysis guest."""
    __tablename__ = "machines"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False)
    label = Column(String(255), nullable=False)
    ip = Column(String(255), nullable=False)
    platform = Column(String(255), nullable=False)
    # Many-to-many link to Tag rows through the machines_tags table.
    tags = relationship(
        "Tag",
        secondary=machines_tags,
        cascade="all, delete",
        single_parent=True,
        backref=backref("machine", cascade="all"),
    )
    interface = Column(String(255), nullable=True)
    snapshot = Column(String(255), nullable=True)
    locked = Column(Boolean(), nullable=False, default=False)
    locked_changed_on = Column(DateTime(timezone=False), nullable=True)
    status = Column(String(255), nullable=True)
    status_changed_on = Column(DateTime(timezone=False), nullable=True)
    resultserver_ip = Column(String(255), nullable=False)
    resultserver_port = Column(String(255), nullable=False)

    def __init__(self, name, label, ip, platform, interface, snapshot,
                 resultserver_ip, resultserver_port):
        self.name = name
        self.label = label
        self.ip = ip
        self.platform = platform
        self.interface = interface
        self.snapshot = snapshot
        self.resultserver_ip = resultserver_ip
        self.resultserver_port = resultserver_port

    def __repr__(self):
        return "<Machine('{0}','{1}')>".format(self.id, self.name)

    def to_dict(self):
        """Converts object to dict.

        Datetime column values are rendered as "%Y-%m-%d %H:%M:%S" strings and
        the names of the related tags are stored under the "tags" key.
        @return: dict
        """
        row = {}
        for column in self.__table__.columns:
            value = getattr(self, column.name)
            row[column.name] = (value.strftime("%Y-%m-%d %H:%M:%S")
                                if isinstance(value, datetime) else value)

        # Tags are a relation so no column to iterate.
        row["tags"] = [tag.name for tag in self.tags]
        return row

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        return json.dumps(self.to_dict())
111
class Tag(Base):
    """Free-form, uniquely named label."""
    __tablename__ = "tags"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False, unique=True)

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return "<Tag('{0}','{1}')>".format(self.id, self.name)
124
class Guest(Base):
    """Record of a guest run, bound to exactly one task."""
    __tablename__ = "guests"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False)
    label = Column(String(255), nullable=False)
    manager = Column(String(255), nullable=False)
    started_on = Column(DateTime(timezone=False), default=datetime.now, nullable=False)
    shutdown_on = Column(DateTime(timezone=False), nullable=True)
    task_id = Column(Integer, ForeignKey("tasks.id"), nullable=False, unique=True)

    def __init__(self, name, label, manager):
        self.name = name
        self.label = label
        self.manager = manager

    def __repr__(self):
        return "<Guest('{0}','{1}')>".format(self.id, self.name)

    def to_dict(self):
        """Converts object to dict.

        Datetime column values are rendered as "%Y-%m-%d %H:%M:%S" strings.
        @return: dict
        """
        row = {}
        for column in self.__table__.columns:
            value = getattr(self, column.name)
            row[column.name] = (value.strftime("%Y-%m-%d %H:%M:%S")
                                if isinstance(value, datetime) else value)
        return row

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        return json.dumps(self.to_dict())
168
class Sample(Base):
    """Details of a submitted file, identified by its hashes."""
    __tablename__ = "samples"

    id = Column(Integer(), primary_key=True)
    file_size = Column(Integer(), nullable=False)
    file_type = Column(Text(), nullable=False)
    md5 = Column(String(32), nullable=False)
    crc32 = Column(String(8), nullable=False)
    sha1 = Column(String(40), nullable=False)
    sha256 = Column(String(64), nullable=False)
    sha512 = Column(String(128), nullable=False)
    ssdeep = Column(String(255), nullable=True)
    # Single unique index spanning all of the hash columns.
    __table_args__ = (
        Index("hash_index", "md5", "crc32", "sha1", "sha256", "sha512", unique=True),
    )

    def __init__(self, md5, crc32, sha1, sha256, sha512,
                 file_size, file_type=None, ssdeep=None):
        self.md5 = md5
        self.sha1 = sha1
        self.crc32 = crc32
        self.sha256 = sha256
        self.sha512 = sha512
        self.file_size = file_size
        # Optional attributes are only assigned when a truthy value is given.
        if file_type:
            self.file_type = file_type
        if ssdeep:
            self.ssdeep = ssdeep

    def __repr__(self):
        return "<Sample('{0}','{1}')>".format(self.id, self.sha256)

    def to_dict(self):
        """Converts object to dict.
        @return: dict
        """
        return dict((column.name, getattr(self, column.name))
                    for column in self.__table__.columns)

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        return json.dumps(self.to_dict())
215
class Error(Base):
    """Error message recorded for a task."""
    __tablename__ = "errors"

    id = Column(Integer(), primary_key=True)
    message = Column(String(255), nullable=False)
    task_id = Column(Integer, ForeignKey("tasks.id"), nullable=False)

    def __init__(self, message, task_id):
        self.message = message
        self.task_id = task_id

    def __repr__(self):
        return "<Error('{0}','{1}','{2}')>".format(self.id, self.message, self.task_id)

    def to_dict(self):
        """Converts object to dict.
        @return: dict
        """
        return dict((column.name, getattr(self, column.name))
                    for column in self.__table__.columns)

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        return json.dumps(self.to_dict())
245
class Task(Base):
    """Entry of the analysis task queue."""
    __tablename__ = "tasks"

    id = Column(Integer(), primary_key=True)
    target = Column(Text(), nullable=False)
    category = Column(String(255), nullable=False)
    timeout = Column(Integer(), server_default="0", nullable=False)
    priority = Column(Integer(), server_default="1", nullable=False)
    custom = Column(String(255), nullable=True)
    machine = Column(String(255), nullable=True)
    package = Column(String(255), nullable=True)
    # Many-to-many link to Tag rows through the tasks_tags table.
    tags = relationship(
        "Tag",
        secondary=tasks_tags,
        cascade="all, delete",
        single_parent=True,
        backref=backref("task", cascade="all"),
        lazy="subquery",
    )
    options = Column(String(255), nullable=True)
    platform = Column(String(255), nullable=True)
    memory = Column(Boolean, nullable=False, default=False)
    enforce_timeout = Column(Boolean, nullable=False, default=False)
    clock = Column(DateTime(timezone=False), default=datetime.now, nullable=False)
    added_on = Column(DateTime(timezone=False), default=datetime.now, nullable=False)
    started_on = Column(DateTime(timezone=False), nullable=True)
    completed_on = Column(DateTime(timezone=False), nullable=True)
    status = Column(
        Enum(TASK_PENDING, TASK_RUNNING, TASK_COMPLETED,
             TASK_REPORTED, TASK_RECOVERED, TASK_FAILED_ANALYSIS,
             TASK_FAILED_PROCESSING, TASK_FAILED_REPORTING,
             name="status_type"),
        server_default=TASK_PENDING,
        nullable=False,
    )
    sample_id = Column(Integer, ForeignKey("samples.id"), nullable=True)
    sample = relationship("Sample", backref="tasks")
    guest = relationship("Guest", uselist=False, backref="tasks",
                         cascade="save-update, delete")
    errors = relationship("Error", backref="tasks",
                          cascade="save-update, delete")

    def __init__(self, target=None):
        self.target = target

    def __repr__(self):
        return "<Task('{0}','{1}')>".format(self.id, self.target)

    def to_dict(self):
        """Converts object to dict.

        Datetime column values are rendered as "%Y-%m-%d %H:%M:%S" strings and
        the names of the related tags are stored under the "tags" key.
        @return: dict
        """
        row = {}
        for column in self.__table__.columns:
            value = getattr(self, column.name)
            row[column.name] = (value.strftime("%Y-%m-%d %H:%M:%S")
                                if isinstance(value, datetime) else value)

        # Tags are a relation so no column to iterate.
        row["tags"] = [tag.name for tag in self.tags]
        return row

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        return json.dumps(self.to_dict())
310
class AlembicVersion(Base):
    """Table used to pinpoint actual database schema release."""
    __tablename__ = "alembic_version"

    # The revision string is the primary key of the table.
    version_num = Column(String(32), primary_key=True, nullable=False)
316
class Database(object):
    """Analysis queue database.

    This class handles the creation of the database used for internal queue
    management. It also provides some functions for interacting with it.
    """
    __metaclass__ = Singleton

    def __init__(self, dsn=None, schema_check=True):
        """Connect to the database, create the schema and check its version.
        @param dsn: database connection string.
        @param schema_check: disable or enable the db schema version check
        @raise CuckooDatabaseError: if the database directory or the schema
            cannot be created, if the schema version cannot be stored, or if
            the stored version differs from SCHEMA_VERSION while schema_check
            is enabled.
        """
        self._lock = SuperLock()
        cfg = Config()

        # Connection precedence: explicit dsn, configured connection string,
        # then a local SQLite file under CUCKOO_ROOT/db.
        if dsn:
            self._connect_database(dsn)
        elif cfg.database.connection:
            self._connect_database(cfg.database.connection)
        else:
            db_file = os.path.join(CUCKOO_ROOT, "db", "cuckoo.db")
            if not os.path.exists(db_file):
                db_dir = os.path.dirname(db_file)
                if not os.path.exists(db_dir):
                    try:
                        create_folder(folder=db_dir)
                    except CuckooOperationalError as e:
                        raise CuckooDatabaseError("Unable to create database directory: {0}".format(e))

            self._connect_database("sqlite:///%s" % db_file)

        # Disable SQL logging. Turn it on for debugging.
        self.engine.echo = False
        # Connection timeout.
        if cfg.database.timeout:
            self.engine.pool_timeout = cfg.database.timeout
        else:
            self.engine.pool_timeout = 60
        # Create schema.
        try:
            Base.metadata.create_all(self.engine)
        except SQLAlchemyError as e:
            raise CuckooDatabaseError("Unable to create or connect to database: {0}".format(e))

        # Get db session.
        self.Session = sessionmaker(bind=self.engine)

        # Deal with schema versioning. The temporary session is always closed,
        # also when the version query itself fails.
        tmp_session = self.Session()
        try:
            last = tmp_session.query(AlembicVersion).first()
            if last is None:
                # Set database schema version.
                tmp_session.add(AlembicVersion(version_num=SCHEMA_VERSION))
                try:
                    tmp_session.commit()
                except SQLAlchemyError as e:
                    # Roll back before raising; a rollback placed after the
                    # raise statement would never run.
                    tmp_session.rollback()
                    raise CuckooDatabaseError("Unable to set schema version: {0}".format(e))
                found_version = SCHEMA_VERSION
            else:
                # Read the value while the session is still open.
                found_version = last.version_num
        finally:
            tmp_session.close()

        # Check if db version is the expected one.
        if found_version != SCHEMA_VERSION and schema_check:
            raise CuckooDatabaseError(
                "DB schema version mismatch: found {0}, expected {1}. "
                "Try to apply all migrations (cd utils/db_migration/ && "
                "alembic upgrade head).".format(found_version,
                                                SCHEMA_VERSION))
387
388 - def __del__(self):
389 """Disconnects pool.""" 390 self.engine.dispose()
391
392 - def _connect_database(self, connection_string):
393 """Connect to a Database. 394 @param connection_string: Connection string specifying the database 395 """ 396 try: 397 # TODO: this is quite ugly, should improve. 398 if connection_string.startswith("sqlite"): 399 # Using "check_same_thread" to disable sqlite safety check on multiple threads. 400 self.engine = create_engine(connection_string, connect_args={"check_same_thread": False}) 401 elif connection_string.startswith("postgres"): 402 # Disabling SSL mode to avoid some errors using sqlalchemy and multiprocesing. 403 # See: http://www.postgresql.org/docs/9.0/static/libpq-ssl.html#LIBPQ-SSL-SSLMODE-STATEMENTS 404 self.engine = create_engine(connection_string, connect_args={"sslmode": "disable"}) 405 else: 406 self.engine = create_engine(connection_string) 407 except ImportError as e: 408 lib = e.message.split()[-1] 409 raise CuckooDependencyError("Missing database driver, unable to " 410 "import %s (install with `pip " 411 "install %s`)" % (lib, lib))
412
413 - def _get_or_create(self, session, model, **kwargs):
414 """Get an ORM instance or create it if not exist. 415 @param session: SQLAlchemy session object 416 @param model: model to query 417 @return: row instance 418 """ 419 instance = session.query(model).filter_by(**kwargs).first() 420 if instance: 421 return instance 422 else: 423 instance = model(**kwargs) 424 return instance
425 426 @classlock
427 - def drop(self):
428 """Drop all tables.""" 429 try: 430 Base.metadata.drop_all(self.engine) 431 except SQLAlchemyError as e: 432 raise CuckooDatabaseError("Unable to create or connect to database: {0}".format(e))
433 434 @classlock
435 - def clean_machines(self):
436 """Clean old stored machines and related tables.""" 437 # Secondary table. 438 # TODO: this is better done via cascade delete. 439 self.engine.execute(machines_tags.delete()) 440 441 session = self.Session() 442 try: 443 session.query(Machine).delete() 444 session.commit() 445 except SQLAlchemyError as e: 446 log.debug("Database error cleaning machines: {0}".format(e)) 447 session.rollback() 448 finally: 449 session.close()
450 451 @classlock
452 - def add_machine(self, name, label, ip, platform, tags, interface, 453 snapshot, resultserver_ip, resultserver_port):
454 """Add a guest machine. 455 @param name: machine id 456 @param label: machine label 457 @param ip: machine IP address 458 @param platform: machine supported platform 459 @param tags: list of comma separated tags 460 @param interface: sniffing interface for this machine 461 @param snapshot: snapshot name to use instead of the current one, if configured 462 @param resultserver_ip: IP address of the Result Server 463 @param resultserver_port: port of the Result Server 464 """ 465 session = self.Session() 466 machine = Machine(name=name, 467 label=label, 468 ip=ip, 469 platform=platform, 470 interface=interface, 471 snapshot=snapshot, 472 resultserver_ip=resultserver_ip, 473 resultserver_port=resultserver_port) 474 # Deal with tags format (i.e., foo,bar,baz) 475 if tags: 476 for tag in tags.replace(" ", "").split(","): 477 machine.tags.append(self._get_or_create(session, Tag, name=tag)) 478 session.add(machine) 479 480 try: 481 session.commit() 482 except SQLAlchemyError as e: 483 log.debug("Database error adding machine: {0}".format(e)) 484 session.rollback() 485 finally: 486 session.close()
487 488 @classlock
489 - def set_status(self, task_id, status):
490 """Set task status. 491 @param task_id: task identifier 492 @param status: status string 493 @return: operation status 494 """ 495 session = self.Session() 496 try: 497 row = session.query(Task).get(task_id) 498 row.status = status 499 500 if status == TASK_RUNNING: 501 row.started_on = datetime.now() 502 elif status == TASK_COMPLETED: 503 row.completed_on = datetime.now() 504 505 session.commit() 506 except SQLAlchemyError as e: 507 log.debug("Database error setting status: {0}".format(e)) 508 session.rollback() 509 finally: 510 session.close()
511 512 @classlock
513 - def fetch(self, lock=True, machine=""):
514 """Fetches a task waiting to be processed and locks it for running. 515 @return: None or task 516 """ 517 session = self.Session() 518 row = None 519 try: 520 if machine != "": 521 row = session.query(Task).filter_by(status=TASK_PENDING).filter(Machine.name==machine).order_by("priority desc, added_on").first() 522 else: 523 row = session.query(Task).filter_by(status=TASK_PENDING).order_by("priority desc, added_on").first() 524 525 if not row: 526 return None 527 528 if lock: 529 self.set_status(task_id=row.id, status=TASK_RUNNING) 530 session.refresh(row) 531 532 return row 533 except SQLAlchemyError as e: 534 log.debug("Database error fetching task: {0}".format(e)) 535 session.rollback() 536 finally: 537 session.close()
538 539 @classlock
540 - def guest_start(self, task_id, name, label, manager):
541 """Logs guest start. 542 @param task_id: task identifier 543 @param name: vm name 544 @param label: vm label 545 @param manager: vm manager 546 @return: guest row id 547 """ 548 session = self.Session() 549 guest = Guest(name, label, manager) 550 try: 551 session.query(Task).get(task_id).guest = guest 552 session.commit() 553 session.refresh(guest) 554 return guest.id 555 except SQLAlchemyError as e: 556 log.debug("Database error logging guest start: {0}".format(e)) 557 session.rollback() 558 return None 559 finally: 560 session.close()
561 562 @classlock
563 - def guest_remove(self, guest_id):
564 """Removes a guest start entry.""" 565 session = self.Session() 566 try: 567 guest = session.query(Guest).get(guest_id) 568 session.delete(guest) 569 session.commit() 570 except SQLAlchemyError as e: 571 log.debug("Database error logging guest remove: {0}".format(e)) 572 session.rollback() 573 return None 574 finally: 575 session.close()
576 577 @classlock
578 - def guest_stop(self, guest_id):
579 """Logs guest stop. 580 @param guest_id: guest log entry id 581 """ 582 session = self.Session() 583 try: 584 session.query(Guest).get(guest_id).shutdown_on = datetime.now() 585 session.commit() 586 except SQLAlchemyError as e: 587 log.debug("Database error logging guest stop: {0}".format(e)) 588 session.rollback() 589 except TypeError: 590 log.warning("Data inconsistency in guests table detected, it might be a crash leftover. Continue") 591 session.rollback() 592 finally: 593 session.close()
594 595 @classlock
596 - def list_machines(self, locked=False):
597 """Lists virtual machines. 598 @return: list of virtual machines 599 """ 600 session = self.Session() 601 try: 602 if locked: 603 machines = session.query(Machine).options(joinedload("tags")).filter_by(locked=True).all() 604 else: 605 machines = session.query(Machine).options(joinedload("tags")).all() 606 return machines 607 except SQLAlchemyError as e: 608 log.debug("Database error listing machines: {0}".format(e)) 609 return [] 610 finally: 611 session.close()
612 613 @classlock
614 - def lock_machine(self, label=None, platform=None, tags=None):
615 """Places a lock on a free virtual machine. 616 @param label: optional virtual machine label 617 @param platform: optional virtual machine platform 618 @param tags: optional tags required (list) 619 @return: locked machine 620 """ 621 session = self.Session() 622 623 # Preventive checks. 624 if label and platform: 625 # Wrong usage. 626 log.error("You can select machine only by label or by platform.") 627 return None 628 elif label and tags: 629 # Also wrong usage. 630 log.error("You can select machine only by label or by tags.") 631 return None 632 633 try: 634 machines = session.query(Machine) 635 if label: 636 machines = machines.filter_by(label=label) 637 if platform: 638 machines = machines.filter_by(platform=platform) 639 if tags: 640 for tag in tags: 641 machines = machines.filter(Machine.tags.any(name=tag.name)) 642 643 # Check if there are any machines that satisfy the 644 # selection requirements. 645 if not machines.count(): 646 raise CuckooOperationalError("No machines match selection criteria.") 647 648 # Get the first free machine. 649 machine = machines.filter_by(locked=False).first() 650 except SQLAlchemyError as e: 651 log.debug("Database error locking machine: {0}".format(e)) 652 session.close() 653 return None 654 655 if machine: 656 machine.locked = True 657 machine.locked_changed_on = datetime.now() 658 try: 659 session.commit() 660 session.refresh(machine) 661 except SQLAlchemyError as e: 662 log.debug("Database error locking machine: {0}".format(e)) 663 session.rollback() 664 return None 665 finally: 666 session.close() 667 668 return machine
669 670 @classlock
671 - def unlock_machine(self, label):
672 """Remove lock form a virtual machine. 673 @param label: virtual machine label 674 @return: unlocked machine 675 """ 676 session = self.Session() 677 try: 678 machine = session.query(Machine).filter_by(label=label).first() 679 except SQLAlchemyError as e: 680 log.debug("Database error unlocking machine: {0}".format(e)) 681 session.close() 682 return None 683 684 if machine: 685 machine.locked = False 686 machine.locked_changed_on = datetime.now() 687 try: 688 session.commit() 689 session.refresh(machine) 690 except SQLAlchemyError as e: 691 log.debug("Database error locking machine: {0}".format(e)) 692 session.rollback() 693 return None 694 finally: 695 session.close() 696 697 return machine
698 699 @classlock
700 - def count_machines_available(self):
701 """How many virtual machines are ready for analysis. 702 @return: free virtual machines count 703 """ 704 session = self.Session() 705 try: 706 machines_count = session.query(Machine).filter_by(locked=False).count() 707 return machines_count 708 except SQLAlchemyError as e: 709 log.debug("Database error counting machines: {0}".format(e)) 710 return 0 711 finally: 712 session.close()
713 714 @classlock
715 - def get_available_machines(self):
716 """ Which machines are available 717 @return: free virtual machines 718 """ 719 session = self.Session() 720 try: 721 machines = session.query(Machine).filter_by(locked=False).all() 722 return machines 723 except SQLAlchemyError as e: 724 log.debug("Database error getting available machines: {0}".format(e)) 725 return 0 726 finally: 727 session.close()
728 729 @classlock
730 - def set_machine_status(self, label, status):
731 """Set status for a virtual machine. 732 @param label: virtual machine label 733 @param status: new virtual machine status 734 """ 735 session = self.Session() 736 try: 737 machine = session.query(Machine).filter_by(label=label).first() 738 except SQLAlchemyError as e: 739 log.debug("Database error setting machine status: {0}".format(e)) 740 session.close() 741 return 742 743 if machine: 744 machine.status = status 745 machine.status_changed_on = datetime.now() 746 try: 747 session.commit() 748 session.refresh(machine) 749 except SQLAlchemyError as e: 750 log.debug("Database error setting machine status: {0}".format(e)) 751 session.rollback() 752 finally: 753 session.close() 754 else: 755 session.close()
756 757 @classlock
758 - def add_error(self, message, task_id):
759 """Add an error related to a task. 760 @param message: error message 761 @param task_id: ID of the related task 762 """ 763 session = self.Session() 764 error = Error(message=message, task_id=task_id) 765 session.add(error) 766 try: 767 session.commit() 768 except SQLAlchemyError as e: 769 log.debug("Database error adding error log: {0}".format(e)) 770 session.rollback() 771 finally: 772 session.close()
773 774 # The following functions are mostly used by external utils. 775 776 @classlock
777 - def add(self, obj, timeout=0, package="", options="", priority=1, 778 custom="", machine="", platform="", tags=None, 779 memory=False, enforce_timeout=False, clock=None):
780 """Add a task to database. 781 @param obj: object to add (File or URL). 782 @param timeout: selected timeout. 783 @param options: analysis options. 784 @param priority: analysis priority. 785 @param custom: custom options. 786 @param machine: selected machine. 787 @param platform: platform. 788 @param tags: optional tags that must be set for machine selection 789 @param memory: toggle full memory dump. 790 @param enforce_timeout: toggle full timeout execution. 791 @param clock: virtual machine clock time 792 @return: cursor or None. 793 """ 794 session = self.Session() 795 796 # Convert empty strings and None values to a valid int 797 if not timeout: 798 timeout = 0 799 if not priority: 800 priority = 1 801 802 if isinstance(obj, File): 803 sample = Sample(md5=obj.get_md5(), 804 crc32=obj.get_crc32(), 805 sha1=obj.get_sha1(), 806 sha256=obj.get_sha256(), 807 sha512=obj.get_sha512(), 808 file_size=obj.get_size(), 809 file_type=obj.get_type(), 810 ssdeep=obj.get_ssdeep()) 811 session.add(sample) 812 813 try: 814 session.commit() 815 except IntegrityError: 816 session.rollback() 817 try: 818 sample = session.query(Sample).filter_by(md5=obj.get_md5()).first() 819 except SQLAlchemyError as e: 820 log.debug("Error querying sample for hash: {0}".format(e)) 821 session.close() 822 return None 823 except SQLAlchemyError as e: 824 log.debug("Database error adding task: {0}".format(e)) 825 session.close() 826 return None 827 828 task = Task(obj.file_path) 829 task.sample_id = sample.id 830 elif isinstance(obj, URL): 831 task = Task(obj.url) 832 833 task.category = obj.__class__.__name__.lower() 834 task.timeout = timeout 835 task.package = package 836 task.options = options 837 task.priority = priority 838 task.custom = custom 839 task.machine = machine 840 task.platform = platform 841 task.memory = memory 842 task.enforce_timeout = enforce_timeout 843 844 # Deal with tags format (i.e., foo,bar,baz) 845 if tags: 846 for tag in tags.replace(" ", "").split(","): 847 
task.tags.append(self._get_or_create(session, Tag, name=tag)) 848 849 if clock: 850 if isinstance(clock, str) or isinstance(clock, unicode): 851 try: 852 task.clock = datetime.strptime(clock, "%m-%d-%Y %H:%M:%S") 853 except ValueError: 854 log.warning("The date you specified has an invalid format, using current timestamp.") 855 task.clock = datetime.now() 856 else: 857 task.clock = clock 858 859 session.add(task) 860 861 try: 862 session.commit() 863 task_id = task.id 864 except SQLAlchemyError as e: 865 log.debug("Database error adding task: {0}".format(e)) 866 session.rollback() 867 return None 868 finally: 869 session.close() 870 871 return task_id
872
873 - def add_path(self, file_path, timeout=0, package="", options="", 874 priority=1, custom="", machine="", platform="", tags=None, 875 memory=False, enforce_timeout=False, clock=None):
876 """Add a task to database from file path. 877 @param file_path: sample path. 878 @param timeout: selected timeout. 879 @param options: analysis options. 880 @param priority: analysis priority. 881 @param custom: custom options. 882 @param machine: selected machine. 883 @param platform: platform. 884 @param tags: Tags required in machine selection 885 @param memory: toggle full memory dump. 886 @param enforce_timeout: toggle full timeout execution. 887 @param clock: virtual machine clock time 888 @return: cursor or None. 889 """ 890 if not file_path or not os.path.exists(file_path): 891 log.warning("File does not exist: %s.", file_path) 892 return None 893 894 # Convert empty strings and None values to a valid int 895 if not timeout: 896 timeout = 0 897 if not priority: 898 priority = 1 899 900 return self.add(File(file_path), timeout, package, options, priority, 901 custom, machine, platform, tags, memory, 902 enforce_timeout, clock)
903 904 @classlock
905 - def add_url(self, url, timeout=0, package="", options="", priority=1, 906 custom="", machine="", platform="", tags=None, memory=False, 907 enforce_timeout=False, clock=None):
908 """Add a task to database from url. 909 @param url: url. 910 @param timeout: selected timeout. 911 @param options: analysis options. 912 @param priority: analysis priority. 913 @param custom: custom options. 914 @param machine: selected machine. 915 @param platform: platform. 916 @param tags: tags for machine selection 917 @param memory: toggle full memory dump. 918 @param enforce_timeout: toggle full timeout execution. 919 @param clock: virtual machine clock time 920 @return: cursor or None. 921 """ 922 923 # Convert empty strings and None values to a valid int 924 if not timeout: 925 timeout = 0 926 if not priority: 927 priority = 1 928 929 return self.add(URL(url), timeout, package, options, priority, 930 custom, machine, platform, tags, memory, 931 enforce_timeout, clock)
932 933 @classlock
934 - def reschedule(self, task_id):
935 """Reschedule a task. 936 @param task_id: ID of the task to reschedule. 937 @return: ID of the newly created task. 938 """ 939 task = self.view_task(task_id) 940 941 if not task: 942 return None 943 944 if task.category == "file": 945 add = self.add_path 946 elif task.category == "url": 947 add = self.add_url 948 949 # Change status to recovered. 950 session = self.Session() 951 session.query(Task).get(task_id).status = TASK_RECOVERED 952 try: 953 session.commit() 954 except SQLAlchemyError as e: 955 log.debug("Database error rescheduling task: {0}".format(e)) 956 session.rollback() 957 return False 958 finally: 959 session.close() 960 961 # Normalize tags. 962 if task.tags: 963 tags = ",".join(tag.name for tag in task.tags) 964 else: 965 tags = task.tags 966 967 return add(task.target, task.timeout, task.package, task.options, 968 task.priority, task.custom, task.machine, task.platform, 969 tags, task.memory, task.enforce_timeout, task.clock)
970 971 @classlock
972 - def list_tasks(self, limit=None, details=False, category=None, 973 offset=None, status=None, sample_id=None, not_status=None, 974 completed_after=None, order_by=None):
975 """Retrieve list of task. 976 @param limit: specify a limit of entries. 977 @param details: if details about must be included 978 @param category: filter by category 979 @param offset: list offset 980 @param status: filter by task status 981 @param sample_id: filter tasks for a sample 982 @param not_status: exclude this task status from filter 983 @param completed_after: only list tasks completed after this timestamp 984 @param order_by: definition which field to sort by 985 @return: list of tasks. 986 """ 987 session = self.Session() 988 try: 989 search = session.query(Task) 990 991 if status: 992 search = search.filter_by(status=status) 993 if not_status: 994 search = search.filter(Task.status != not_status) 995 if category: 996 search = search.filter_by(category=category) 997 if details: 998 search = search.options(joinedload("guest"), joinedload("errors"), joinedload("tags")) 999 if sample_id is not None: 1000 search = search.filter_by(sample_id=sample_id) 1001 if completed_after: 1002 search = search.filter(Task.completed_on > completed_after) 1003 1004 search = search.order_by(order_by or "added_on desc") 1005 tasks = search.limit(limit).offset(offset).all() 1006 return tasks 1007 except SQLAlchemyError as e: 1008 log.debug("Database error listing tasks: {0}".format(e)) 1009 return [] 1010 finally: 1011 session.close()
1012 1013 @classlock
def count_tasks(self, status=None):
    """Count the tasks stored in the database.
    @param status: when given, only count tasks having this status.
    @return: number of matching tasks, or 0 on database error.
    """
    db_session = self.Session()
    try:
        query = db_session.query(Task)
        # Narrow the query only when a status was requested.
        if status:
            query = query.filter_by(status=status)
        return query.count()
    except SQLAlchemyError as err:
        log.debug("Database error counting tasks: {0}".format(err))
        return 0
    finally:
        db_session.close()
1031 1032 @classlock
def view_task(self, task_id, details=False):
    """Look up a single task by its ID.
    @param task_id: ID of the task to query.
    @param details: if set, eagerly load the guest, errors and tags
                    relationships of the task.
    @return: the task detached from its session, or None when it is not
             found or a database error occurred.
    """
    db_session = self.Session()
    try:
        query = db_session.query(Task)
        if details:
            query = query.options(joinedload("guest"),
                                  joinedload("errors"),
                                  joinedload("tags"))
        found = query.get(task_id)
    except SQLAlchemyError as err:
        log.debug("Database error viewing task: {0}".format(err))
        return None
    else:
        # Detach the object so it stays usable once the session is closed.
        if found:
            db_session.expunge(found)
    finally:
        db_session.close()
    return found
1053 1054 @classlock
def delete_task(self, task_id):
    """Delete a task from the database.
    @param task_id: ID of the task to delete.
    @return: True if the task was deleted, False if it does not exist or
             a database error occurred.
    """
    session = self.Session()
    try:
        task = session.query(Task).get(task_id)
        if task is None:
            # Handle the unknown ID explicitly instead of letting
            # session.delete(None) fail with an unmapped-instance error
            # that gets reported as a generic database failure.
            log.debug("Database error deleting task: "
                      "task {0} not found".format(task_id))
            return False
        session.delete(task)
        session.commit()
    except SQLAlchemyError as e:
        log.debug("Database error deleting task: {0}".format(e))
        session.rollback()
        return False
    finally:
        session.close()
    return True
1072 1073 @classlock
def view_sample(self, sample_id):
    """Retrieve information on a sample given its ID.
    @param sample_id: ID of the sample to query.
    @return: the sample detached from its session, or None when it is not
             found or an error occurred.
    """
    session = self.Session()
    try:
        sample = session.query(Sample).get(sample_id)
    except AttributeError:
        # NOTE(review): it is not visible here which call is expected to
        # raise AttributeError; kept so callers keep getting None.
        return None
    except SQLAlchemyError as e:
        # The message used to say "viewing task", a copy-paste slip that
        # pointed debugging at the wrong lookup.
        log.debug("Database error viewing sample: {0}".format(e))
        return None
    else:
        # Detach the object so it stays usable once the session is closed.
        if sample:
            session.expunge(sample)
    finally:
        session.close()

    return sample
1094 1095 @classlock
def find_sample(self, md5=None, sha256=None):
    """Search for a sample by MD5 or SHA256 hash.
    @param md5: MD5 hash string; takes precedence when both are given.
    @param sha256: SHA256 hash string, used only when no MD5 is given.
    @return: the first matching sample detached from its session, or None
             when nothing matches, no hash was supplied or a database
             error occurred.
    """
    # Start from None so a call without any hash returns None instead of
    # failing on an unbound local variable.
    sample = None
    session = self.Session()
    try:
        if md5:
            sample = session.query(Sample).filter_by(md5=md5).first()
        elif sha256:
            sample = session.query(Sample).filter_by(sha256=sha256).first()
    except SQLAlchemyError as e:
        log.debug("Database error searching sample: {0}".format(e))
        return None
    else:
        # Detach the object so it stays usable once the session is closed.
        if sample:
            session.expunge(sample)
    finally:
        session.close()
    return sample
1116 1117 @classlock
def count_samples(self):
    """Count the samples stored in the database.
    @return: number of samples, or 0 on database error.
    """
    db_session = self.Session()
    try:
        return db_session.query(Sample).count()
    except SQLAlchemyError as err:
        log.debug("Database error counting samples: {0}".format(err))
        return 0
    finally:
        db_session.close()
1129 1130 @classlock
def view_machine(self, name):
    """Look up a virtual machine by its name.
    @param name: virtual machine name.
    @return: the first matching machine with its tags loaded and detached
             from its session, or None when it is not found or a database
             error occurred.
    """
    db_session = self.Session()
    try:
        query = db_session.query(Machine).options(joinedload("tags"))
        found = query.filter(Machine.name == name).first()
    except SQLAlchemyError as err:
        log.debug("Database error viewing machine: {0}".format(err))
        return None
    else:
        # Detach the object so it stays usable once the session is closed.
        if found:
            db_session.expunge(found)
        return found
    finally:
        db_session.close()
1148 1149 @classlock
def view_machine_by_label(self, label):
    """Look up a virtual machine by its label.
    @param label: virtual machine label.
    @return: the first matching machine with its tags loaded and detached
             from its session, or None when it is not found or a database
             error occurred.
    """
    db_session = self.Session()
    try:
        query = db_session.query(Machine).options(joinedload("tags"))
        found = query.filter(Machine.label == label).first()
    except SQLAlchemyError as err:
        log.debug("Database error viewing machine by label: {0}".format(err))
        return None
    else:
        # Detach the object so it stays usable once the session is closed.
        if found:
            db_session.expunge(found)
        return found
    finally:
        db_session.close()
1167 1168 @classlock
def view_errors(self, task_id):
    """Fetch every error recorded for a task.
    @param task_id: ID of the task the errors belong to.
    @return: list of errors, or an empty list on database error.
    """
    db_session = self.Session()
    try:
        return db_session.query(Error).filter_by(task_id=task_id).all()
    except SQLAlchemyError as err:
        log.debug("Database error viewing errors: {0}".format(err))
        return []
    finally:
        db_session.close()
1183