Package modules :: Package reporting :: Module mongodb
[hide private]
[frames] | [no frames]

Source Code for Module modules.reporting.mongodb

  1  # Copyright (C) 2010-2014 Cuckoo Sandbox Developers. 
  2  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  3  # See the file 'docs/LICENSE' for copying permission. 
  4   
  5  import os 
  6   
  7  from lib.cuckoo.common.abstracts import Report 
  8  from lib.cuckoo.common.exceptions import CuckooDependencyError 
  9  from lib.cuckoo.common.exceptions import CuckooReportError 
 10  from lib.cuckoo.common.objects import File 
 11   
 12  try: 
 13      from pymongo.connection import Connection 
 14      from pymongo.errors import ConnectionFailure 
 15      from gridfs import GridFS 
 16      from gridfs.errors import FileExists 
 17      HAVE_MONGO = True 
 18  except ImportError: 
 19      HAVE_MONGO = False 
 20   
class MongoDB(Report):
    """Stores the analysis report in MongoDB.

    The report document is saved to the ``analysis`` collection; binary
    payloads (sample, PCAP, dropped files, screenshots) go to GridFS and
    are referenced by ObjectId; API call traces are split into chunks in
    the ``calls`` collection so a single report document stays below
    MongoDB's document size limit.
    """

    def connect(self):
        """Connects to Mongo database, loads options and set connectors.

        Reads "host"/"port" from the reporting options and initializes
        ``self.conn``, ``self.db`` and ``self.fs``.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
        except TypeError:
            # Connection() raises TypeError when port is not an integer.
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS, deduplicating by SHA256.

        @param file_obj: object to the file to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        # run() creates a unique sparse index on "sha256", so a file that
        # was already stored can simply be referenced again.
        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]
        else:
            new = self.fs.new_file(filename=filename,
                                   sha256=file_obj.get_sha256())
            for chunk in file_obj.get_chunks():
                new.write(chunk)
            try:
                new.close()
            except FileExists:
                # A concurrent writer stored the same file first and the
                # unique index rejected ours; return the winner's id.
                to_find = {"sha256": file_obj.get_sha256()}
                return self.db.fs.files.find_one(to_find)["_id"]
            else:
                return new._id

    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Ensure the connection is released even if any of the storage
        # operations below raises; previously a failure leaked the socket.
        try:
            # Set an unique index on stored files, to avoid duplicates.
            # From pymongo docs:
            #  Returns the name of the created index if an index is actually
            #  created.
            #  Returns None if the index already exists.
            self.db.fs.files.ensure_index("sha256", unique=True,
                                          sparse=True, name="sha256_unique")

            # Create a copy of the dictionary. This is done in order to not
            # modify the original dictionary and possibly compromise the
            # following reporting modules.
            report = dict(results)

            # Store the sample in GridFS.
            if results["info"]["category"] == "file":
                sample = File(self.file_path)
                if sample.valid():
                    fname = results["target"]["file"]["name"]
                    sample_id = self.store_file(sample, filename=fname)
                    report["target"] = {"file_id": sample_id}
                    report["target"].update(results["target"])

            # Store the PCAP file in GridFS and reference it back in the
            # report.
            pcap_path = os.path.join(self.analysis_path, "dump.pcap")
            pcap = File(pcap_path)
            if pcap.valid():
                pcap_id = self.store_file(pcap)
                report["network"] = {"pcap_id": pcap_id}
                report["network"].update(results["network"])

            # Walk through the dropped files, store them in GridFS and
            # update the report with the ObjectIds.
            new_dropped = []
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop,
                                                 filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

            report["dropped"] = new_dropped

            # Add screenshots.
            report["shots"] = []
            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                # Walk through the files and select the JPGs.
                shots = [shot for shot in os.listdir(shots_path)
                         if shot.endswith(".jpg")]

                for shot_file in sorted(shots):
                    shot_path = os.path.join(self.analysis_path,
                                             "shots", shot_file)
                    shot = File(shot_path)
                    # If the screenshot path is a valid file, store it and
                    # reference it back in the report.
                    if shot.valid():
                        shot_id = self.store_file(shot)
                        report["shots"].append(shot_id)

            # Store chunks of API calls in a different collection and
            # reference those chunks back in the report. In this way we
            # should defeat the issue with the oversized reports exceeding
            # MongoDB's boundaries. Also allows paging of the reports.
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for call in process["calls"]:
                    # Flush the chunk to MongoDB once it reaches 100 calls.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"],
                                     "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

            # Store the report and retrieve its object id.
            self.db.analysis.save(report)
        finally:
            self.conn.disconnect()
183