#!/usr/bin/env python

__author__ = "Mia Stein"

import re
import zlib
import cv2

from scapy.all import *

PIC_DIR = '/home/user/Desktop/pictures'
FACES_DIR = '/home/user/Desktop/faces'
PCAP = 'http_witp_jpegs.cap'


# split out the headers using a regular expression
def get_http_headers(http_payload):
    try:
        # split the headers off if it is HTTP traffic
        headers_raw = http_payload[:http_payload.index("\r\n\r\n")+2]
        headers = dict(re.findall(r'(?P<name>.*?): (?P<value>.*?)\r\n', headers_raw))
    except:
        return None

    if 'Content-Type' not in headers:
        return None

    return headers


# determine whether we received an image in the HTTP response
def extract_image(headers, http_payload):
    image = None
    image_type = None

    try:
        if 'image' in headers['Content-Type']:
            # grab the image type and image body
            image_type = headers['Content-Type'].split('/')[1]
            image = http_payload[http_payload.index('\r\n\r\n')+4:]

            # if we detect compression, attempt to decompress the image
            # before returning the image type and the raw image buffer
            try:
                if 'Content-Encoding' in headers.keys():
                    if headers['Content-Encoding'] == 'gzip':
                        image = zlib.decompress(image, 16 + zlib.MAX_WBITS)
                    elif headers['Content-Encoding'] == 'deflate':
                        image = zlib.decompress(image)
            except:
                pass
    except:
        return None, None

    return image, image_type


# facial detection code
def face_detect(path, file_name):
    img = cv2.imread(path)

    # apply a classifier that is trained in advance for detecting faces
    # in a front-facing orientation
    cascade = cv2.CascadeClassifier('/home/bytegirl/Desktop/haarcascade_upperbody.xml')

    # returns rectangle coordinates that correspond to where the face
    # was detected in the image
    rects = cascade.detectMultiScale(img, 1.3, 4, cv2.cv.CV_HAAR_SCALE_IMAGE, (20, 20))
    if len(rects) == 0:
        return False
    rects[:, 2:] += rects[:, :2]

    # highlight the faces in the image by drawing a green rectangle
    # over the detected area
    for x1, y1, x2, y2 in rects:
        cv2.rectangle(img, (x1, y1), (x2, y2), (127, 255, 0), 2)

    # write out the resulting image
    cv2.imwrite('%s/%s-%s' % (FACES_DIR, PCAP, file_name), img)

    return True


def http_assembler(PCAP):
    carved_images = 0
    faces_detected = 0

    a = rdpcap('/home/temp/' + PCAP)

    # Scapy automatically separates each TCP session into a dictionary
    sessions = a.sessions()

    for session in sessions:
        http_payload = ''
        for packet in sessions[session]:
            try:
                # filter out only HTTP traffic and concatenate the payload
                # of all the HTTP traffic into a single buffer
                # (the same as Wireshark's Follow TCP Stream)
                if packet[TCP].dport == 80 or packet[TCP].sport == 80:
                    # reassemble the stream
                    http_payload += str(packet[TCP].payload)
            except:
                pass

        # after we have the HTTP data assembled, we pass it off to our HTTP
        # header-parsing function, which will allow us to inspect the headers
        headers = get_http_headers(http_payload)
        if headers is None:
            continue

        # after we validate that we received an image back in an HTTP
        # response, we extract the raw image and return the image type
        # and the binary body of the image itself
        image, image_type = extract_image(headers, http_payload)
        if image is not None and image_type is not None:
            # store the image
            file_name = '%s-pic_carver_%d.%s' % (PCAP, carved_images, image_type)
            fd = open('%s/%s' % (PIC_DIR, file_name), 'wb')
            fd.write(image)
            fd.close()
            carved_images += 1

            # now attempt face detection
            try:
                # pass the file to the facial detection routine
                result = face_detect('%s/%s' % (PIC_DIR, file_name), file_name)
                if result is True:
                    faces_detected += 1
            except:
                pass

    return carved_images, faces_detected


if __name__ == '__main__':
    carved_images, faces_detected = http_assembler(PCAP)
    print "Extracted: %d images" % carved_images
    print "Detected: %d faces" % faces_detected
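

# --- Hedged sketch (not part of the original tool) --------------------------
# The face_detect() routine above relies on the OpenCV 2.x constant
# cv2.cv.CV_HAAR_SCALE_IMAGE, which no longer exists in OpenCV 3/4. The
# variant below shows the equivalent call on a modern opencv-python build,
# using cv2.CASCADE_SCALE_IMAGE and keyword arguments. The cascade file name
# (haarcascade_frontalface_default.xml) and the cv2.data.haarcascades path
# are assumptions based on the cascades shipped with the opencv-python wheel;
# substitute whatever classifier file you actually use. Note also that on
# Python 3 Scapy payloads are bytes, so the stream reassembly in
# http_assembler() would need bytes(packet[TCP].payload) and b'\r\n\r\n'
# delimiters.
def face_detect_modern(path, file_name):
    img = cv2.imread(path)
    if img is None:
        return False

    # load a bundled frontal-face cascade (assumed location, see note above)
    cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # same detection parameters as face_detect(), passed as keywords
    rects = cascade.detectMultiScale(img, scaleFactor=1.3, minNeighbors=4,
                                     flags=cv2.CASCADE_SCALE_IMAGE,
                                     minSize=(20, 20))
    if len(rects) == 0:
        return False

    # convert (x, y, w, h) boxes to corner coordinates and draw them
    rects[:, 2:] += rects[:, :2]
    for x1, y1, x2, y2 in rects:
        cv2.rectangle(img, (x1, y1), (x2, y2), (127, 255, 0), 2)

    cv2.imwrite('%s/%s-%s' % (FACES_DIR, PCAP, file_name), img)
    return True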