mirror of
https://github.com/autistic-symposium/sec-pentesting-toolkit.git
synced 2025-05-02 06:46:07 -04:00
Starting organizing my forensics material
This commit is contained in:
parent
c50d88d102
commit
0077bcf0bf
16 changed files with 9193 additions and 21 deletions
1031
Forensics/scripts/pdf-parser.py
Executable file
1031
Forensics/scripts/pdf-parser.py
Executable file
File diff suppressed because it is too large
Load diff
714
Forensics/scripts/pdfid.py
Executable file
714
Forensics/scripts/pdfid.py
Executable file
|
@ -0,0 +1,714 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
__description__ = 'Tool to test a PDF file'
|
||||
__author__ = 'Didier Stevens'
|
||||
__version__ = '0.1.2'
|
||||
__date__ = '2013/03/13'
|
||||
|
||||
"""
|
||||
|
||||
Tool to test a PDF file
|
||||
|
||||
Source code put in public domain by Didier Stevens, no Copyright
|
||||
https://DidierStevens.com
|
||||
Use at your own risk
|
||||
|
||||
History:
|
||||
2009/03/27: start
|
||||
2009/03/28: scan option
|
||||
2009/03/29: V0.0.2: xml output
|
||||
2009/03/31: V0.0.3: /ObjStm suggested by Dion
|
||||
2009/04/02: V0.0.4: added ErrorMessage
|
||||
2009/04/20: V0.0.5: added Dates
|
||||
2009/04/21: V0.0.6: added entropy
|
||||
2009/04/22: added disarm
|
||||
2009/04/29: finished disarm
|
||||
2009/05/13: V0.0.7: added cPDFEOF
|
||||
2009/07/24: V0.0.8: added /AcroForm and /RichMedia, simplified %PDF header regex, extra date format (without TZ)
|
||||
2009/07/25: added input redirection, option --force
|
||||
2009/10/13: V0.0.9: added detection for CVE-2009-3459; added /RichMedia to disarm
|
||||
2010/01/11: V0.0.10: relaxed %PDF header checking
|
||||
2010/04/28: V0.0.11: added /Launch
|
||||
2010/09/21: V0.0.12: fixed cntCharsAfterLastEOF bug; fix by Russell Holloway
|
||||
2011/12/29: updated for Python 3, added keyword /EmbeddedFile
|
||||
2012/03/03: added PDFiD2JSON; coded by Brandon Dixon
|
||||
2013/02/10: V0.1.0: added http/https support; added support for ZIP file with password 'infected'
|
||||
2013/03/11: V0.1.1: fixes for Python 3
|
||||
2013/03/13: V0.1.2: Added error handling for files; added /XFA
|
||||
|
||||
Todo:
|
||||
- update XML example (entropy, EOF)
|
||||
- code review, cleanup
|
||||
"""
|
||||
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import xml.dom.minidom
|
||||
import traceback
|
||||
import math
|
||||
import operator
|
||||
import os.path
|
||||
import sys
|
||||
import json
|
||||
import zipfile
|
||||
try:
|
||||
import urllib2
|
||||
urllib23 = urllib2
|
||||
except:
|
||||
import urllib.request
|
||||
urllib23 = urllib.request
|
||||
|
||||
#Convert 2 Bytes If Python 3
|
||||
def C2BIP3(string):
|
||||
if sys.version_info[0] > 2:
|
||||
return bytes([ord(x) for x in string])
|
||||
else:
|
||||
return string
|
||||
|
||||
class cBinaryFile:
|
||||
def __init__(self, file):
|
||||
self.file = file
|
||||
if file == '':
|
||||
self.infile = sys.stdin
|
||||
elif file.lower().startswith('http://') or file.lower().startswith('https://'):
|
||||
try:
|
||||
if sys.hexversion >= 0x020601F0:
|
||||
self.infile = urllib23.urlopen(file, timeout=5)
|
||||
else:
|
||||
self.infile = urllib23.urlopen(file)
|
||||
except urllib23.HTTPError:
|
||||
print('Error accessing URL %s' % file)
|
||||
print(sys.exc_info()[1])
|
||||
sys.exit()
|
||||
elif file.lower().endswith('.zip'):
|
||||
try:
|
||||
self.zipfile = zipfile.ZipFile(file, 'r')
|
||||
self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected'))
|
||||
except:
|
||||
print('Error opening file %s' % file)
|
||||
print(sys.exc_info()[1])
|
||||
sys.exit()
|
||||
else:
|
||||
try:
|
||||
self.infile = open(file, 'rb')
|
||||
except:
|
||||
print('Error opening file %s' % file)
|
||||
print(sys.exc_info()[1])
|
||||
sys.exit()
|
||||
self.ungetted = []
|
||||
|
||||
def byte(self):
|
||||
if len(self.ungetted) != 0:
|
||||
return self.ungetted.pop()
|
||||
inbyte = self.infile.read(1)
|
||||
if not inbyte or inbyte == '':
|
||||
self.infile.close()
|
||||
return None
|
||||
return ord(inbyte)
|
||||
|
||||
def bytes(self, size):
|
||||
if size <= len(self.ungetted):
|
||||
result = self.ungetted[0:size]
|
||||
del self.ungetted[0:size]
|
||||
return result
|
||||
inbytes = self.infile.read(size - len(self.ungetted))
|
||||
if inbytes == '':
|
||||
self.infile.close()
|
||||
if type(inbytes) == type(''):
|
||||
result = self.ungetted + [ord(b) for b in inbytes]
|
||||
else:
|
||||
result = self.ungetted + [b for b in inbytes]
|
||||
self.ungetted = []
|
||||
return result
|
||||
|
||||
def unget(self, byte):
|
||||
self.ungetted.append(byte)
|
||||
|
||||
def ungets(self, bytes):
|
||||
bytes.reverse()
|
||||
self.ungetted.extend(bytes)
|
||||
|
||||
class cPDFDate:
|
||||
def __init__(self):
|
||||
self.state = 0
|
||||
|
||||
def parse(self, char):
|
||||
if char == 'D':
|
||||
self.state = 1
|
||||
return None
|
||||
elif self.state == 1:
|
||||
if char == ':':
|
||||
self.state = 2
|
||||
self.digits1 = ''
|
||||
else:
|
||||
self.state = 0
|
||||
return None
|
||||
elif self.state == 2:
|
||||
if len(self.digits1) < 14:
|
||||
if char >= '0' and char <= '9':
|
||||
self.digits1 += char
|
||||
return None
|
||||
else:
|
||||
self.state = 0
|
||||
return None
|
||||
elif char == '+' or char == '-' or char == 'Z':
|
||||
self.state = 3
|
||||
self.digits2 = ''
|
||||
self.TZ = char
|
||||
return None
|
||||
elif char == '"':
|
||||
self.state = 0
|
||||
self.date = 'D:' + self.digits1
|
||||
return self.date
|
||||
elif char < '0' or char > '9':
|
||||
self.state = 0
|
||||
self.date = 'D:' + self.digits1
|
||||
return self.date
|
||||
else:
|
||||
self.state = 0
|
||||
return None
|
||||
elif self.state == 3:
|
||||
if len(self.digits2) < 2:
|
||||
if char >= '0' and char <= '9':
|
||||
self.digits2 += char
|
||||
return None
|
||||
else:
|
||||
self.state = 0
|
||||
return None
|
||||
elif len(self.digits2) == 2:
|
||||
if char == "'":
|
||||
self.digits2 += char
|
||||
return None
|
||||
else:
|
||||
self.state = 0
|
||||
return None
|
||||
elif len(self.digits2) < 5:
|
||||
if char >= '0' and char <= '9':
|
||||
self.digits2 += char
|
||||
if len(self.digits2) == 5:
|
||||
self.state = 0
|
||||
self.date = 'D:' + self.digits1 + self.TZ + self.digits2
|
||||
return self.date
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
self.state = 0
|
||||
return None
|
||||
|
||||
def fEntropy(countByte, countTotal):
|
||||
x = float(countByte) / countTotal
|
||||
if x > 0:
|
||||
return - x * math.log(x, 2)
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
class cEntropy:
|
||||
def __init__(self):
|
||||
self.allBucket = [0 for i in range(0, 256)]
|
||||
self.streamBucket = [0 for i in range(0, 256)]
|
||||
|
||||
def add(self, byte, insideStream):
|
||||
self.allBucket[byte] += 1
|
||||
if insideStream:
|
||||
self.streamBucket[byte] += 1
|
||||
|
||||
def removeInsideStream(self, byte):
|
||||
if self.streamBucket[byte] > 0:
|
||||
self.streamBucket[byte] -= 1
|
||||
|
||||
def calc(self):
|
||||
self.nonStreamBucket = map(operator.sub, self.allBucket, self.streamBucket)
|
||||
allCount = sum(self.allBucket)
|
||||
streamCount = sum(self.streamBucket)
|
||||
nonStreamCount = sum(self.nonStreamBucket)
|
||||
return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket)))
|
||||
|
||||
class cPDFEOF:
|
||||
def __init__(self):
|
||||
self.token = ''
|
||||
self.cntEOFs = 0
|
||||
|
||||
def parse(self, char):
|
||||
if self.cntEOFs > 0:
|
||||
self.cntCharsAfterLastEOF += 1
|
||||
if self.token == '' and char == '%':
|
||||
self.token += char
|
||||
return
|
||||
elif self.token == '%' and char == '%':
|
||||
self.token += char
|
||||
return
|
||||
elif self.token == '%%' and char == 'E':
|
||||
self.token += char
|
||||
return
|
||||
elif self.token == '%%E' and char == 'O':
|
||||
self.token += char
|
||||
return
|
||||
elif self.token == '%%EO' and char == 'F':
|
||||
self.token += char
|
||||
return
|
||||
elif self.token == '%%EOF' and (char == '\n' or char == '\r' or char == ' ' or char == '\t'):
|
||||
self.cntEOFs += 1
|
||||
self.cntCharsAfterLastEOF = 0
|
||||
if char == '\n':
|
||||
self.token = ''
|
||||
else:
|
||||
self.token += char
|
||||
return
|
||||
elif self.token == '%%EOF\r':
|
||||
if char == '\n':
|
||||
self.cntCharsAfterLastEOF = 0
|
||||
self.token = ''
|
||||
else:
|
||||
self.token = ''
|
||||
|
||||
def FindPDFHeaderRelaxed(oBinaryFile):
|
||||
bytes = oBinaryFile.bytes(1024)
|
||||
index = ''.join([chr(byte) for byte in bytes]).find('%PDF')
|
||||
if index == -1:
|
||||
oBinaryFile.ungets(bytes)
|
||||
return ([], None)
|
||||
for endHeader in range(index + 4, index + 4 + 10):
|
||||
if bytes[endHeader] == 10 or bytes[endHeader] == 13:
|
||||
break
|
||||
oBinaryFile.ungets(bytes[endHeader:])
|
||||
return (bytes[0:endHeader], ''.join([chr(byte) for byte in bytes[index:endHeader]]))
|
||||
|
||||
def Hexcode2String(char):
|
||||
if type(char) == int:
|
||||
return '#%02x' % char
|
||||
else:
|
||||
return char
|
||||
|
||||
def SwapCase(char):
|
||||
if type(char) == int:
|
||||
return ord(chr(char).swapcase())
|
||||
else:
|
||||
return char.swapcase()
|
||||
|
||||
def HexcodeName2String(hexcodeName):
|
||||
return ''.join(map(Hexcode2String, hexcodeName))
|
||||
|
||||
def SwapName(wordExact):
|
||||
return map(SwapCase, wordExact)
|
||||
|
||||
def UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut):
|
||||
if word != '':
|
||||
if slash + word in words:
|
||||
words[slash + word][0] += 1
|
||||
if hexcode:
|
||||
words[slash + word][1] += 1
|
||||
elif slash == '/' and allNames:
|
||||
words[slash + word] = [1, 0]
|
||||
if hexcode:
|
||||
words[slash + word][1] += 1
|
||||
if slash == '/':
|
||||
lastName = slash + word
|
||||
if slash == '':
|
||||
if word == 'stream':
|
||||
insideStream = True
|
||||
if word == 'endstream':
|
||||
if insideStream == True and oEntropy != None:
|
||||
for char in 'endstream':
|
||||
oEntropy.removeInsideStream(ord(char))
|
||||
insideStream = False
|
||||
if fOut != None:
|
||||
if slash == '/' and '/' + word in ('/JS', '/JavaScript', '/AA', '/OpenAction', '/JBIG2Decode', '/RichMedia', '/Launch'):
|
||||
wordExactSwapped = HexcodeName2String(SwapName(wordExact))
|
||||
fOut.write(C2BIP3(wordExactSwapped))
|
||||
print('/%s -> /%s' % (HexcodeName2String(wordExact), wordExactSwapped))
|
||||
else:
|
||||
fOut.write(C2BIP3(HexcodeName2String(wordExact)))
|
||||
return ('', [], False, lastName, insideStream)
|
||||
|
||||
class cCVE_2009_3459:
|
||||
def __init__(self):
|
||||
self.count = 0
|
||||
|
||||
def Check(self, lastName, word):
|
||||
if (lastName == '/Colors' and word.isdigit() and int(word) > 2^24): # decided to alert when the number of colors is expressed with more than 3 bytes
|
||||
self.count += 1
|
||||
|
||||
def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
|
||||
"""Example of XML output:
|
||||
<PDFiD ErrorOccured="False" ErrorMessage="" Filename="test.pdf" Header="%PDF-1.1" IsPDF="True" Version="0.0.4" Entropy="4.28">
|
||||
<Keywords>
|
||||
<Keyword Count="7" HexcodeCount="0" Name="obj"/>
|
||||
<Keyword Count="7" HexcodeCount="0" Name="endobj"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="stream"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="endstream"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="xref"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="trailer"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="startxref"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="/Page"/>
|
||||
<Keyword Count="0" HexcodeCount="0" Name="/Encrypt"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="/JS"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="/JavaScript"/>
|
||||
<Keyword Count="0" HexcodeCount="0" Name="/AA"/>
|
||||
<Keyword Count="1" HexcodeCount="0" Name="/OpenAction"/>
|
||||
<Keyword Count="0" HexcodeCount="0" Name="/JBIG2Decode"/>
|
||||
</Keywords>
|
||||
<Dates>
|
||||
<Date Value="D:20090128132916+01'00" Name="/ModDate"/>
|
||||
</Dates>
|
||||
</PDFiD>
|
||||
"""
|
||||
|
||||
word = ''
|
||||
wordExact = []
|
||||
hexcode = False
|
||||
lastName = ''
|
||||
insideStream = False
|
||||
keywords = ('obj',
|
||||
'endobj',
|
||||
'stream',
|
||||
'endstream',
|
||||
'xref',
|
||||
'trailer',
|
||||
'startxref',
|
||||
'/Page',
|
||||
'/Encrypt',
|
||||
'/ObjStm',
|
||||
'/JS',
|
||||
'/JavaScript',
|
||||
'/AA',
|
||||
'/OpenAction',
|
||||
'/AcroForm',
|
||||
'/JBIG2Decode',
|
||||
'/RichMedia',
|
||||
'/Launch',
|
||||
'/EmbeddedFile',
|
||||
'/XFA',
|
||||
)
|
||||
words = {}
|
||||
dates = []
|
||||
for keyword in keywords:
|
||||
words[keyword] = [0, 0]
|
||||
slash = ''
|
||||
xmlDoc = xml.dom.minidom.getDOMImplementation().createDocument(None, 'PDFiD', None)
|
||||
att = xmlDoc.createAttribute('Version')
|
||||
att.nodeValue = __version__
|
||||
xmlDoc.documentElement.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('Filename')
|
||||
att.nodeValue = file
|
||||
xmlDoc.documentElement.setAttributeNode(att)
|
||||
attErrorOccured = xmlDoc.createAttribute('ErrorOccured')
|
||||
xmlDoc.documentElement.setAttributeNode(attErrorOccured)
|
||||
attErrorOccured.nodeValue = 'False'
|
||||
attErrorMessage = xmlDoc.createAttribute('ErrorMessage')
|
||||
xmlDoc.documentElement.setAttributeNode(attErrorMessage)
|
||||
attErrorMessage.nodeValue = ''
|
||||
|
||||
oPDFDate = None
|
||||
oEntropy = None
|
||||
oPDFEOF = None
|
||||
oCVE_2009_3459 = cCVE_2009_3459()
|
||||
try:
|
||||
attIsPDF = xmlDoc.createAttribute('IsPDF')
|
||||
xmlDoc.documentElement.setAttributeNode(attIsPDF)
|
||||
oBinaryFile = cBinaryFile(file)
|
||||
if extraData:
|
||||
oPDFDate = cPDFDate()
|
||||
oEntropy = cEntropy()
|
||||
oPDFEOF = cPDFEOF()
|
||||
(bytesHeader, pdfHeader) = FindPDFHeaderRelaxed(oBinaryFile)
|
||||
if disarm:
|
||||
(pathfile, extension) = os.path.splitext(file)
|
||||
fOut = open(pathfile + '.disarmed' + extension, 'wb')
|
||||
for byteHeader in bytesHeader:
|
||||
fOut.write(C2BIP3(chr(byteHeader)))
|
||||
else:
|
||||
fOut = None
|
||||
if oEntropy != None:
|
||||
for byteHeader in bytesHeader:
|
||||
oEntropy.add(byteHeader, insideStream)
|
||||
if pdfHeader == None and not force:
|
||||
attIsPDF.nodeValue = 'False'
|
||||
return xmlDoc
|
||||
else:
|
||||
if pdfHeader == None:
|
||||
attIsPDF.nodeValue = 'False'
|
||||
pdfHeader = ''
|
||||
else:
|
||||
attIsPDF.nodeValue = 'True'
|
||||
att = xmlDoc.createAttribute('Header')
|
||||
att.nodeValue = repr(pdfHeader[0:10]).strip("'")
|
||||
xmlDoc.documentElement.setAttributeNode(att)
|
||||
byte = oBinaryFile.byte()
|
||||
while byte != None:
|
||||
char = chr(byte)
|
||||
charUpper = char.upper()
|
||||
if charUpper >= 'A' and charUpper <= 'Z' or charUpper >= '0' and charUpper <= '9':
|
||||
word += char
|
||||
wordExact.append(char)
|
||||
elif slash == '/' and char == '#':
|
||||
d1 = oBinaryFile.byte()
|
||||
if d1 != None:
|
||||
d2 = oBinaryFile.byte()
|
||||
if d2 != None and (chr(d1) >= '0' and chr(d1) <= '9' or chr(d1).upper() >= 'A' and chr(d1).upper() <= 'F') and (chr(d2) >= '0' and chr(d2) <= '9' or chr(d2).upper() >= 'A' and chr(d2).upper() <= 'F'):
|
||||
word += chr(int(chr(d1) + chr(d2), 16))
|
||||
wordExact.append(int(chr(d1) + chr(d2), 16))
|
||||
hexcode = True
|
||||
if oEntropy != None:
|
||||
oEntropy.add(d1, insideStream)
|
||||
oEntropy.add(d2, insideStream)
|
||||
if oPDFEOF != None:
|
||||
oPDFEOF.parse(d1)
|
||||
oPDFEOF.parse(d2)
|
||||
else:
|
||||
oBinaryFile.unget(d2)
|
||||
oBinaryFile.unget(d1)
|
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
|
||||
if disarm:
|
||||
fOut.write(C2BIP3(char))
|
||||
else:
|
||||
oBinaryFile.unget(d1)
|
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
|
||||
if disarm:
|
||||
fOut.write(C2BIP3(char))
|
||||
else:
|
||||
oCVE_2009_3459.Check(lastName, word)
|
||||
|
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
|
||||
if char == '/':
|
||||
slash = '/'
|
||||
else:
|
||||
slash = ''
|
||||
if disarm:
|
||||
fOut.write(C2BIP3(char))
|
||||
|
||||
if oPDFDate != None and oPDFDate.parse(char) != None:
|
||||
dates.append([oPDFDate.date, lastName])
|
||||
|
||||
if oEntropy != None:
|
||||
oEntropy.add(byte, insideStream)
|
||||
|
||||
if oPDFEOF != None:
|
||||
oPDFEOF.parse(char)
|
||||
|
||||
byte = oBinaryFile.byte()
|
||||
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
|
||||
|
||||
# check to see if file ended with %%EOF. If so, we can reset charsAfterLastEOF and add one to EOF count. This is never performed in
|
||||
# the parse function because it never gets called due to hitting the end of file.
|
||||
if byte == None and oPDFEOF != None:
|
||||
if oPDFEOF.token == '%%EOF':
|
||||
oPDFEOF.cntEOFs += 1
|
||||
oPDFEOF.cntCharsAfterLastEOF = 0
|
||||
oPDFEOF.token = ''
|
||||
|
||||
except SystemExit:
|
||||
sys.exit()
|
||||
except:
|
||||
attErrorOccured.nodeValue = 'True'
|
||||
attErrorMessage.nodeValue = traceback.format_exc()
|
||||
|
||||
if disarm:
|
||||
fOut.close()
|
||||
|
||||
attEntropyAll = xmlDoc.createAttribute('TotalEntropy')
|
||||
xmlDoc.documentElement.setAttributeNode(attEntropyAll)
|
||||
attCountAll = xmlDoc.createAttribute('TotalCount')
|
||||
xmlDoc.documentElement.setAttributeNode(attCountAll)
|
||||
attEntropyStream = xmlDoc.createAttribute('StreamEntropy')
|
||||
xmlDoc.documentElement.setAttributeNode(attEntropyStream)
|
||||
attCountStream = xmlDoc.createAttribute('StreamCount')
|
||||
xmlDoc.documentElement.setAttributeNode(attCountStream)
|
||||
attEntropyNonStream = xmlDoc.createAttribute('NonStreamEntropy')
|
||||
xmlDoc.documentElement.setAttributeNode(attEntropyNonStream)
|
||||
attCountNonStream = xmlDoc.createAttribute('NonStreamCount')
|
||||
xmlDoc.documentElement.setAttributeNode(attCountNonStream)
|
||||
if oEntropy != None:
|
||||
(countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc()
|
||||
attEntropyAll.nodeValue = '%f' % entropyAll
|
||||
attCountAll.nodeValue = '%d' % countAll
|
||||
attEntropyStream.nodeValue = '%f' % entropyStream
|
||||
attCountStream.nodeValue = '%d' % countStream
|
||||
attEntropyNonStream.nodeValue = '%f' % entropyNonStream
|
||||
attCountNonStream.nodeValue = '%d' % countNonStream
|
||||
else:
|
||||
attEntropyAll.nodeValue = ''
|
||||
attCountAll.nodeValue = ''
|
||||
attEntropyStream.nodeValue = ''
|
||||
attCountStream.nodeValue = ''
|
||||
attEntropyNonStream.nodeValue = ''
|
||||
attCountNonStream.nodeValue = ''
|
||||
attCountEOF = xmlDoc.createAttribute('CountEOF')
|
||||
xmlDoc.documentElement.setAttributeNode(attCountEOF)
|
||||
attCountCharsAfterLastEOF = xmlDoc.createAttribute('CountCharsAfterLastEOF')
|
||||
xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF)
|
||||
if oPDFEOF != None:
|
||||
attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs
|
||||
attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF
|
||||
else:
|
||||
attCountEOF.nodeValue = ''
|
||||
attCountCharsAfterLastEOF.nodeValue = ''
|
||||
|
||||
eleKeywords = xmlDoc.createElement('Keywords')
|
||||
xmlDoc.documentElement.appendChild(eleKeywords)
|
||||
for keyword in keywords:
|
||||
eleKeyword = xmlDoc.createElement('Keyword')
|
||||
eleKeywords.appendChild(eleKeyword)
|
||||
att = xmlDoc.createAttribute('Name')
|
||||
att.nodeValue = keyword
|
||||
eleKeyword.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('Count')
|
||||
att.nodeValue = str(words[keyword][0])
|
||||
eleKeyword.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('HexcodeCount')
|
||||
att.nodeValue = str(words[keyword][1])
|
||||
eleKeyword.setAttributeNode(att)
|
||||
eleKeyword = xmlDoc.createElement('Keyword')
|
||||
eleKeywords.appendChild(eleKeyword)
|
||||
att = xmlDoc.createAttribute('Name')
|
||||
att.nodeValue = '/Colors > 2^24'
|
||||
eleKeyword.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('Count')
|
||||
att.nodeValue = str(oCVE_2009_3459.count)
|
||||
eleKeyword.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('HexcodeCount')
|
||||
att.nodeValue = str(0)
|
||||
eleKeyword.setAttributeNode(att)
|
||||
if allNames:
|
||||
keys = sorted(words.keys())
|
||||
for word in keys:
|
||||
if not word in keywords:
|
||||
eleKeyword = xmlDoc.createElement('Keyword')
|
||||
eleKeywords.appendChild(eleKeyword)
|
||||
att = xmlDoc.createAttribute('Name')
|
||||
att.nodeValue = word
|
||||
eleKeyword.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('Count')
|
||||
att.nodeValue = str(words[word][0])
|
||||
eleKeyword.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('HexcodeCount')
|
||||
att.nodeValue = str(words[word][1])
|
||||
eleKeyword.setAttributeNode(att)
|
||||
eleDates = xmlDoc.createElement('Dates')
|
||||
xmlDoc.documentElement.appendChild(eleDates)
|
||||
dates.sort(key=lambda x: x[0])
|
||||
for date in dates:
|
||||
eleDate = xmlDoc.createElement('Date')
|
||||
eleDates.appendChild(eleDate)
|
||||
att = xmlDoc.createAttribute('Value')
|
||||
att.nodeValue = date[0]
|
||||
eleDate.setAttributeNode(att)
|
||||
att = xmlDoc.createAttribute('Name')
|
||||
att.nodeValue = date[1]
|
||||
eleDate.setAttributeNode(att)
|
||||
return xmlDoc
|
||||
|
||||
def PDFiD2String(xmlDoc, force):
|
||||
result = 'PDFiD %s %s\n' % (xmlDoc.documentElement.getAttribute('Version'), xmlDoc.documentElement.getAttribute('Filename'))
|
||||
if xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True':
|
||||
return result + '***Error occured***\n%s\n' % xmlDoc.documentElement.getAttribute('ErrorMessage')
|
||||
if not force and xmlDoc.documentElement.getAttribute('IsPDF') == 'False':
|
||||
return result + ' Not a PDF document\n'
|
||||
result += ' PDF Header: %s\n' % xmlDoc.documentElement.getAttribute('Header')
|
||||
for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
|
||||
result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count')))
|
||||
if int(node.getAttribute('HexcodeCount')) > 0:
|
||||
result += '(%d)' % int(node.getAttribute('HexcodeCount'))
|
||||
result += '\n'
|
||||
if xmlDoc.documentElement.getAttribute('CountEOF') != '':
|
||||
result += ' %-16s %7d\n' % ('%%EOF', int(xmlDoc.documentElement.getAttribute('CountEOF')))
|
||||
if xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') != '':
|
||||
result += ' %-16s %7d\n' % ('After last %%EOF', int(xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF')))
|
||||
for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes:
|
||||
result += ' %-23s %s\n' % (node.getAttribute('Value'), node.getAttribute('Name'))
|
||||
if xmlDoc.documentElement.getAttribute('TotalEntropy') != '':
|
||||
result += ' Total entropy: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('TotalEntropy'), xmlDoc.documentElement.getAttribute('TotalCount'))
|
||||
if xmlDoc.documentElement.getAttribute('StreamEntropy') != '':
|
||||
result += ' Entropy inside streams: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('StreamEntropy'), xmlDoc.documentElement.getAttribute('StreamCount'))
|
||||
if xmlDoc.documentElement.getAttribute('NonStreamEntropy') != '':
|
||||
result += ' Entropy outside streams: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('NonStreamEntropy'), xmlDoc.documentElement.getAttribute('NonStreamCount'))
|
||||
return result
|
||||
|
||||
def Scan(directory, allNames, extraData, disarm, force):
|
||||
try:
|
||||
if os.path.isdir(directory):
|
||||
for entry in os.listdir(directory):
|
||||
Scan(os.path.join(directory, entry), allNames, extraData, disarm, force)
|
||||
else:
|
||||
result = PDFiD2String(PDFiD(directory, allNames, extraData, disarm, force), force)
|
||||
print(result)
|
||||
logfile = open('PDFiD.log', 'a')
|
||||
logfile.write(result + '\n')
|
||||
logfile.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
#function derived from: http://blog.9bplus.com/pdfidpy-output-to-json
|
||||
def PDFiD2JSON(xmlDoc, force):
|
||||
#Get Top Layer Data
|
||||
errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured')
|
||||
errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage')
|
||||
filename = xmlDoc.documentElement.getAttribute('Filename')
|
||||
header = xmlDoc.documentElement.getAttribute('Header')
|
||||
isPdf = xmlDoc.documentElement.getAttribute('IsPDF')
|
||||
version = xmlDoc.documentElement.getAttribute('Version')
|
||||
entropy = xmlDoc.documentElement.getAttribute('Entropy')
|
||||
|
||||
#extra data
|
||||
countEof = xmlDoc.documentElement.getAttribute('CountEOF')
|
||||
countChatAfterLastEof = xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF')
|
||||
totalEntropy = xmlDoc.documentElement.getAttribute('TotalEntropy')
|
||||
streamEntropy = xmlDoc.documentElement.getAttribute('StreamEntropy')
|
||||
nonStreamEntropy = xmlDoc.documentElement.getAttribute('NonStreamEntropy')
|
||||
|
||||
keywords = []
|
||||
dates = []
|
||||
|
||||
#grab all keywords
|
||||
for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
|
||||
name = node.getAttribute('Name')
|
||||
count = int(node.getAttribute('Count'))
|
||||
if int(node.getAttribute('HexcodeCount')) > 0:
|
||||
hexCount = int(node.getAttribute('HexcodeCount'))
|
||||
else:
|
||||
hexCount = 0
|
||||
keyword = { 'count':count, 'hexcodecount':hexCount, 'name':name }
|
||||
keywords.append(keyword)
|
||||
|
||||
#grab all date information
|
||||
for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes:
|
||||
name = node.getAttribute('Name')
|
||||
value = node.getAttribute('Value')
|
||||
date = { 'name':name, 'value':value }
|
||||
dates.append(date)
|
||||
|
||||
data = { 'countEof':countEof, 'countChatAfterLastEof':countChatAfterLastEof, 'totalEntropy':totalEntropy, 'streamEntropy':streamEntropy, 'nonStreamEntropy':nonStreamEntropy, 'errorOccured':errorOccured, 'errorMessage':errorMessage, 'filename':filename, 'header':header, 'isPdf':isPdf, 'version':version, 'entropy':entropy, 'keywords': { 'keyword': keywords }, 'dates': { 'date':dates} }
|
||||
complete = [ { 'pdfid' : data} ]
|
||||
result = json.dumps(complete)
|
||||
return result
|
||||
|
||||
def Main():
|
||||
oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url]\n' + __description__, version='%prog ' + __version__)
|
||||
oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
|
||||
oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
|
||||
oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
|
||||
oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
|
||||
oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
|
||||
(options, args) = oParser.parse_args()
|
||||
|
||||
if len(args) == 0:
|
||||
if options.disarm:
|
||||
print('Option disarm not supported with stdin')
|
||||
options.disarm = False
|
||||
print(PDFiD2String(PDFiD('', options.all, options.extra, options.disarm, options.force), options.force))
|
||||
elif len(args) == 1:
|
||||
if options.scan:
|
||||
Scan(args[0], options.all, options.extra, options.disarm, options.force)
|
||||
else:
|
||||
print(PDFiD2String(PDFiD(args[0], options.all, options.extra, options.disarm, options.force), options.force))
|
||||
else:
|
||||
oParser.print_help()
|
||||
print('')
|
||||
print(' %s' % __description__)
|
||||
print(' Source code put in the public domain by Didier Stevens, no Copyright')
|
||||
print(' Use at your own risk')
|
||||
print(' https://DidierStevens.com')
|
||||
return
|
||||
|
||||
if __name__ == '__main__':
|
||||
Main()
|
Loading…
Add table
Add a link
Reference in a new issue