├── .gitignore ├── LICENSE.md ├── README.md ├── __init__.py ├── bin └── __init__.py ├── db └── __init__.py ├── lib ├── __init__.py ├── parse │ ├── __init__.py │ ├── pdfminer │ │ ├── __init__.py │ │ ├── arcfour.py │ │ ├── ascii85.py │ │ ├── ccitt.py │ │ ├── lzw.py │ │ ├── pdfdocument.py │ │ ├── pdfparser.py │ │ ├── pdftypes.py │ │ ├── psparser.py │ │ ├── runlength.py │ │ └── utils.py │ └── peepdf │ │ ├── AUTHORS │ │ ├── CHANGELOG │ │ ├── COPYING │ │ ├── JSAnalysis.py │ │ ├── PDFConsole.py │ │ ├── PDFCore.py │ │ ├── PDFCrypto.py │ │ ├── PDFFilters.py │ │ ├── PDFUtils.py │ │ ├── README │ │ ├── TODO │ │ ├── __init__.py │ │ ├── aes.py │ │ ├── aespython │ │ ├── __init__.py │ │ ├── aes_cipher.py │ │ ├── aes_tables.py │ │ ├── cbc_mode.py │ │ ├── cfb_mode.py │ │ ├── key_expander.py │ │ ├── ofb_mode.py │ │ └── test_keys.py │ │ ├── ccitt.py │ │ ├── colorama │ │ ├── PKG-INFO │ │ ├── __init__.py │ │ ├── ansi.py │ │ ├── ansitowin32.py │ │ ├── initialise.py │ │ ├── win32.py │ │ └── winterm.py │ │ ├── jjdecode.py │ │ ├── jsbeautifier │ │ ├── __init__.py │ │ └── unpackers │ │ │ ├── README.specs.mkd │ │ │ ├── __init__.py │ │ │ ├── evalbased.py │ │ │ ├── javascriptobfuscator.py │ │ │ ├── myobfuscate.py │ │ │ ├── packer.py │ │ │ └── urlencode.py │ │ ├── lzw.py │ │ ├── peepdf.dtd │ │ └── peepdf.py ├── scandir.py └── spectragraph │ ├── __init__.py │ ├── conversion.py │ ├── matrix.py │ └── spectragraph.py ├── logs └── __init__.py ├── main.py ├── process ├── __init__.py ├── hashers │ ├── __init__.py │ ├── hasher.py │ ├── pdfminer.py │ └── peepdf.py ├── parsers │ ├── __init__.py │ ├── parse.py │ ├── pdfminer.py │ └── peepdf.py ├── pdf.py ├── pdfhasher.py ├── run-jpexs.py └── sdhasher.py ├── storage ├── __init__.py ├── dbgw.py └── storage.py ├── util ├── __init__.py ├── huntterp.py └── str_utils.py └── xml-output └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .gitignore support plugin (hsz.mobi) 2 | ### vim 
template 3 | [._]*.s[a-w][a-z] 4 | [._]s[a-w][a-z] 5 | *.un~ 6 | Session.vim 7 | .netrwhist 8 | *~ 9 | 10 | 11 | ### C template 12 | # Object files 13 | *.o 14 | *.ko 15 | *.obj 16 | *.elf 17 | 18 | # Precompiled Headers 19 | *.gch 20 | *.pch 21 | 22 | # Libraries 23 | *.lib 24 | *.a 25 | *.la 26 | *.lo 27 | 28 | # Shared objects (inc. Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | 42 | 43 | ### JetBrains template 44 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm 45 | 46 | *.iml 47 | 48 | ## Directory-based project format: 49 | .idea/ 50 | # if you remove the above rule, at least ignore the following: 51 | 52 | # User-specific stuff: 53 | # .idea/workspace.xml 54 | # .idea/tasks.xml 55 | # .idea/dictionaries 56 | 57 | # Sensitive or high-churn files: 58 | # .idea/dataSources.ids 59 | # .idea/dataSources.xml 60 | # .idea/sqlDataSources.xml 61 | # .idea/dynamic.xml 62 | # .idea/uiDesigner.xml 63 | 64 | # Gradle: 65 | # .idea/gradle.xml 66 | # .idea/libraries 67 | 68 | # Mongo Explorer plugin: 69 | # .idea/mongoSettings.xml 70 | 71 | ## File-based project format: 72 | *.ipr 73 | *.iws 74 | 75 | ## Plugin-specific files: 76 | 77 | # IntelliJ 78 | out/ 79 | 80 | # mpeltonen/sbt-idea plugin 81 | .idea_modules/ 82 | 83 | # JIRA plugin 84 | atlassian-ide-plugin.xml 85 | 86 | # Crashlytics plugin (for Android Studio and IntelliJ) 87 | com_crashlytics_export_strings.xml 88 | crashlytics.properties 89 | crashlytics-build.properties 90 | 91 | 92 | ### Xcode template 93 | build/ 94 | *.pbxuser 95 | !default.pbxuser 96 | *.mode1v3 97 | !default.mode1v3 98 | *.mode2v3 99 | !default.mode2v3 100 | *.perspectivev3 101 | !default.perspectivev3 102 | xcuserdata 103 | *.xccheckout 104 | *.moved-aside 105 | DerivedData 106 | *.xcuserstate 107 | 108 | 109 | ### Python template 110 | # Byte-compiled / optimized / DLL files 111 | __pycache__/ 112 | 
*.py[cod] 113 | 114 | # C extensions 115 | *.so 116 | 117 | # Distribution / packaging 118 | .Python 119 | env/ 120 | build/ 121 | develop-eggs/ 122 | dist/ 123 | downloads/ 124 | eggs/ 125 | lib64/ 126 | parts/ 127 | sdist/ 128 | var/ 129 | *.egg-info/ 130 | .installed.cfg 131 | *.egg 132 | 133 | # PyInstaller 134 | # Usually these files are written by a python script from a template 135 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 136 | *.manifest 137 | *.spec 138 | 139 | # Installer logs 140 | pip-log.txt 141 | pip-delete-this-directory.txt 142 | 143 | # Unit test / coverage reports 144 | htmlcov/ 145 | .tox/ 146 | .coverage 147 | .cache 148 | nosetests.xml 149 | coverage.xml 150 | 151 | # Translations 152 | *.mo 153 | *.pot 154 | 155 | # Django stuff: 156 | *.log 157 | 158 | # Sphinx documentation 159 | _build/ 160 | 161 | # PyBuilder 162 | target/ 163 | 164 | 165 | ### OSX template 166 | .DS_Store 167 | .AppleDouble 168 | .LSOverride 169 | 170 | # Icon must end with two \r 171 | Icon 172 | 173 | # Thumbnails 174 | ._* 175 | 176 | # Files that might appear on external disk 177 | .Spotlight-V100 178 | .Trashes 179 | 180 | # Directories potentially created on remote AFP share 181 | .AppleDB 182 | .AppleDesktop 183 | Network Trash Folder 184 | Temporary Items 185 | .apdisk 186 | 187 | 188 | ### Java template 189 | *.class 190 | 191 | # Mobile Tools for Java (J2ME) 192 | .mtj.tmp/ 193 | 194 | # Package Files # 195 | *.jar 196 | *.war 197 | *.ear 198 | 199 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 200 | hs_err_pid* 201 | 202 | 203 | ### Added by nabu authors 204 | *.sqlite 205 | docs/ 206 | xml-output/*.zip 207 | xml-output/*.xml 208 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Use of Nabu and related source code is subject to the terms 2 | of the 
following licenses: 3 | 4 | GNU General Public License (GPL) Rights pursuant to Version 2, June 1991 5 | Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013 6 | 7 | NO WARRANTY 8 | 9 | ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER 10 | PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY 11 | PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN 12 | "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY 13 | KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT 14 | LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE, 15 | MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE 16 | OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT, 17 | SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY 18 | TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF 19 | WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES. 20 | LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF 21 | CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON 22 | CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE 23 | DELIVERABLES UNDER THIS LICENSE. 24 | 25 | Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie 26 | Mellon University, its trustees, officers, employees, and agents from 27 | all claims or demands made against them (and any related losses, 28 | expenses, or attorney's fees) arising out of, or relating to Licensee's 29 | and/or its sub licensees' negligent use or willful misuse of or 30 | negligent conduct or willful misconduct regarding the Software, 31 | facilities, or other rights or assistance granted by Carnegie Mellon 32 | University under this License, including, but not limited to, any 33 | claims of product liability, personal injury, death, damage to 34 | property, or violation of any laws or regulations. 
35 | 36 | Carnegie Mellon University Software Engineering Institute authored 37 | documents are sponsored by the U.S. Department of Defense under 38 | Contract FA8721-05-C-0003. Carnegie Mellon University retains 39 | copyrights in all material produced under this contract. The U.S. 40 | Government retains a non-exclusive, royalty-free license to publish or 41 | reproduce these documents, or allow others to do so, for U.S. 42 | Government purposes only pursuant to the copyright license under the 43 | contract clause at 252.227.7013. 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | About 2 | ===== 3 | Nabu is a tool (work in progress) for parsing, constructing, and comparing the structural graphs of a large collection 4 | of PDF documents. The comparisons are based on the work of [NetSimile](http://arxiv.org/abs/1209.2684). 5 | 6 | This tool grew from PDFrankenstein, and now includes JavaScript in the PDF database. To view the JS after building 7 | your database: 8 | 9 | `sqlite3 -cmd "select js from pdfs" db/nabu-graphdb.sqlite` 10 | 11 | Dependencies 12 | ------------ 13 | * networkx 14 | * scipy 15 | * matplotlib 16 | * psycopg2 (PostgreSQL Python module; also requires PostgreSQL) 17 | 18 | Usage 19 | ----- 20 | 21 | The workflow with Nabu will typically be: 22 | 23 | 1. Build a graph database from a collection of PDFs 24 | 2. Score the graphs for similarity 25 | 3. Draw dendrogram clusters (TODO) 26 | 27 | #### Building the Database 28 | 29 | Build the graph database by parsing the specified PDFs. PDFs are given with full paths in a line-separated file. 30 | `python main.py [options] build fin` 31 | 32 | #### Scoring the Database 33 | 34 | Requires a list of files to score. If the files are not present in the graph database then they will be added. 
Nabu will output (in CSV format): `subject, family, candidate, score` 35 | 36 | `python main.py [options] score fin` 37 | 38 | #### Drawing Clusters 39 | 40 | Runs from the graph database. Uses scipy and matplotlib to draw the dendrogram of the set of PDFs based on the 41 | similarity score. Currently uses the Canberra distance metric. 42 | 43 | `python main.py [options] cluster` 44 | 45 | #### Options 46 | 47 | ``` 48 | positional arguments: 49 | action build | score | cluster (under construction) 50 | fin line separated text file of samples to run 51 | 52 | optional arguments: 53 | -h, --help show this help message and exit 54 | -b, --beginning Start from beginning. Don't resume job file based on completed 55 | -c CHUNK, --chunk CHUNK 56 | Chunk size in jobs. Default is num_procs * 1 57 | -d, --debug Spam the terminal with debug output 58 | -g GRAPHDB, --graphdb GRAPHDB 59 | Graph database filename. Default is nabu- 60 | graphdb.sqlite 61 | -j JOBDB, --jobdb JOBDB 62 | Job database filename. Default is nabu-jobs.sqlite 63 | --xmldb XMLDB xml database filename. Default is nabu-xml.sqlite 64 | --dbdir DBDIR Database directory. Default is .../nabu/db/ 65 | --logdir LOGDIR Logging directory. Default is .../nabu/logs/ 66 | --parser PARSER Type of pdf parser to use. Default is pdfminer 67 | -p PROCS, --procs PROCS 68 | Number of parallel processes. Default is 2/3 cpu core 69 | count 70 | -t THRESH, --thresh THRESH 71 | Threshold which reports only graphs with similarities 72 | at or below this value. 
73 | -u, --update Ignore completed jobs 74 | ``` 75 | 76 | References 77 | ---------- 78 | [NetSimile](http://arxiv.org/abs/1209.2684) -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'sei-mappel' 2 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'honey' 2 | -------------------------------------------------------------------------------- /db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-sei/nabu/3afcab20a5ddd8a9b984d8f34756ebedfc0b45a9/db/__init__.py -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'honey' 2 | -------------------------------------------------------------------------------- /lib/parse/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'honey' 2 | -------------------------------------------------------------------------------- /lib/parse/pdfminer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-sei/nabu/3afcab20a5ddd8a9b984d8f34756ebedfc0b45a9/lib/parse/pdfminer/__init__.py -------------------------------------------------------------------------------- /lib/parse/pdfminer/arcfour.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ Python implementation of Arcfour encryption algorithm. 4 | 5 | This code is in the public domain. 
## Arcfour
##
class Arcfour(object):

    """
    Arcfour (RC4) stream cipher.

    The key schedule runs once in the constructor.  Every call to
    process() continues the same keystream, so a message may be fed in
    several chunks.  Encryption and decryption are the same operation.

    >>> Arcfour(b'Key').process(b'Plaintext').encode('hex')
    'bbf316e8d940af0ad3'
    >>> Arcfour(b'Wiki').process(b'pedia').encode('hex')
    '1021bf0420'
    >>> Arcfour(b'Secret').process(b'Attack at dawn').encode('hex')
    '45a01f645fc35b383552544b9bf5'
    """

    def __init__(self, key):
        """Run the key-scheduling algorithm for the byte string *key*."""
        # bytearray yields integers whatever the byte-string type is, so no
        # per-character ord() is needed.
        key = bytearray(key)
        s = list(range(256))
        j = 0
        klen = len(key)
        for i in range(256):
            j = (j + s[i] + key[i % klen]) % 256
            (s[i], s[j]) = (s[j], s[i])
        self.s = s
        (self.i, self.j) = (0, 0)
        return

    def process(self, data):
        """XOR *data* with the next len(data) keystream bytes.

        Returns a new byte string of the same length as *data*; the
        cipher state advances so the next call continues the stream.
        """
        (i, j) = (self.i, self.j)
        s = self.s
        # Work in place on a mutable copy instead of growing a string one
        # character at a time.
        out = bytearray(data)
        for (n, c) in enumerate(out):
            i = (i+1) % 256
            j = (j+s[i]) % 256
            (s[i], s[j]) = (s[j], s[i])
            k = s[(s[i]+s[j]) % 256]
            out[n] = c ^ k
        (self.i, self.j) = (i, j)
        return bytes(out)

    encrypt = decrypt = process
# asciihexdecode(data)
# Byte patterns: one complete pair of hex digits, and a lone trailing digit.
hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)


def asciihexdecode(data):
    """
    ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored. A right angle bracket character (>) indicates
    EOD. Any other characters will cause an error. If the filter encounters
    the EOD marker after reading an odd number of hexadecimal digits, it
    will behave as if a 0 followed the last digit.

    This implementation is lenient: characters that are neither hex
    digits, white space nor '>' do not raise an error.

    >>> asciihexdecode(b'61 62 2e6364   65')
    'ab.cde'
    >>> asciihexdecode(b'61 62 2e6364   657>')
    'ab.cdep'
    >>> asciihexdecode(b'7>')
    'p'
    >>> asciihexdecode(b'6 1>6 2')
    'a'
    """
    # '>' is the EOD marker: nothing after the first one is encoded data.
    payload = data.split(b'>', 1)[0]
    # White space is insignificant everywhere, including between the two
    # digits of a pair, so drop it before pairing the digits up.
    digits = b''.join(payload.split())
    out = bytearray(int(hx, 16) for hx in hex_re.findall(digits))
    m = trail_re.search(digits)
    if m:
        # Odd number of digits: behave as if a 0 followed the last one.
        out.append(int(m.group(1) + b'0', 16))
    return bytes(out)
# lzwdecode
def lzwdecode(data):
    """
    Run LZWDecoder over the string *data* and return everything it
    yields, joined into a single string.

    >>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
    '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
    """
    decoder = LZWDecoder(StringIO(data))
    chunks = decoder.run()
    return ''.join(chunks)
## PDFParser
##
class PDFParser(PSStackParser):

    """
    PDFParser fetches PDF objects from a file stream.
    It can handle indirect references by referring to
    a PDF document set by the set_document method.

    Typical usage:
      parser = PDFParser(fp)
      parser.read_xref()
      parser.read_xref(fallback=True) # optional
      parser.set_document(doc)
      parser.seek(offset)
      parser.nextobject()

    NOTE(review): read_xref is not defined in this class as shown here;
    confirm it is provided elsewhere before relying on the usage above.
    """

    def __init__(self, fp, dbg=False):
        # fp and dbg are handed straight to the PSStackParser constructor.
        PSStackParser.__init__(self, fp, dbg)
        # Document used to build PDFObjRef objects and to obtain the
        # decipher callable for streams; None until set_document() is called.
        self.doc = None
        # When true, do_keyword() does not read /Length from the stream
        # dictionary and locates the end of the data only by scanning for
        # 'endstream'.
        self.fallback = False
        return

    def set_document(self, doc):
        """Associates the parser with a PDFDocument object."""
        self.doc = doc
        return

    # Keyword objects compared against incoming tokens in do_keyword().
    KEYWORD_R = KWD('R')
    KEYWORD_NULL = KWD('null')
    KEYWORD_ENDOBJ = KWD('endobj')
    KEYWORD_STREAM = KWD('stream')
    KEYWORD_XREF = KWD('xref')
    KEYWORD_STARTXREF = KWD('startxref')

    def do_keyword(self, pos, token):
        """Handles PDF-related keywords.

        pos is the position of the keyword and token the keyword object.
        Depending on the keyword, items are popped from the parser stack
        and either added to the results or replaced by a new stack entry.
        Raises PDFSyntaxError only when STRICT is true; otherwise malformed
        input is tolerated as far as the handlers below allow.
        """

        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
            # Hand the single preceding stack item to the results.
            self.add_results(*self.pop(1))

        elif token is self.KEYWORD_ENDOBJ:
            # Hand the four preceding stack items to the results.
            self.add_results(*self.pop(4))

        elif token is self.KEYWORD_NULL:
            # null object
            self.push((pos, None))

        elif token is self.KEYWORD_R:
            # reference to indirect object
            try:
                ((_, objid), (_, genno)) = self.pop(2)
                (objid, genno) = (int(objid), int(genno))
                obj = PDFObjRef(self.doc, objid, genno)
                self.push((pos, obj))
            except PSSyntaxError:
                # Best effort: a reference that cannot be built is dropped.
                pass

        elif token is self.KEYWORD_STREAM:
            # stream object
            try:
                ((_, dic),) = self.pop(1)
            except ValueError:
                # The unpacking above failed; fall back to a placeholder
                # that dict_value() below will not accept as a dict.
                dic = []

            dic = dict_value(dic)
            objlen = 0
            if not self.fallback:
                try:
                    objlen = int_value(dic['Length'])
                except KeyError:
                    if STRICT:
                        raise PDFSyntaxError('/Length is undefined: %r' % dic)
            # Re-read the line holding the keyword so that pos can be moved
            # to the first byte after it.
            self.seek(pos)
            try:
                (_, line) = self.nextline()  # 'stream'
            except PSEOF:
                if STRICT:
                    raise PDFSyntaxError('Unexpected EOF')
                return
            pos += len(line)
            # Read the declared number of bytes directly from the file, then
            # resume line-based parsing right after them.
            self.fp.seek(pos)
            data = self.fp.read(objlen)
            self.seek(pos+objlen)
            # Keep appending lines until one contains 'endstream'; this also
            # covers a missing or too-small /Length (objlen may be 0).
            while 1:
                try:
                    (linepos, line) = self.nextline()
                except PSEOF:
                    if STRICT:
                        raise PDFSyntaxError('Unexpected EOF')
                    break
                if 'endstream' in line:
                    # Keep only the part of the line before the marker.
                    i = line.index('endstream')
                    objlen += i
                    data += line[:i]
                    break
                objlen += len(line)
                data += line
            self.seek(pos+objlen)
            # XXX limit objlen not to exceed object boundary
            if 2 <= self.debug:
                print >>sys.stderr, 'Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \
                    (pos, objlen, dic, data[:10])
            # NOTE(review): self.doc must have been set via set_document()
            # by now, otherwise the attribute access below fails.
            obj = PDFStream(dic, data, self.doc.decipher)
            self.push((pos, obj))

        else:
            # others
            self.push((pos, token))

        return
148 | """ 149 | 150 | def __init__(self, data): 151 | PDFParser.__init__(self, StringIO(data)) 152 | return 153 | 154 | def flush(self): 155 | self.add_results(*self.popall()) 156 | return 157 | 158 | def do_keyword(self, pos, token): 159 | if token is self.KEYWORD_R: 160 | # reference to indirect object 161 | try: 162 | ((_, objid), (_, genno)) = self.pop(2) 163 | (objid, genno) = (int(objid), int(genno)) 164 | obj = PDFObjRef(self.doc, objid, genno) 165 | self.push((pos, obj)) 166 | except PSSyntaxError: 167 | pass 168 | return 169 | # others 170 | self.push((pos, token)) 171 | return 172 | -------------------------------------------------------------------------------- /lib/parse/pdfminer/pdftypes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import zlib 3 | from lzw import lzwdecode 4 | from ascii85 import ascii85decode, asciihexdecode 5 | from runlength import rldecode 6 | from ccitt import ccittfaxdecode 7 | from psparser import PSException, PSObject 8 | from psparser import LIT, STRICT 9 | from utils import apply_png_predictor, isnumber 10 | 11 | LITERAL_CRYPT = LIT('Crypt') 12 | 13 | # Abbreviation of Filter names in PDF 4.8.6. 
## PDFObjRef
##
class PDFObjRef(PDFObject):

    """Indirect reference to a PDF object, identified by its object id.

    The referenced object is looked up through the document given at
    construction time when resolve() is called.
    """

    def __init__(self, doc, objid, _):
        """Create a reference to object *objid* of document *doc*.

        The third argument (the generation number) is accepted for
        signature compatibility but not stored.  Raises PDFValueError for
        object id 0 when STRICT is true.
        """
        if objid == 0:
            if STRICT:
                raise PDFValueError('PDF object id cannot be 0.')
        self.doc = doc
        self.objid = objid
        #self.genno = genno  # Never used.
        return

    def __repr__(self):
        # The format string must contain a conversion for the argument;
        # an empty one makes the % operator raise TypeError.
        return '<PDFObjRef:%r>' % (self.objid,)

    def resolve(self, default=None):
        """Return the referenced object, or *default* if the document
        raises PDFObjectNotFound for this object id."""
        try:
            return self.doc.getobj(self.objid)
        except PDFObjectNotFound:
            return default
def decipher_all(decipher, objid, genno, x):
    """Recursively deciphers the given object.

    Strings are passed to decipher(objid, genno, string) and replaced by
    its result.  Lists are rebuilt as new lists, dictionaries are updated
    in place and returned, and any other value is returned unchanged.
    """
    if isinstance(x, str):
        return decipher(objid, genno, x)
    if isinstance(x, dict):
        # Only existing keys are reassigned, so iterating while updating
        # is safe.
        for key in x:
            x[key] = decipher_all(decipher, objid, genno, x[key])
        return x
    if isinstance(x, list):
        return [decipher_all(decipher, objid, genno, item) for item in x]
    return x
## PDFStream type
##
class PDFStream(PDFObject):

    """A PDF stream: an attribute dictionary plus a block of data.

    The undecoded bytes are held in ``rawdata`` until decode() runs; after
    that the decoded bytes are in ``data`` and ``rawdata`` is None.
    """

    def __init__(self, attrs, rawdata, decipher=None):
        """Store the attribute dict, the raw data and an optional
        decipher(objid, genno, data) callable applied before decoding."""
        assert isinstance(attrs, dict)
        self.attrs = attrs
        self.rawdata = rawdata
        self.decipher = decipher
        self.data = None
        self.objid = None
        self.genno = None
        return

    def set_objid(self, objid, genno):
        """Record the object id and generation number of this stream."""
        self.objid = objid
        self.genno = genno
        return

    def __repr__(self):
        # The format strings must carry one conversion per argument; empty
        # ones make the % operator raise TypeError.
        if self.data is None:
            assert self.rawdata is not None
            return '<PDFStream(%r): raw=%d, %r>' % (self.objid, len(self.rawdata), self.attrs)
        else:
            assert self.data is not None
            return '<PDFStream(%r): len=%d, %r>' % (self.objid, len(self.data), self.attrs)

    def __contains__(self, name):
        return name in self.attrs

    def __getitem__(self, name):
        return self.attrs[name]

    def get(self, name, default=None):
        """Return attribute *name*, or *default* if it is absent."""
        return self.attrs.get(name, default)

    def get_any(self, names, default=None):
        """Return the value of the first of *names* present in the
        attributes, or *default* if none is present."""
        for name in names:
            if name in self.attrs:
                return self.attrs[name]
        return default

    def get_filters(self):
        """Return the stream's filters as a list (empty if there are none).

        When the attribute already is a list, that same list object is
        returned, so callers must not modify it.
        """
        filters = self.get_any(('F', 'Filter'))
        if not filters:
            return []
        if isinstance(filters, list):
            return filters
        return [filters]

    def _flatten_filters(self, filters):
        """Yield the entries of *filters* in order, with every PDFObjRef
        replaced by what it resolves to (a single filter or a sequence)."""
        for f in filters:
            while isinstance(f, PDFObjRef):
                f = f.resolve()
            if f is None:
                # Unresolvable reference: nothing to apply.
                continue
            if isinstance(f, (list, tuple)):
                for inner in self._flatten_filters(f):
                    yield inner
            else:
                yield f

    def decode(self):
        """Decipher and decode ``rawdata`` into ``data``.

        Applies the decipher callable (if any), then each filter in order,
        then the predictor named in the decode parameters.  Raises
        PDFNotImplementedError for unsupported filters or predictors, and
        PDFException for bad zlib data when STRICT is true.
        """
        assert self.data is None and self.rawdata is not None
        data = self.rawdata
        if self.decipher:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
        # Build a fresh list: extending the list returned by get_filters()
        # while iterating over it would modify the stream's own attributes
        # and apply resolved filters out of order.
        filters = list(self._flatten_filters(self.get_filters()))
        if not filters:
            self.data = data
            self.rawdata = None
            return
        for f in filters:
            params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
            if f in LITERALS_FLATE_DECODE:
                # will get errors if the document is encrypted.
                try:
                    data = zlib.decompress(data)
                except zlib.error as e:
                    if STRICT:
                        raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
                    data = ''
            elif f in LITERALS_LZW_DECODE:
                data = lzwdecode(data)
            elif f in LITERALS_ASCII85_DECODE:
                data = ascii85decode(data)
            elif f in LITERALS_ASCIIHEX_DECODE:
                data = asciihexdecode(data)
            elif f in LITERALS_RUNLENGTH_DECODE:
                data = rldecode(data)
            elif f in LITERALS_CCITTFAX_DECODE:
                data = ccittfaxdecode(data, params)
            elif f == LITERAL_CRYPT:
                # not yet..
                raise PDFNotImplementedError('/Crypt filter is unsupported')
            else:
                raise PDFNotImplementedError('Unsupported filter: %r' % f)
            # apply predictors
            if 'Predictor' in params:
                pred = int_value(params['Predictor'])
                if pred == 1:
                    # no predictor
                    pass
                elif 10 <= pred:
                    # PNG predictor
                    colors = int_value(params.get('Colors', 1))
                    columns = int_value(params.get('Columns', 1))
                    bitspercomponent = int_value(params.get('BitsPerComponent', 8))
                    data = apply_png_predictor(pred, colors, columns, bitspercomponent, data)
                else:
                    raise PDFNotImplementedError('Unsupported predictor: %r' % pred)
        self.data = data
        self.rawdata = None
        return

    def get_data(self):
        """Return the decoded data, decoding on first use."""
        if self.data is None:
            self.decode()
        return self.data

    def get_rawdata(self):
        """Return the undecoded data (None once decode() has run)."""
        return self.rawdata
def rldecode(data):
    """
    RunLength decoder (Adobe version) implementation based on PDF Reference
    version 1.4 section 3.3.4:
        The RunLengthDecode filter decodes data that has been encoded in a
        simple byte-oriented format based on run length. The encoded data
        is a sequence of runs, where each run consists of a length byte
        followed by 1 to 128 bytes of data. If the length byte is in the
        range 0 to 127, the following length + 1 (1 to 128) bytes are
        copied literally during decompression. If length is in the range
        129 to 255, the following single byte is to be copied 257 - length
        (2 to 128) times during decompression. A length value of 128
        denotes EOD.

    Truncated input is tolerated: a literal run that is cut short yields the
    bytes that are present, and a repeat run whose data byte is missing ends
    the decoding instead of raising IndexError.

    >>> s = b'\x05123456\xfa7\x04abcde\x80junk'
    >>> rldecode(s)
    '1234567777777abcde'
    """
    # bytearray indexing yields integers for both str (Python 2) and bytes,
    # so no ord() call is needed and the output is built without re-joining.
    encoded = bytearray(data)
    decoded = bytearray()
    total = len(encoded)
    pos = 0
    while pos < total:
        length = encoded[pos]
        if length == 128:
            # explicit end-of-data marker; anything after it is ignored
            break
        if length < 128:
            # literal run: copy the next length+1 bytes verbatim
            decoded += encoded[pos + 1:pos + 2 + length]
            pos += length + 2
        else:
            if pos + 1 >= total:
                # repeat run without its data byte: input is truncated
                break
            decoded += encoded[pos + 1:pos + 2] * (257 - length)
            pos += 2
    return bytes(decoded)
## PNG Predictor
##
def _paeth_predictor(left, up, upleft):
    """Return whichever of left/up/upleft is closest to left + up - upleft."""
    estimate = left + up - upleft
    dist_left = abs(estimate - left)
    dist_up = abs(estimate - up)
    dist_upleft = abs(estimate - upleft)
    if dist_left <= dist_up and dist_left <= dist_upleft:
        return left
    if dist_up <= dist_upleft:
        return up
    return upleft


def apply_png_predictor(pred, colors, columns, bitspercomponent, data):
    """Undo PNG row filtering on predictor-encoded stream data.

    Each encoded row is one filter-type byte followed by
    colors*columns*bitspercomponent/8 data bytes. Filter types 0 (None),
    1 (Sub), 2 (Up), 3 (Average) and 4 (Paeth) are supported.

    pred             -- predictor number from the decode parameters; it is
                        not consulted because every row names its own filter.
    colors           -- number of colour components per sample.
    columns          -- number of samples per row.
    bitspercomponent -- bits per component; only 8 is supported.
    data             -- the filtered bytes.

    Returns the reconstructed bytes. Raises ValueError for an unsupported
    bitspercomponent or an unknown filter type.
    """
    if bitspercomponent != 8:
        # unsupported
        raise ValueError(bitspercomponent)
    nbytes = colors*columns*bitspercomponent//8
    # distance, in bytes, to the corresponding byte of the previous pixel
    bpp = colors*bitspercomponent//8
    src = bytearray(data)
    out = bytearray()
    # The row "above" the first one is all zeros and must be a full row wide,
    # otherwise an Up/Average/Paeth first row is cut short.
    prev = bytearray(nbytes)
    for start in range(0, len(src), nbytes+1):
        ft = src[start]
        raw = src[start+1:start+1+nbytes]
        row = bytearray(len(raw))
        if ft == 0:
            # PNG none
            row[:] = raw
        elif ft == 1:
            # PNG sub
            for (j, b) in enumerate(raw):
                left = row[j-bpp] if bpp <= j else 0
                row[j] = (b+left) & 255
        elif ft == 2:
            # PNG up
            for (j, b) in enumerate(raw):
                row[j] = (b+prev[j]) & 255
        elif ft == 3:
            # PNG average
            for (j, b) in enumerate(raw):
                left = row[j-bpp] if bpp <= j else 0
                row[j] = (b+((left+prev[j])//2)) & 255
        elif ft == 4:
            # PNG paeth
            for (j, b) in enumerate(raw):
                if bpp <= j:
                    left = row[j-bpp]
                    upleft = prev[j-bpp]
                else:
                    left = upleft = 0
                row[j] = (b+_paeth_predictor(left, prev[j], upleft)) & 255
        else:
            # unsupported
            raise ValueError(ft)
        out += row
        prev = row
    return bytes(out)
# csort
def csort(objs, key=lambda x: x):
    """Order-preserving sorting function.

    Returns a new list of objs ordered by key(obj); items whose keys compare
    equal keep their original relative order.

    sorted() is guaranteed to be stable, so no explicit index tie-breaker is
    needed. The previous index table required every item to be hashable and
    mapped equal items to the position of their last occurrence, which could
    move an earlier duplicate behind items that originally followed it.
    """
    return sorted(objs, key=key)
170 | elif l == 3: 171 | return struct.unpack('>L', '\x00'+s)[0] 172 | elif l == 4: 173 | return struct.unpack('>L', s)[0] 174 | else: 175 | raise TypeError('invalid length: %d' % l) 176 | 177 | 178 | # decode_text 179 | PDFDocEncoding = ''.join(unichr(x) for x in ( 180 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 181 | 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 182 | 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 0x0017, 183 | 0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, 184 | 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 185 | 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 186 | 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 187 | 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 188 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 189 | 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 190 | 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 191 | 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 192 | 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 193 | 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 194 | 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 195 | 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000, 196 | 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 197 | 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, 198 | 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 199 | 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, 200 | 0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 201 | 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, 202 | 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 203 | 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 204 | 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 
# enc
def enc(x, codec='ascii'):
    """Encodes a string for SGML/XML/HTML.

    Replaces the markup-significant characters &, >, < and " with their
    entity references, then encodes the text with codec; characters the
    codec cannot represent become numeric character references.
    """
    # '&' must be handled first so the ampersands introduced by the other
    # replacements are not escaped a second time.
    x = x.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('"', '&quot;')
    return x.encode(codec, 'xmlcharrefreplace')
class Plane(object):
    """A set-like container for objects placed on a plane.

    Every object must expose x0, y0, x1 and y1 attributes. Objects are kept
    in a set and additionally indexed in a dictionary keyed by grid cell, so
    find() only has to inspect the cells that the queried rectangle touches.
    """

    def __init__(self, bbox, gridsize=50):
        """bbox is the (x0, y0, x1, y1) area covered by the grid index."""
        self._objs = set()
        self._grid = {}
        self.gridsize = gridsize
        (self.x0, self.y0, self.x1, self.y1) = bbox
        return

    def __repr__(self):
        # The format string had been reduced to '', which made repr() return
        # an empty string regardless of the contents.
        return ('<Plane objs=%r>' % (list(self),))

    def __iter__(self):
        return iter(self._objs)

    def __len__(self):
        return len(self._objs)

    def __contains__(self, obj):
        return obj in self._objs

    def _getrange(self, bbox):
        """Yield the (x, y) grid keys overlapped by bbox, clipped to the plane."""
        (x0, y0, x1, y1) = bbox
        if (x1 <= self.x0 or self.x1 <= x0 or
                y1 <= self.y0 or self.y1 <= y0):
            # entirely outside the indexed area: no cells at all
            return
        x0 = max(self.x0, x0)
        y0 = max(self.y0, y0)
        x1 = min(self.x1, x1)
        y1 = min(self.y1, y1)
        for y in drange(y0, y1, self.gridsize):
            for x in drange(x0, x1, self.gridsize):
                yield (x, y)
        return

    # extend(objs)
    def extend(self, objs):
        """Add every object in objs."""
        for obj in objs:
            self.add(obj)
        return

    # add(obj): place an object.
    def add(self, obj):
        """Register obj in every grid cell it overlaps and in the object set."""
        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
            if k not in self._grid:
                r = []
                self._grid[k] = r
            else:
                r = self._grid[k]
            r.append(obj)
        self._objs.add(obj)
        return

    # remove(obj): displace an object.
    def remove(self, obj):
        """Remove obj; raises KeyError when obj was never added."""
        for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
            try:
                self._grid[k].remove(obj)
            except (KeyError, ValueError):
                # the cell is missing or does not list obj; nothing to undo
                pass
        self._objs.remove(obj)
        return

    # find(): finds objects that are in a certain area.
    def find(self, bbox):
        """Yield each object whose rectangle overlaps bbox, at most once."""
        (x0, y0, x1, y1) = bbox
        done = set()
        for k in self._getrange((x0, y0, x1, y1)):
            if k not in self._grid:
                continue
            for obj in self._grid[k]:
                if obj in done:
                    continue
                done.add(obj)
                if (obj.x1 <= x0 or x1 <= obj.x0 or
                        obj.y1 <= y0 or y1 <= obj.y0):
                    continue
                yield obj
        return
(Thanks to @hiddenillusion) 23 | - Bug in PDFStream.decode and PDFStream.encode, dealing with an array of filter parameters (Thanks to @binjo) 24 | 25 | 26 | ----------------------------------------------- 27 | peepdf Black Hat Arsenal (0.1 r92), 2012-03-16 28 | ----------------------------------------------- 29 | 30 | * New features: 31 | 32 | - Added support for more parameters in Flate/LZW decode (stream filters) 33 | - Encryption algorithm now showing in document information 34 | - Added XML output and SHA hash to file information 35 | - Improved unescape function to support mixed escaped formats (eg. "%u6734%34%u8790") 36 | - Added xor and xor_search commands 37 | - Added easy way of redirect console output (>, >>, $>, $>>) 38 | - Added xor function by Evan Fosmark 39 | - Added detection of CVE-2011-4369 (/PRC) 40 | - Added hash command (Thanks to @binjo for code and comments) 41 | - Added js_beautify command 42 | - Update function added 43 | - Added new vulns and showing information related to non JS vulns 44 | - Added escape sequence in the limited output 45 | - Added ascii85 decode from pdfminer to improve code and avoid bugs (Thanks to Brandon Dixon!) 46 | - Added lzwdecode from pdfminer to improve code and avoid bugs 47 | 48 | * Fixes: 49 | 50 | - Update process rewritten, now based on hashing of files 51 | - Silly bug in computeUserPass function (Thanks to Christian Martorella!) 
52 | - Added binary mode in files operations 53 | - Recursion bug in update function 54 | - Minor bug in do_embed function 55 | - Bug to support encoding following PDF specifications (Issue 3 by czchen) 56 | - Bug to handle negative numbers in P element 57 | - Bug in the xref table when creating a new PDF (Issue 2) 58 | - Silly bug when parsing filter parameters 59 | - Bug related to updating objects and statistics of PDF files 60 | - Some bugs related to offsets calculation 61 | - Fixed "replace" function in PDFObjectStream 62 | - Fix in asciiHexDecode filter function 63 | 64 | 65 | ----------------------------------------------- 66 | peepdf 0.1 r15, 2011-05-05 67 | ----------------------------------------------- 68 | 69 | - Initial Release 70 | 71 | -------------------------------------------------------------------------------- /lib/parse/peepdf/JSAnalysis.py: -------------------------------------------------------------------------------- 1 | # 2 | # peepdf is a tool to analyse and modify PDF files 3 | # http://peepdf.eternal-todo.com 4 | # By Jose Miguel Esparza 5 | # 6 | # Copyright (C) 2011-2014 Jose Miguel Esparza 7 | # 8 | # This file is part of peepdf. 9 | # 10 | # peepdf is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # peepdf is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with peepdf. If not, see . 
22 | # 23 | 24 | ''' 25 | This module contains some functions to analyse Javascript code inside the PDF file 26 | ''' 27 | 28 | import sys, re , os, jsbeautifier, traceback 29 | from PDFUtils import unescapeHTMLEntities, escapeString 30 | try: 31 | import PyV8 32 | 33 | JS_MODULE = True 34 | 35 | class Global(PyV8.JSClass): 36 | evalCode = '' 37 | 38 | def evalOverride(self, expression): 39 | self.evalCode += '\n\n// New evaluated code\n' + expression 40 | return 41 | 42 | except: 43 | JS_MODULE = False 44 | 45 | 46 | errorsFile = 'errors.txt' 47 | newLine = os.linesep 48 | reJSscript = ']*?contentType\s*?=\s*?[\'"]application/x-javascript[\'"][^>]*?>(.*?)' 49 | preDefinedCode = 'var app = this;' 50 | 51 | def analyseJS(code, context = None, manualAnalysis = False): 52 | ''' 53 | Hooks the eval function and search for obfuscated elements in the Javascript code 54 | 55 | @param code: The Javascript code (string) 56 | @return: List with analysis information of the Javascript code: [JSCode,unescapedBytes,urlsFound,errors,context], where 57 | JSCode is a list with the several stages Javascript code, 58 | unescapedBytes is a list with the parameters of unescape functions, 59 | urlsFound is a list with the URLs found in the unescaped bytes, 60 | errors is a list of errors, 61 | context is the context of execution of the Javascript code. 
62 | ''' 63 | errors = [] 64 | JSCode = [] 65 | unescapedBytes = [] 66 | urlsFound = [] 67 | 68 | try: 69 | code = unescapeHTMLEntities(code) 70 | scriptElements = re.findall(reJSscript, code, re.DOTALL | re.IGNORECASE) 71 | if scriptElements != []: 72 | code = '' 73 | for scriptElement in scriptElements: 74 | code += scriptElement + '\n\n' 75 | code = jsbeautifier.beautify(code) 76 | JSCode.append(code) 77 | 78 | if code != None and JS_MODULE and not manualAnalysis: 79 | if context == None: 80 | context = PyV8.JSContext(Global()) 81 | context.enter() 82 | # Hooking the eval function 83 | context.eval('eval=evalOverride') 84 | #context.eval(preDefinedCode) 85 | while True: 86 | originalCode = code 87 | try: 88 | context.eval(code) 89 | evalCode = context.eval('evalCode') 90 | evalCode = jsbeautifier.beautify(evalCode) 91 | if evalCode != '' and evalCode != code: 92 | code = evalCode 93 | JSCode.append(code) 94 | else: 95 | break 96 | except: 97 | error = str(sys.exc_info()[1]) 98 | open('jserror.log','ab').write(error + newLine) 99 | errors.append(error) 100 | break 101 | 102 | if False: 103 | escapedVars = re.findall('(\w*?)\s*?=\s*?(unescape\((.*?)\))', code, re.DOTALL) 104 | for var in escapedVars: 105 | bytes = var[2] 106 | if bytes.find('+') != -1 or bytes.find('%') == -1: 107 | varContent = getVarContent(code, bytes) 108 | if len(varContent) > 150: 109 | ret = unescape(varContent) 110 | if ret[0] != -1: 111 | bytes = ret[1] 112 | urls = re.findall('https?://.*$', bytes, re.DOTALL) 113 | if bytes not in unescapedBytes: 114 | unescapedBytes.append(bytes) 115 | for url in urls: 116 | if url not in urlsFound: 117 | urlsFound.append(url) 118 | else: 119 | bytes = bytes[1:-1] 120 | if len(bytes) > 150: 121 | ret = unescape(bytes) 122 | if ret[0] != -1: 123 | bytes = ret[1] 124 | urls = re.findall('https?://.*$', bytes, re.DOTALL) 125 | if bytes not in unescapedBytes: 126 | unescapedBytes.append(bytes) 127 | for url in urls: 128 | if url not in urlsFound: 129 | 
urlsFound.append(url) 130 | except: 131 | traceback.print_exc(file=open(errorsFile,'a')) 132 | errors.append('Unexpected error in the JSAnalysis module!!') 133 | finally: 134 | for js in JSCode: 135 | if js == None or js == '': 136 | JSCode.remove(js) 137 | return [JSCode,unescapedBytes,urlsFound,errors,context] 138 | 139 | def getVarContent(jsCode, varContent): 140 | ''' 141 | Given the Javascript code and the content of a variable this method tries to obtain the real value of the variable, cleaning expressions like "a = eval; a(js_code);" 142 | 143 | @param jsCode: The Javascript code (string) 144 | @param varContent: The content of the variable (string) 145 | @return: A string with real value of the variable 146 | ''' 147 | clearBytes = '' 148 | varContent = varContent.replace('\n','') 149 | varContent = varContent.replace('\r','') 150 | varContent = varContent.replace('\t','') 151 | varContent = varContent.replace(' ','') 152 | parts = varContent.split('+') 153 | for part in parts: 154 | if re.match('["\'].*?["\']', part, re.DOTALL): 155 | clearBytes += part[1:-1] 156 | else: 157 | part = escapeString(part) 158 | varContent = re.findall(part + '\s*?=\s*?(.*?)[,;]', jsCode, re.DOTALL) 159 | if varContent != []: 160 | clearBytes += getVarContent(jsCode, varContent[0]) 161 | return clearBytes 162 | 163 | def isJavascript(content): 164 | ''' 165 | Given an string this method looks for typical Javscript strings and try to identify if the string contains Javascrit code or not. 
166 | 167 | @param content: A string 168 | @return: A boolean, True if it seems to contain Javascript code or False in the other case 169 | ''' 170 | JSStrings = ['var ',';',')','(','function ','=','{','}','if ','else','return','while ','for ',',','eval'] 171 | keyStrings = [';','(',')'] 172 | stringsFound = [] 173 | limit = 15 174 | minDistinctStringsFound = 5 175 | results = 0 176 | 177 | if re.findall(reJSscript, content, re.DOTALL | re.IGNORECASE) != []: 178 | return True 179 | 180 | for char in content: 181 | if (ord(char) < 32 and char not in ['\n','\r','\t','\f','\x00']) or ord(char) >= 127: 182 | return False 183 | 184 | for string in JSStrings: 185 | cont = content.count(string) 186 | results += cont 187 | if cont > 0 and string not in stringsFound: 188 | stringsFound.append(string) 189 | elif cont == 0 and string in keyStrings: 190 | return False 191 | 192 | if results > limit and len(stringsFound) >= minDistinctStringsFound: 193 | return True 194 | else: 195 | return False 196 | 197 | def searchObfuscatedFunctions(jsCode, function): 198 | ''' 199 | Search for obfuscated functions in the Javascript code 200 | 201 | @param jsCode: The Javascript code (string) 202 | @param function: The function name to look for (string) 203 | @return: List with obfuscated functions information [functionName,functionCall,containsReturns] 204 | ''' 205 | obfuscatedFunctionsInfo = [] 206 | if jsCode != None: 207 | match = re.findall('\W('+function+'\s{0,5}?\((.*?)\)\s{0,5}?;)', jsCode, re.DOTALL) 208 | if match != []: 209 | for m in match: 210 | if re.findall('return',m[1],re.IGNORECASE) != []: 211 | obfuscatedFunctionsInfo.append([function,m,True]) 212 | else: 213 | obfuscatedFunctionsInfo.append([function,m,False]) 214 | obfuscatedFunctions = re.findall('\s*?((\w*?)\s*?=\s*?'+function+')\s*?;', jsCode, re.DOTALL) 215 | for obfuscatedFunction in obfuscatedFunctions: 216 | obfuscatedElement = obfuscatedFunction[1] 217 | obfuscatedFunctionsInfo += 
def unescape(escapedBytes, unicode = True):
    '''
        This method unescapes the given string

        Supported sequences are %uXXXX and \\uXXXX (emitted low byte first) and %XX.
        Text that is not part of an escape sequence is copied through.

        @param escapedBytes: A string to unescape
        @param unicode: If True a null byte is appended after every character that does not come from a %uXXXX or \\uXXXX sequence
        @return: A tuple (status,statusContent), where statusContent is an unescaped string in case status = 0 or an error in case status = -1
    '''
    #TODO: modify to accept a list of escaped strings?
    if unicode:
        unicodePadding = '\x00'
    else:
        unicodePadding = ''
    try:
        lowered = escapedBytes.lower()
        if lowered.find('%u') == -1 and lowered.find('\\u') == -1 and escapedBytes.find('%') == -1:
            # Nothing to unescape
            return (0,escapedBytes)
        if lowered.find('\\u') != -1:
            separator = '\\'
        else:
            separator = '%'
        pieces = []
        for i, splitByte in enumerate(escapedBytes.split(separator)):
            if splitByte == '':
                continue
            if len(splitByte) > 4 and re.match('u[0-9a-f]{4}',splitByte[:5],re.IGNORECASE):
                # uXXXX: the low byte goes first
                pieces.append(chr(int(splitByte[3:5],16)) + chr(int(splitByte[1:3],16)))
                tail = splitByte[5:]
            elif len(splitByte) > 1 and re.match('[0-9a-f]{2}',splitByte[:2],re.IGNORECASE):
                pieces.append(chr(int(splitByte[:2],16)) + unicodePadding)
                tail = splitByte[2:]
            else:
                # Not an escape sequence: put back the separator removed by
                # split(). This used to be always '%', which corrupted
                # backslash-separated input.
                if i != 0:
                    pieces.append(separator + unicodePadding)
                tail = splitByte
            pieces.extend(char + unicodePadding for char in tail)
        unescapedBytes = ''.join(pieces)
    except Exception:
        return (-1,'Error while unescaping the bytes')
    return (0,unescapedBytes)
-------------------------------------------------------------------------------- 1 | ** Home page ** 2 | 3 | http://peepdf.eternal-todo.com 4 | http://twitter.com/peepdf 5 | 6 | 7 | ** Dependencies ** 8 | 9 | - In order to analyse Javascript code "PyV8" is needed: 10 | 11 | http://code.google.com/p/pyv8/ 12 | 13 | 14 | - The "sctest" command is a wrapper of "sctest" (libemu). Besides libemu, pylibemu is used and must be installed: 15 | 16 | http://libemu.carnivore.it (latest version from git repository, Sourceforge package is outdated) 17 | https://github.com/buffer/pylibemu 18 | 19 | 20 | - To support XML output "lxml" is needed: 21 | 22 | http://lxml.de/installation.html 23 | 24 | 25 | - Included modules: lzw, colorama, jsbeautifier, ccitt, pythonaes (Thanks to all the developers!!) 26 | 27 | 28 | 29 | ** Installation ** 30 | 31 | No installation is needed apart from the dependencies listed above, just execute it! 32 | 33 | 34 | 35 | ** Execution ** 36 | 37 | There are two important options when peepdf is executed: 38 | 39 | -f: Ignores the parsing errors. Analysing malicious files probably leads to parsing errors, so this parameter should be set. 40 | -l: Sets the loose mode, so does not search for the endobj tag because it's not obligatory. Helpful with malformed files. 41 | 42 | 43 | * Simple execution 44 | 45 | Shows the statistics of the file after being decoded/decrypted and analysed: 46 | 47 | python peepdf.py [options] pdf_file 48 | 49 | 50 | * Interactive console 51 | 52 | Executes the interactive console to let you play with the PDF file: 53 | 54 | python peepdf.py -i [options] pdf_file 55 | 56 | If no PDF file is specified it's possible to use the decode/encode/js*/sctest commands and create a new PDF file: 57 | 58 | python peepdf.py -i 59 | 60 | 61 | * Batch execution 62 | 63 | It's possible to use a commands file to specify the commands to be executed in the batch mode.
This type of execution is good to automatise analysis of several files: 64 | 65 | python peepdf.py [options] -s commands_file pdf_file 66 | 67 | 68 | 69 | ** Updating ** 70 | 71 | Just type this and you will be updated to the latest version from the repository: 72 | 73 | python peepdf.py -u 74 | 75 | 76 | 77 | ** Some hints ** 78 | 79 | If the information shown when a PDF file is parsed is not enough to know if it's harmful or not, the following commands can help to do it: 80 | 81 | * tree 82 | 83 | Shows the tree graph of the file or specified version. Here we can see suspicious elements. 84 | 85 | 86 | * offsets 87 | 88 | Shows the physical map of the file or the specified version of the document. This is helpful to see unusual big objects or big spaces between objects. 89 | 90 | 91 | * search 92 | 93 | Search the specified string or hexadecimal string in the objects (decoded and encrypted streams included). 94 | 95 | 96 | * object/rawobject 97 | 98 | Shows the (raw) content of the object. 99 | 100 | 101 | * stream/rawstream 102 | 103 | Shows the (raw) content of the stream. 104 | 105 | 106 | * The rest of commands, of course 107 | 108 | > help 109 | 110 | 111 | 112 | ** Bugs ** 113 | 114 | Send me bugs and comments, please!! ;) You can do it via mail (jesparza AT eternal-todo.com) or through Google Code (http://peepdf.googlecode.com). 115 | 116 | Thanks!! 117 | -------------------------------------------------------------------------------- /lib/parse/peepdf/TODO: -------------------------------------------------------------------------------- 1 | Pending tasks: 2 | 3 | - User manual 4 | - Documentation of methods in PDFCore.py 5 | - Add the rest of supported stream filters (better testing of existent) 6 | - Automatic analysis of embedded PDF files 7 | - Add AES to the encryption implementation 8 | - Improve the automatic Javascript analysis, getting code from other parts of the documents (getAnnots, etc) 9 | - GUI 10 | - ActionScript analysis? 
-------------------------------------------------------------------------------- /lib/parse/peepdf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-sei/nabu/3afcab20a5ddd8a9b984d8f34756ebedfc0b45a9/lib/parse/peepdf/__init__.py -------------------------------------------------------------------------------- /lib/parse/peepdf/aes.py: -------------------------------------------------------------------------------- 1 | # 2 | # peepdf is a tool to analyse and modify PDF files 3 | # http://peepdf.eternal-todo.com 4 | # By Jose Miguel Esparza 5 | # 6 | # Copyright (C) 2012-2014 Jose Miguel Esparza 7 | # 8 | # This file is part of peepdf. 9 | # 10 | # peepdf is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # peepdf is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with peepdf. If not, see . 22 | # 23 | 24 | """ 25 | Created from the demonstration of the pythonaes package. 
def decryptData(data, password = None, keyLength = None, mode = 'CBC'):
    '''
        Method added for peepdf

        Decrypts AES data whose first 16 bytes are the initialization vector.
        Trailing bytes that do not fill a whole 16 byte block are ignored.

        @param data: The encrypted data, starting with the initialization vector (string)
        @param password: The AES key (string)
        @param keyLength: The key length in bits (128, 192 or 256); taken from the password when it is None
        @param mode: The block cipher mode, only 'CBC' is supported
        @return: A tuple (status,statusContent), where statusContent is the decrypted data in case status = 0 or an error in case status = -1
    '''
    # Validate everything first so each failure is reported through the
    # (-1, message) convention instead of a TypeError or NameError later on.
    if password is None:
        return (-1, 'Missing key in AES decryption process')
    if keyLength is None:
        keyLength = len(password)*8
    if keyLength not in [128, 192, 256]:
        return (-1, 'Bad length key in AES decryption process')
    if mode != 'CBC':
        return (-1, 'Unsupported mode in AES decryption process')

    iv = [ord(char) for char in data[:16]]
    key = [ord(char) for char in password]
    data = data[16:]
    if len(data) % 16 != 0:
        data = data[:-(len(data)%16)]
    keyExpander = key_expander.KeyExpander(keyLength)
    expandedKey = keyExpander.expand(key)
    aesCipher = aes_cipher.AESCipher(expandedKey)
    aesMode = cbc_mode.CBCMode(aesCipher, 16)
    aesMode.set_iv(iv)
    # Collect the characters and join once instead of growing a string
    decryptedChars = []
    for i in range(0,len(data),16):
        ciphertext = [ord(char) for char in data[i:i+16]]
        decryptedBytes = aesMode.decrypt_block(ciphertext)
        decryptedChars.extend(chr(byte) for byte in decryptedBytes)
    return (0, ''.join(decryptedChars))
7 | The result of cipher or decipher operations is the transformed 16 element list of integers. 8 | 9 | Running this file as __main__ will result in a self-test of the algorithm. 10 | 11 | Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf 12 | 13 | Copyright (c) 2010, Adam Newman http://www.caller9.com/ 14 | Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php 15 | """ 16 | __author__ = "Adam Newman" 17 | 18 | #Normally use relative import. In test mode use local import. 19 | try:from .aes_tables import sbox,i_sbox,galI,galNI 20 | except ValueError:from aes_tables import sbox,i_sbox,galI,galNI 21 | ups=",".join("s%x"%x for x in range(16)) 22 | upr=ups.replace("s","r") 23 | mix=",".join(",".join(("g{0}[s%x]^g{1}[s%x]^g{2}[s%x]^g{3}[s%x]^r%x"%(i+(i[0]+(0,3,2,1)[j],))).format(j&3,j+1&3,j+2&3,j+3&3) for j in (0,3,2,1)) for i in ((0,1,2,3),(4,5,6,7),(8,9,10,11),(12,13,14,15))).replace("g2","g").replace("g3","g") 24 | i=mix.find("g[") 25 | while i!=-1: 26 | mix=mix[:i]+mix[i+2:i+4]+mix[i+5:] 27 | i=mix.find("g[",i) 28 | imix=",".join(",".join(("g{0}[s%x]^g{1}[s%x]^g{2}[s%x]^g{3}[s%x]"%i).format(j&3,j+1&3,j+2&3,j+3&3) for j in (0,3,2,1)) for i in ((0,1,2,3),(4,5,6,7),(8,9,10,11),(12,13,14,15))) 29 | csl=["s%x"%(x*5&15) for x in range(16)] 30 | csr=["s%x"%(x*-3&15) for x in range(16)] 31 | box=",".join("s[%s]"%i for i in csl) 32 | ibox=",".join("s[%s]^r%x"%i for i in zip(csr,range(16))) 33 | xor=",".join("s[%s]^r%x"%i for i in zip(csl,range(16))) 34 | xori=";".join("s%x^=r%x"%(i,i) for i in range(16)) 35 | ciph="""def decipher_block(f,s): 36 | g0,g1,g2,g3=galNI;ek=f._expanded_key;S=s+[0]*(16-len(s));s=sbox;R=ek[:16];X 37 | for f in range(!16):R=ek[f:f+16];S=B;S=M 38 | R=ek[f+16:] 39 | return """.replace("S",ups).replace("R",upr).replace("X",xori) 40 | class AESCipher: 41 | def __init__(self,expanded_key): 42 | self._expanded_key=expanded_key 43 | self._Nr=len(expanded_key)-16 44 | 
exec(ciph.replace("g2,g3","").replace("dec","c").replace("!","16,f._Nr,").replace("B",box).replace("M",mix)+xor) 45 | exec(ciph.replace("NI","I").replace(":16","f._Nr:").replace("f+16:",":16").replace("!","f._Nr-16,0,-").replace("sbox","i_sbox").replace("B",ibox).replace("M",imix)+ibox) 46 | import unittest 47 | class TestCipher(unittest.TestCase): 48 | def test_cipher(self): 49 | """Test AES cipher with all key lengths""" 50 | import test_keys 51 | import key_expander 52 | test_data = test_keys.TestKeys() 53 | for key_size in 128, 192, 256: 54 | test_key_expander = key_expander.KeyExpander(key_size) 55 | test_expanded_key = test_key_expander.expand(test_data.test_key[key_size]) 56 | test_cipher = AESCipher(test_expanded_key) 57 | test_result_ciphertext = test_cipher.cipher_block(test_data.test_block_plaintext) 58 | self.assertEquals(len([i for i, j in zip(test_result_ciphertext, test_data.test_block_ciphertext_validated[key_size]) if i == j]), 59 | 16,msg='Test %d bit cipher'%key_size) 60 | test_result_plaintext = test_cipher.decipher_block(test_data.test_block_ciphertext_validated[key_size]) 61 | self.assertEquals(len([i for i, j in zip(test_result_plaintext, test_data.test_block_plaintext) if i == j]), 62 | 16,msg='Test %d bit decipher'%key_size) 63 | if __name__ == "__main__": 64 | unittest.main() -------------------------------------------------------------------------------- /lib/parse/peepdf/aespython/aes_tables.py: -------------------------------------------------------------------------------- 1 | """ 2 | Instantiate AES tables for rcon,sbox,i_sbox,and galois_lookup. 
3 | 4 | Copyright (c) 2010,Adam Newman http://www.caller9.com/ 5 | Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php 6 | """ 7 | __author__ = "Adam Newman" 8 | rcon=( 9 | 0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a, 10 | 0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39, 11 | 0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a, 12 | 0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8, 13 | 0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef, 14 | 0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc, 15 | 0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b, 16 | 0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3, 17 | 0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94, 18 | 0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20, 19 | 0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35, 20 | 0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f, 21 | 0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04, 22 | 0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63, 23 | 0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd, 24 | 0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb) 25 | sbox=( 26 | 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, 27 | 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, 28 | 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, 29 | 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, 30 | 
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, 31 | 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, 32 | 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, 33 | 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, 34 | 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, 35 | 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, 36 | 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, 37 | 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, 38 | 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, 39 | 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, 40 | 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, 41 | 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) 42 | i_sbox=( 43 | 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, 44 | 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, 45 | 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, 46 | 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, 47 | 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, 48 | 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, 49 | 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, 50 | 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, 51 | 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, 52 | 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, 53 | 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, 54 | 
0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, 55 | 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, 56 | 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, 57 | 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, 58 | 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) 59 | galNI=(( 60 | 0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e, 61 | 0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e, 62 | 0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e, 63 | 0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e, 64 | 0x80,0x82,0x84,0x86,0x88,0x8a,0x8c,0x8e,0x90,0x92,0x94,0x96,0x98,0x9a,0x9c,0x9e, 65 | 0xa0,0xa2,0xa4,0xa6,0xa8,0xaa,0xac,0xae,0xb0,0xb2,0xb4,0xb6,0xb8,0xba,0xbc,0xbe, 66 | 0xc0,0xc2,0xc4,0xc6,0xc8,0xca,0xcc,0xce,0xd0,0xd2,0xd4,0xd6,0xd8,0xda,0xdc,0xde, 67 | 0xe0,0xe2,0xe4,0xe6,0xe8,0xea,0xec,0xee,0xf0,0xf2,0xf4,0xf6,0xf8,0xfa,0xfc,0xfe, 68 | 0x1b,0x19,0x1f,0x1d,0x13,0x11,0x17,0x15,0x0b,0x09,0x0f,0x0d,0x03,0x01,0x07,0x05, 69 | 0x3b,0x39,0x3f,0x3d,0x33,0x31,0x37,0x35,0x2b,0x29,0x2f,0x2d,0x23,0x21,0x27,0x25, 70 | 0x5b,0x59,0x5f,0x5d,0x53,0x51,0x57,0x55,0x4b,0x49,0x4f,0x4d,0x43,0x41,0x47,0x45, 71 | 0x7b,0x79,0x7f,0x7d,0x73,0x71,0x77,0x75,0x6b,0x69,0x6f,0x6d,0x63,0x61,0x67,0x65, 72 | 0x9b,0x99,0x9f,0x9d,0x93,0x91,0x97,0x95,0x8b,0x89,0x8f,0x8d,0x83,0x81,0x87,0x85, 73 | 0xbb,0xb9,0xbf,0xbd,0xb3,0xb1,0xb7,0xb5,0xab,0xa9,0xaf,0xad,0xa3,0xa1,0xa7,0xa5, 74 | 0xdb,0xd9,0xdf,0xdd,0xd3,0xd1,0xd7,0xd5,0xcb,0xc9,0xcf,0xcd,0xc3,0xc1,0xc7,0xc5, 75 | 0xfb,0xf9,0xff,0xfd,0xf3,0xf1,0xf7,0xf5,0xeb,0xe9,0xef,0xed,0xe3,0xe1,0xe7,0xe5), 76 | (0x00,0x03,0x06,0x05,0x0c,0x0f,0x0a,0x09,0x18,0x1b,0x1e,0x1d,0x14,0x17,0x12,0x11, 77 | 0x30,0x33,0x36,0x35,0x3c,0x3f,0x3a,0x39,0x28,0x2b,0x2e,0x2d,0x24,0x27,0x22,0x21, 78 | 
0x60,0x63,0x66,0x65,0x6c,0x6f,0x6a,0x69,0x78,0x7b,0x7e,0x7d,0x74,0x77,0x72,0x71, 79 | 0x50,0x53,0x56,0x55,0x5c,0x5f,0x5a,0x59,0x48,0x4b,0x4e,0x4d,0x44,0x47,0x42,0x41, 80 | 0xc0,0xc3,0xc6,0xc5,0xcc,0xcf,0xca,0xc9,0xd8,0xdb,0xde,0xdd,0xd4,0xd7,0xd2,0xd1, 81 | 0xf0,0xf3,0xf6,0xf5,0xfc,0xff,0xfa,0xf9,0xe8,0xeb,0xee,0xed,0xe4,0xe7,0xe2,0xe1, 82 | 0xa0,0xa3,0xa6,0xa5,0xac,0xaf,0xaa,0xa9,0xb8,0xbb,0xbe,0xbd,0xb4,0xb7,0xb2,0xb1, 83 | 0x90,0x93,0x96,0x95,0x9c,0x9f,0x9a,0x99,0x88,0x8b,0x8e,0x8d,0x84,0x87,0x82,0x81, 84 | 0x9b,0x98,0x9d,0x9e,0x97,0x94,0x91,0x92,0x83,0x80,0x85,0x86,0x8f,0x8c,0x89,0x8a, 85 | 0xab,0xa8,0xad,0xae,0xa7,0xa4,0xa1,0xa2,0xb3,0xb0,0xb5,0xb6,0xbf,0xbc,0xb9,0xba, 86 | 0xfb,0xf8,0xfd,0xfe,0xf7,0xf4,0xf1,0xf2,0xe3,0xe0,0xe5,0xe6,0xef,0xec,0xe9,0xea, 87 | 0xcb,0xc8,0xcd,0xce,0xc7,0xc4,0xc1,0xc2,0xd3,0xd0,0xd5,0xd6,0xdf,0xdc,0xd9,0xda, 88 | 0x5b,0x58,0x5d,0x5e,0x57,0x54,0x51,0x52,0x43,0x40,0x45,0x46,0x4f,0x4c,0x49,0x4a, 89 | 0x6b,0x68,0x6d,0x6e,0x67,0x64,0x61,0x62,0x73,0x70,0x75,0x76,0x7f,0x7c,0x79,0x7a, 90 | 0x3b,0x38,0x3d,0x3e,0x37,0x34,0x31,0x32,0x23,0x20,0x25,0x26,0x2f,0x2c,0x29,0x2a, 91 | 0x0b,0x08,0x0d,0x0e,0x07,0x04,0x01,0x02,0x13,0x10,0x15,0x16,0x1f,0x1c,0x19,0x1a)) 92 | galI=( 93 | (0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a,0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a, 94 | 0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca,0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba, 95 | 0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1,0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81, 96 | 0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11,0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61, 97 | 0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87,0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7, 98 | 0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67,0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17, 99 | 0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c,0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c, 100 | 0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc,0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc, 101 | 0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b,0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b, 102 | 
0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b,0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb, 103 | 0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0,0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0, 104 | 0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50,0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20, 105 | 0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6,0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6, 106 | 0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26,0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56, 107 | 0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d,0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d, 108 | 0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd,0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d), 109 | (0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31,0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69, 110 | 0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81,0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9, 111 | 0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a,0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12, 112 | 0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa,0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2, 113 | 0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7,0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f, 114 | 0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77,0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f, 115 | 0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc,0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4, 116 | 0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c,0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54, 117 | 0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6,0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e, 118 | 0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76,0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e, 119 | 0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd,0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5, 120 | 0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d,0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55, 121 | 0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30,0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68, 122 | 0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80,0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8, 123 | 0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b,0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13, 124 | 0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb,0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3), 
125 | (0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23,0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b, 126 | 0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3,0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b, 127 | 0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98,0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0, 128 | 0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48,0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20, 129 | 0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e,0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26, 130 | 0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e,0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6, 131 | 0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5,0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d, 132 | 0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25,0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d, 133 | 0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9,0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91, 134 | 0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29,0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41, 135 | 0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42,0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a, 136 | 0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92,0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa, 137 | 0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94,0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc, 138 | 0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44,0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c, 139 | 0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f,0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47, 140 | 0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff,0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97), 141 | (0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f,0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77, 142 | 0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf,0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7, 143 | 0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04,0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c, 144 | 0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94,0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc, 145 | 0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49,0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01, 146 | 0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9,0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91, 147 | 
0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72,0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a, 148 | 0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2,0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa, 149 | 0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3,0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b, 150 | 0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43,0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b, 151 | 0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8,0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0, 152 | 0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78,0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30, 153 | 0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5,0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed, 154 | 0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35,0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d, 155 | 0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e,0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6, 156 | 0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e,0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46)) -------------------------------------------------------------------------------- /lib/parse/peepdf/aespython/cbc_mode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | CBC Mode of operation 4 | 5 | Running this file as __main__ will result in a self-test of the algorithm. 
class CBCMode:
    """Cipher Block Chaining on top of a block cipher; the chaining value is kept between calls"""

    def __init__(self, block_cipher, block_size):
        # The chaining value is an all-zero block until set_iv() replaces it.
        self._iv = [0] * block_size
        self._block_size = block_size
        self._block_cipher = block_cipher

    def set_iv(self, iv):
        """Install a new IV; one whose length is not the block size is ignored"""
        if len(iv) != self._block_size:
            return
        self._iv = iv

    def encrypt_block(self, plaintext):
        """XOR the plaintext with the chaining value, encipher it and chain on the result"""
        chained = []
        for plain_byte, iv_byte in zip(plaintext, self._iv):
            chained.append(plain_byte ^ iv_byte)
        encrypted = self._block_cipher.cipher_block(chained)
        # The ciphertext just produced is the chaining value for the next block.
        self._iv = encrypted
        return encrypted

    def decrypt_block(self, ciphertext):
        """Decipher the block, XOR it with the chaining value and chain on the ciphertext"""
        decrypted = list(self._block_cipher.decipher_block(ciphertext))
        previous = self._iv
        for position in range(len(previous)):
            decrypted[position] = decrypted[position] ^ previous[position]
        self._iv = ciphertext
        return decrypted
class CFBMode:
    """Cipher Feedback on top of a block cipher; the feedback block is kept between calls"""

    def __init__(self, block_cipher, block_size):
        # The feedback block is all zeros until set_iv() replaces it.
        self._iv = [0] * block_size
        self._block_size = block_size
        self._block_cipher = block_cipher

    def set_iv(self, iv):
        """Install a new IV; one whose length is not the block size is ignored"""
        if len(iv) != self._block_size:
            return
        self._iv = iv

    def encrypt_block(self, plaintext):
        """Encipher the feedback block and XOR it onto the plaintext; the result is fed back"""
        keystream = self._block_cipher.cipher_block(self._iv)
        feedback = []
        for plain_byte, key_byte in zip(plaintext, keystream):
            feedback.append(plain_byte ^ key_byte)
        self._iv = feedback
        return feedback

    def decrypt_block(self, ciphertext):
        """Encipher the feedback block and XOR it onto the ciphertext; the ciphertext is fed back"""
        keystream = self._block_cipher.cipher_block(self._iv)
        # Feedback always comes from the ciphertext, also when decrypting.
        self._iv = ciphertext
        recovered = []
        for key_byte, cipher_byte in zip(keystream, ciphertext):
            recovered.append(key_byte ^ cipher_byte)
        return recovered
53 | for k in range(4): 54 | self.assertEquals(len([i for i, j in zip(test_data.test_cfb_ciphertext[k],test_cfb.encrypt_block(test_data.test_mode_plaintext[k])) if i == j]), 55 | 16, 56 | msg='CFB encrypt test block' + str(k)) 57 | 58 | test_cfb.set_iv(test_data.test_mode_iv) 59 | for k in range(4): 60 | self.assertEquals(len([i for i, j in zip(test_data.test_mode_plaintext[k],test_cfb.decrypt_block(test_data.test_cfb_ciphertext[k])) if i == j]), 61 | 16, 62 | msg='CFB decrypt test block' + str(k)) 63 | 64 | if __name__ == "__main__": 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /lib/parse/peepdf/aespython/key_expander.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | AES Key Expansion. 5 | 6 | Expands 128, 192, or 256 bit key for use with AES 7 | 8 | Running this file as __main__ will result in a self-test of the algorithm. 9 | 10 | Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf 11 | 12 | Copyright (c) 2010, Adam Newman http://www.caller9.com/ 13 | Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php 14 | """ 15 | __author__ = "Adam Newman" 16 | 17 | #Normally use relative import. In test mode use local import. 
class KeyExpander:
    """Perform AES Key Expansion"""

    # Key size in bits -> size in bytes of the fully expanded key schedule
    _expanded_key_length = {128 : 176, 192 : 208, 256 : 240}

    def __init__(self, key_length):
        """
        Prepare an expander for keys of key_length bits.

        Raises LookupError when key_length is not 128, 192 or 256.
        """
        self._key_length = key_length
        self._n = key_length>>3

        if key_length in self._expanded_key_length:
            self._b = self._expanded_key_length[key_length]
        else:
            raise LookupError('Invalid Key Size')

    def _append_word(self, expanded, word):
        """XOR a 4 byte word with the 4 bytes found n bytes from the end and append the result"""
        earlier = expanded[-self._n:4-self._n]
        expanded.extend([a ^ b for a, b in zip(word, earlier)])

    def expand(self, new_key):
        """
        Expand the encryption key per AES key schedule specifications

        http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_description

        new_key is any sequence of byte values whose length matches the key
        size given to the constructor. It is copied first, so the caller's
        sequence is left untouched. The expanded key is returned as a new
        list of 176, 208 or 240 byte values.

        Raises RuntimeError when the length of new_key is wrong.
        """
        #First n bytes are copied from key
        expanded = list(new_key)
        if len(expanded) != self._n:
            raise RuntimeError('expand(): key size is invalid')
        rcon_iter = 1

        #Grow the key until it is the correct length
        while len(expanded) < self._b:
            #Key schedule core: rotate the last 4 bytes left by one,
            #substitute each byte through the sbox and XOR the first byte
            #with rcon(iter)
            word = [sbox[i] for i in expanded[-3:]+expanded[-4:-3]]
            word[0] ^= rcon[rcon_iter]
            rcon_iter += 1
            self._append_word(expanded, word)

            #Run three passes of 4 byte expansion using copy of 4 byte tail of extended key
            for _ in range(3):
                self._append_word(expanded, expanded[-4:])

            #For all three key sizes the schedule completes exactly here
            if len(expanded) >= self._b:
                break

            #If key length is 256 and key is not complete, add 4 bytes tail of extended key
            #run through sbox
            if self._key_length == 256:
                self._append_word(expanded, [sbox[x] for x in expanded[-4:]])

            #If key length is 192 or 256 and key is not complete, run 2 or 3 passes respectively
            if self._key_length != 128:
                for _ in range(2 if self._key_length == 192 else 3):
                    self._append_word(expanded, expanded[-4:])

        return expanded
class OFBMode:
    """Output Feedback on top of a block cipher; the keystream block is kept between calls"""

    def __init__(self, block_cipher, block_size):
        # The feedback block is all zeros until set_iv() replaces it.
        self._iv = [0] * block_size
        self._block_size = block_size
        self._block_cipher = block_cipher

    def set_iv(self, iv):
        """Install a new IV; one whose length is not the block size is ignored"""
        if len(iv) != self._block_size:
            return
        self._iv = iv

    def encrypt_block(self, plaintext):
        """Advance the keystream by one block and XOR it onto the plaintext"""
        keystream = self._block_cipher.cipher_block(self._iv)
        # The enciphered block itself is fed back, not the data.
        self._iv = keystream
        output = []
        for plain_byte, key_byte in zip(plaintext, keystream):
            output.append(plain_byte ^ key_byte)
        return output

    def decrypt_block(self, ciphertext):
        """Advance the keystream by one block and XOR it onto the ciphertext"""
        keystream = self._block_cipher.cipher_block(self._iv)
        self._iv = keystream
        output = []
        for key_byte, cipher_byte in zip(keystream, ciphertext):
            output.append(key_byte ^ cipher_byte)
        return output
zip(test_data.test_ofb_ciphertext[k],test_ofb.encrypt_block(test_data.test_mode_plaintext[k])) if i == j]), 53 | 16, 54 | msg='OFB encrypt test block' + str(k)) 55 | 56 | test_ofb.set_iv(test_data.test_mode_iv) 57 | for k in range(4): 58 | self.assertEquals(len([i for i, j in zip(test_data.test_mode_plaintext[k],test_ofb.decrypt_block(test_data.test_ofb_ciphertext[k])) if i == j]), 59 | 16, 60 | msg='OFB decrypt test block' + str(k)) 61 | 62 | if __name__ == "__main__": 63 | unittest.main() -------------------------------------------------------------------------------- /lib/parse/peepdf/aespython/test_keys.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test keys and data for self-test operations. 3 | 4 | Test data from: 5 | NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf 6 | NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf 7 | 8 | Copyright (c) 2010, Adam Newman http://www.caller9.com/ 9 | Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php 10 | """ 11 | __author__ = "Adam Newman" 12 | 13 | class TestKeys: 14 | """Test data, keys, IVs, and output to use in self-tests""" 15 | test_key = { 16 | 128 : [ 17 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f] 18 | , 192 : [ 19 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 20 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17] 21 | , 256 : [ 22 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 23 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f] 24 | } 25 | 26 | test_expanded_key_validated = { 27 | 128 : [ 28 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 29 | 0xd6, 0xaa, 0x74, 0xfd, 0xd2, 0xaf, 0x72, 0xfa, 0xda, 0xa6, 0x78, 0xf1, 0xd6, 0xab, 
0x76, 0xfe, 30 | 0xb6, 0x92, 0xcf, 0x0b, 0x64, 0x3d, 0xbd, 0xf1, 0xbe, 0x9b, 0xc5, 0x00, 0x68, 0x30, 0xb3, 0xfe, 31 | 0xb6, 0xff, 0x74, 0x4e, 0xd2, 0xc2, 0xc9, 0xbf, 0x6c, 0x59, 0x0c, 0xbf, 0x04, 0x69, 0xbf, 0x41, 32 | 0x47, 0xf7, 0xf7, 0xbc, 0x95, 0x35, 0x3e, 0x03, 0xf9, 0x6c, 0x32, 0xbc, 0xfd, 0x05, 0x8d, 0xfd, 33 | 0x3c, 0xaa, 0xa3, 0xe8, 0xa9, 0x9f, 0x9d, 0xeb, 0x50, 0xf3, 0xaf, 0x57, 0xad, 0xf6, 0x22, 0xaa, 34 | 0x5e, 0x39, 0x0f, 0x7d, 0xf7, 0xa6, 0x92, 0x96, 0xa7, 0x55, 0x3d, 0xc1, 0x0a, 0xa3, 0x1f, 0x6b, 35 | 0x14, 0xf9, 0x70, 0x1a, 0xe3, 0x5f, 0xe2, 0x8c, 0x44, 0x0a, 0xdf, 0x4d, 0x4e, 0xa9, 0xc0, 0x26, 36 | 0x47, 0x43, 0x87, 0x35, 0xa4, 0x1c, 0x65, 0xb9, 0xe0, 0x16, 0xba, 0xf4, 0xae, 0xbf, 0x7a, 0xd2, 37 | 0x54, 0x99, 0x32, 0xd1, 0xf0, 0x85, 0x57, 0x68, 0x10, 0x93, 0xed, 0x9c, 0xbe, 0x2c, 0x97, 0x4e, 38 | 0x13, 0x11, 0x1d, 0x7f, 0xe3, 0x94, 0x4a, 0x17, 0xf3, 0x07, 0xa7, 0x8b, 0x4d, 0x2b, 0x30, 0xc5] 39 | , 192 : [ 40 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 41 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x58, 0x46, 0xf2, 0xf9, 0x5c, 0x43, 0xf4, 0xfe, 42 | 0x54, 0x4a, 0xfe, 0xf5, 0x58, 0x47, 0xf0, 0xfa, 0x48, 0x56, 0xe2, 0xe9, 0x5c, 0x43, 0xf4, 0xfe, 43 | 0x40, 0xf9, 0x49, 0xb3, 0x1c, 0xba, 0xbd, 0x4d, 0x48, 0xf0, 0x43, 0xb8, 0x10, 0xb7, 0xb3, 0x42, 44 | 0x58, 0xe1, 0x51, 0xab, 0x04, 0xa2, 0xa5, 0x55, 0x7e, 0xff, 0xb5, 0x41, 0x62, 0x45, 0x08, 0x0c, 45 | 0x2a, 0xb5, 0x4b, 0xb4, 0x3a, 0x02, 0xf8, 0xf6, 0x62, 0xe3, 0xa9, 0x5d, 0x66, 0x41, 0x0c, 0x08, 46 | 0xf5, 0x01, 0x85, 0x72, 0x97, 0x44, 0x8d, 0x7e, 0xbd, 0xf1, 0xc6, 0xca, 0x87, 0xf3, 0x3e, 0x3c, 47 | 0xe5, 0x10, 0x97, 0x61, 0x83, 0x51, 0x9b, 0x69, 0x34, 0x15, 0x7c, 0x9e, 0xa3, 0x51, 0xf1, 0xe0, 48 | 0x1e, 0xa0, 0x37, 0x2a, 0x99, 0x53, 0x09, 0x16, 0x7c, 0x43, 0x9e, 0x77, 0xff, 0x12, 0x05, 0x1e, 49 | 0xdd, 0x7e, 0x0e, 0x88, 0x7e, 0x2f, 0xff, 0x68, 0x60, 0x8f, 0xc8, 0x42, 0xf9, 0xdc, 0xc1, 0x54, 50 | 0x85, 0x9f, 0x5f, 0x23, 0x7a, 0x8d, 0x5a, 0x3d, 
0xc0, 0xc0, 0x29, 0x52, 0xbe, 0xef, 0xd6, 0x3a, 51 | 0xde, 0x60, 0x1e, 0x78, 0x27, 0xbc, 0xdf, 0x2c, 0xa2, 0x23, 0x80, 0x0f, 0xd8, 0xae, 0xda, 0x32, 52 | 0xa4, 0x97, 0x0a, 0x33, 0x1a, 0x78, 0xdc, 0x09, 0xc4, 0x18, 0xc2, 0x71, 0xe3, 0xa4, 0x1d, 0x5d] 53 | , 256 : [ 54 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 55 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 56 | 0xa5, 0x73, 0xc2, 0x9f, 0xa1, 0x76, 0xc4, 0x98, 0xa9, 0x7f, 0xce, 0x93, 0xa5, 0x72, 0xc0, 0x9c, 57 | 0x16, 0x51, 0xa8, 0xcd, 0x02, 0x44, 0xbe, 0xda, 0x1a, 0x5d, 0xa4, 0xc1, 0x06, 0x40, 0xba, 0xde, 58 | 0xae, 0x87, 0xdf, 0xf0, 0x0f, 0xf1, 0x1b, 0x68, 0xa6, 0x8e, 0xd5, 0xfb, 0x03, 0xfc, 0x15, 0x67, 59 | 0x6d, 0xe1, 0xf1, 0x48, 0x6f, 0xa5, 0x4f, 0x92, 0x75, 0xf8, 0xeb, 0x53, 0x73, 0xb8, 0x51, 0x8d, 60 | 0xc6, 0x56, 0x82, 0x7f, 0xc9, 0xa7, 0x99, 0x17, 0x6f, 0x29, 0x4c, 0xec, 0x6c, 0xd5, 0x59, 0x8b, 61 | 0x3d, 0xe2, 0x3a, 0x75, 0x52, 0x47, 0x75, 0xe7, 0x27, 0xbf, 0x9e, 0xb4, 0x54, 0x07, 0xcf, 0x39, 62 | 0x0b, 0xdc, 0x90, 0x5f, 0xc2, 0x7b, 0x09, 0x48, 0xad, 0x52, 0x45, 0xa4, 0xc1, 0x87, 0x1c, 0x2f, 63 | 0x45, 0xf5, 0xa6, 0x60, 0x17, 0xb2, 0xd3, 0x87, 0x30, 0x0d, 0x4d, 0x33, 0x64, 0x0a, 0x82, 0x0a, 64 | 0x7c, 0xcf, 0xf7, 0x1c, 0xbe, 0xb4, 0xfe, 0x54, 0x13, 0xe6, 0xbb, 0xf0, 0xd2, 0x61, 0xa7, 0xdf, 65 | 0xf0, 0x1a, 0xfa, 0xfe, 0xe7, 0xa8, 0x29, 0x79, 0xd7, 0xa5, 0x64, 0x4a, 0xb3, 0xaf, 0xe6, 0x40, 66 | 0x25, 0x41, 0xfe, 0x71, 0x9b, 0xf5, 0x00, 0x25, 0x88, 0x13, 0xbb, 0xd5, 0x5a, 0x72, 0x1c, 0x0a, 67 | 0x4e, 0x5a, 0x66, 0x99, 0xa9, 0xf2, 0x4f, 0xe0, 0x7e, 0x57, 0x2b, 0xaa, 0xcd, 0xf8, 0xcd, 0xea, 68 | 0x24, 0xfc, 0x79, 0xcc, 0xbf, 0x09, 0x79, 0xe9, 0x37, 0x1a, 0xc2, 0x3c, 0x6d, 0x68, 0xde, 0x36] 69 | } 70 | 71 | test_block_ciphertext_validated = { 72 | 128 : [ 73 | 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a] 74 | , 192 : [ 75 | 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 
0x4c, 0xdf, 0xe0, 0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91] 76 | , 256 : [ 77 | 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89] 78 | } 79 | 80 | test_block_plaintext = [ 81 | 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff] 82 | 83 | #After initial validation, these deviated from test in SP 800-38A to use same key, iv, and plaintext on tests. 84 | #Still valid, just easier to test with. 85 | test_mode_key= [ 86 | 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81, 87 | 0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4] 88 | test_mode_iv = [ 89 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f] 90 | test_mode_plaintext = [ 91 | [0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a], 92 | [0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51], 93 | [0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef], 94 | [0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10]] 95 | test_cbc_ciphertext = [ 96 | [0xf5, 0x8c, 0x4c, 0x04, 0xd6, 0xe5, 0xf1, 0xba, 0x77, 0x9e, 0xab, 0xfb, 0x5f, 0x7b, 0xfb, 0xd6], 97 | [0x9c, 0xfc, 0x4e, 0x96, 0x7e, 0xdb, 0x80, 0x8d, 0x67, 0x9f, 0x77, 0x7b, 0xc6, 0x70, 0x2c, 0x7d], 98 | [0x39, 0xf2, 0x33, 0x69, 0xa9, 0xd9, 0xba, 0xcf, 0xa5, 0x30, 0xe2, 0x63, 0x04, 0x23, 0x14, 0x61], 99 | [0xb2, 0xeb, 0x05, 0xe2, 0xc3, 0x9b, 0xe9, 0xfc, 0xda, 0x6c, 0x19, 0x07, 0x8c, 0x6a, 0x9d, 0x1b]] 100 | test_cfb_ciphertext = [ 101 | [0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60], 102 | [0x39, 0xff, 0xed, 0x14, 0x3b, 0x28, 0xb1, 0xc8, 0x32, 0x11, 0x3c, 0x63, 0x31, 0xe5, 0x40, 0x7b], 103 | [0xdf, 0x10, 0x13, 0x24, 0x15, 
def hex_output(self, list):
    """Debugging output helper: render integers as a bracketed hex list.

    Each item is formatted with the builtin ``hex()`` and the items are
    joined by commas without spaces, e.g. ``[0x0,0xff]``.  An empty input
    yields ``[]`` rather than raising ``IndexError``.

    The parameter keeps its original name (which shadows the builtin) so
    that existing keyword callers continue to work.
    """
    return '[' + ','.join(hex(value) for value in list) + ']'
CSI = '\033['


def code_to_chars(code):
    '''Return the escape sequence CSI + code + 'm' for the given code.'''
    return '%s%sm' % (CSI, code)


class AnsiCodes(object):
    '''
    Mirrors every public attribute of a code table class, replacing each
    numeric code by the escape sequence produced by code_to_chars().
    '''
    def __init__(self, codes):
        # names starting with an underscore are not copied
        public_names = [attr for attr in dir(codes)
                        if not attr.startswith('_')]
        for attr in public_names:
            setattr(self, attr, code_to_chars(getattr(codes, attr)))


class AnsiFore:
    # foreground colour codes
    BLACK   = 30
    RED     = 31
    GREEN   = 32
    YELLOW  = 33
    BLUE    = 34
    MAGENTA = 35
    CYAN    = 36
    WHITE   = 37
    RESET   = 39


class AnsiBack:
    # background colour codes
    BLACK   = 40
    RED     = 41
    GREEN   = 42
    YELLOW  = 43
    BLUE    = 44
    MAGENTA = 45
    CYAN    = 46
    WHITE   = 47
    RESET   = 49


class AnsiStyle:
    # intensity codes
    BRIGHT    = 1
    DIM       = 2
    NORMAL    = 22
    RESET_ALL = 0


# ready-made escape-sequence tables built from the numeric tables above
Fore = AnsiCodes(AnsiFore)
Back = AnsiCodes(AnsiBack)
Style = AnsiCodes(AnsiStyle)
class AnsiToWin32(object):
    '''
    Implements a 'write()' method which, on Windows, will strip ANSI character
    sequences from the text, and if outputting to a tty, will convert them into
    win32 function calls.
    '''
    # ESC '[' <digits and ';'> <letter>.  The backslashes are doubled so the
    # literal holds the same pattern without invalid string escapes.
    ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')

    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
        '''
        wrapped   -- the stream to write to (normally sys.stdout/sys.stderr)
        convert   -- turn ANSI sequences into win32 calls; None means
                     "on Windows and wrapped is a tty"
        strip     -- remove ANSI sequences from the output; None means
                     "on Windows"
        autoreset -- reset colors to defaults after every write()
        '''
        # The wrapped stream (normally sys.stdout or sys.stderr)
        self.wrapped = wrapped

        # should we reset colors to defaults after every .write()
        self.autoreset = autoreset

        # create the proxy wrapping our output stream
        self.stream = StreamWrapper(wrapped, self)

        on_windows = sys.platform.startswith('win')

        # should we strip ANSI sequences from our output?
        if strip is None:
            strip = on_windows
        self.strip = strip

        # should we should convert ANSI sequences into win32 calls?
        if convert is None:
            convert = on_windows and is_a_tty(wrapped)
        self.convert = convert

        # dict of ansi codes to win32 functions and parameters
        self.win32_calls = self.get_win32_calls()

        # are we wrapping stderr?
        self.on_stderr = self.wrapped is sys.stderr

    def should_wrap(self):
        '''
        True if this class is actually needed. If false, then the output
        stream will not be affected, nor will win32 calls be issued, so
        wrapping stdout is not actually required. This will generally be
        False on non-Windows platforms, unless optional functionality like
        autoreset has been requested using kwargs to init()
        '''
        return self.convert or self.strip or self.autoreset

    def get_win32_calls(self):
        '''
        Return the table mapping an ANSI 'm' parameter to a tuple of
        (winterm method, optional argument).  An empty dict is returned when
        no conversion is going to happen, so lookups are always safe.
        '''
        if self.convert and winterm:
            return {
                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
                AnsiFore.RED: (winterm.fore, WinColor.RED),
                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
                AnsiFore.RESET: (winterm.fore, ),
                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
                AnsiBack.RED: (winterm.back, WinColor.RED),
                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
                AnsiBack.RESET: (winterm.back, ),
            }
        # previously fell through and returned None
        return {}

    def write(self, text):
        '''
        Write text to the wrapped stream, stripping/converting ANSI sequences
        when enabled, then reset the colors if autoreset was requested.
        '''
        if self.strip or self.convert:
            self.write_and_convert(text)
        else:
            self.wrapped.write(text)
            self.wrapped.flush()
        if self.autoreset:
            self.reset_all()

    def reset_all(self):
        '''
        Restore default colors: through win32 calls when converting,
        otherwise by writing the ANSI reset sequence.
        '''
        if self.convert:
            self.call_win32('m', (0,))
        else:
            self.wrapped.write(Style.RESET_ALL)

    def write_and_convert(self, text):
        '''
        Write the given text to our wrapped stream, stripping any ANSI
        sequences from the text, and optionally converting them into win32
        calls.
        '''
        cursor = 0
        for match in self.ANSI_RE.finditer(text):
            start, end = match.span()
            self.write_plain_text(text, cursor, start)
            self.convert_ansi(*match.groups())
            cursor = end
        self.write_plain_text(text, cursor, len(text))

    def write_plain_text(self, text, start, end):
        '''Write and flush text[start:end]; nothing is done for an empty span.'''
        if start < end:
            self.wrapped.write(text[start:end])
            self.wrapped.flush()

    def convert_ansi(self, paramstring, command):
        '''Issue the win32 calls for one ANSI sequence, if converting.'''
        if self.convert:
            params = self.extract_params(paramstring)
            self.call_win32(command, params)

    def extract_params(self, paramstring):
        '''
        Return the ';'-separated numbers of paramstring as a tuple of ints,
        skipping empty fields ('' gives an empty tuple).
        '''
        return tuple(int(p) for p in paramstring.split(';') if p != '')

    def call_win32(self, command, params):
        '''
        Run the win32 call registered for each parameter of an 'm' command.
        An empty parameter list is treated as a single 0 (reset all), so a
        bare ESC[m resets the attributes.  Other commands are ignored.
        '''
        # extract_params() hands us a tuple, so test for emptiness rather
        # than comparing against [] (which a tuple never equals).
        if not params:
            params = (0,)
        if command == 'm':
            for param in params:
                if param in self.win32_calls:
                    func_args = self.win32_calls[param]
                    func = func_args[0]
                    args = func_args[1:]
                    kwargs = dict(on_stderr=self.on_stderr)
                    func(*args, **kwargs)
def wrap_stream(stream, convert, strip, autoreset, wrap):
    '''
    Return the stream that should replace `stream`.

    When `wrap` is false the stream is handed back untouched.  Otherwise an
    AnsiToWin32 is built around it and its proxy stream is returned, but
    only if the wrapper reports (via should_wrap()) that it is needed.
    '''
    if not wrap:
        return stream
    proxy = AnsiToWin32(
        stream, convert=convert, strip=strip, autoreset=autoreset)
    return proxy.stream if proxy.should_wrap() else stream
def SetConsoleTextAttribute(stream_id, attrs):
    """Set the text attributes of the console attached to stream_id.

    stream_id is a key of the module-level `handles` table (STDOUT or
    STDERR); attrs is passed unchanged to the kernel32 call.
    """
    handle = handles[stream_id]
    success = windll.kernel32.SetConsoleTextAttribute(handle, attrs)
    assert success


def SetConsoleCursorPosition(stream_id, position):
    """Move the console cursor; position is unpacked into a COORD (X, Y)."""
    handle = handles[stream_id]
    position = COORD(*position)
    success = windll.kernel32.SetConsoleCursorPosition(handle, position)
    assert success


def FillConsoleOutputCharacter(stream_id, char, length, start):
    """Write `char` `length` times starting at `start`; return the count written.

    start is unpacked into a COORD (X, Y).  The value returned is the number
    of characters the call reports having written.
    """
    handle = handles[stream_id]
    # TCHAR is c_char, which needs a byte string: encode text input first.
    if isinstance(char, type(u'')):
        char = char.encode('ascii')
    char = TCHAR(char)
    length = DWORD(length)
    start = COORD(*start)
    num_written = DWORD(0)
    # kernel32 exports this function only with an A (ANSI) or W (wide)
    # suffix; the unsuffixed name is a header macro, which is why looking it
    # up raised "AttributeError: function ... not found".  The A variant
    # matches the c_char argument.
    success = windll.kernel32.FillConsoleOutputCharacterA(
        handle, char, length, start, byref(num_written))
    assert success
    return num_written.value
# from wincon.h
class WinColor(object):
    BLACK   = 0
    BLUE    = 1
    GREEN   = 2
    CYAN    = 3
    RED     = 4
    MAGENTA = 5
    YELLOW  = 6
    GREY    = 7


# from wincon.h
class WinStyle(object):
    NORMAL = 0x00  # dim text, dim background
    BRIGHT = 0x08  # bright text, dim background


class WinTerm(object):
    '''
    Tracks the foreground, background and style of the console and pushes
    changes to it through win32.SetConsoleTextAttribute.
    '''

    def __init__(self):
        # remember the attributes in effect at start-up as the defaults
        self._default = \
            win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
        self.set_attrs(self._default)
        self._default_fore = self._fore
        self._default_back = self._back
        self._default_style = self._style

    def get_attrs(self):
        '''Pack fore (bits 0-2), style (bit 3) and back (bits 4-6) into one value.'''
        return self._fore + self._back * 16 + self._style

    def set_attrs(self, value):
        '''Split a packed attribute value into fore, back and style.'''
        self._fore = value & 7
        self._back = (value >> 4) & 7
        self._style = value & WinStyle.BRIGHT

    def reset_all(self, on_stderr=None):
        '''
        Restore the start-up attributes on the requested console handle.
        on_stderr used to be ignored, so a reset issued for stderr was
        always applied to the stdout handle.
        '''
        self.set_attrs(self._default)
        self.set_console(attrs=self._default, on_stderr=on_stderr)

    def fore(self, fore=None, on_stderr=False):
        '''Set the foreground color; None restores the start-up foreground.'''
        if fore is None:
            fore = self._default_fore
        self._fore = fore
        self.set_console(on_stderr=on_stderr)

    def back(self, back=None, on_stderr=False):
        '''Set the background color; None restores the start-up background.'''
        if back is None:
            back = self._default_back
        self._back = back
        self.set_console(on_stderr=on_stderr)

    def style(self, style=None, on_stderr=False):
        '''Set the style; None restores the start-up style.'''
        if style is None:
            style = self._default_style
        self._style = style
        self.set_console(on_stderr=on_stderr)

    def set_console(self, attrs=None, on_stderr=False):
        '''
        Apply attrs (the current state when None) to stdout's console
        handle, or to stderr's when on_stderr is true.
        '''
        if attrs is None:
            attrs = self.get_attrs()
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleTextAttribute(handle, attrs)
UNPACKERS SPECIFICATIONS 2 | 3 | Nothing very difficult: an unpacker is a submodule placed in the directory 4 | where this file was found. Each unpacker must define three symbols: 5 | 6 | * `PRIORITY` : integer number expressing the priority in applying this 7 | unpacker. Lower number means higher priority. 8 | Makes sense only if a source file has been packed with 9 | more than one packer. 10 | * `detect(source)` : returns `True` if source is packed, otherwise, `False`. 11 | * `unpack(source)` : takes a `source` string and unpacks it. Must always return 12 | valid JavaScript. That is to say, your code should look 13 | like: 14 | 15 | ``` 16 | if detect(source): 17 | return do_your_fancy_things_with(source) 18 | else: 19 | return source 20 | ``` 21 | 22 | *You can safely define any other symbol in your module, as it will be ignored.* 23 | 24 | `__init__` code will automatically load new unpackers, without any further step 25 | to be accomplished. Simply drop it in this directory. 26 | -------------------------------------------------------------------------------- /lib/parse/peepdf/jsbeautifier/unpackers/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # General code for JSBeautifier unpackers infrastructure. See README.specs 3 | # written by Stefano Sanfilippo 4 | # 5 | 6 | """General code for JSBeautifier unpackers infrastructure.""" 7 | 8 | import pkgutil 9 | import re 10 | #from jsbeautifier.unpackers import evalbased 11 | import evalbased 12 | 13 | # NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js! 14 | BLACKLIST = ['jsbeautifier.unpackers.evalbased'] 15 | 16 | class UnpackingError(Exception): 17 | """Badly packed source or general error. Argument is a 18 | meaningful description.""" 19 | pass 20 | 21 | def getunpackers(): 22 | """Scans the unpackers dir, finds unpackers and add them to UNPACKERS list. 
#UNPACKERS = getunpackers()
UNPACKERS = []

def run(source, evalcode=False):
    """Runs the applicable unpackers and return unpacked source as a string.

    Every module in UNPACKERS whose detect() accepts the original `source`
    is applied in list order.  When `evalcode` is true the eval() based
    unpacker is additionally tried on the result."""
    for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]:
        source = unpacker.unpack(source)
    if evalcode and evalbased.detect(source):
        source = evalbased.unpack(source)
    return source

def filtercomments(source):
    """NOT USED: strips leading comments and puts them at the top.

    Leading `/* ... */` blocks are split off whole.  For a leading `//`
    comment only the marker (with the whitespace before it) is split off.
    The pieces are joined with newlines and prepended to the remaining
    source.  An unterminated block comment stops the scan."""
    trailing_comments = []
    comment = True

    while comment:
        block_start = re.search(r'^\s*\/\*', source)
        if block_start:
            # look for the terminator after the opener; slicing with ':'
            # (the old code indexed with a tuple and raised TypeError)
            end = source.find('*/', block_start.end())
            comment = source[:end + 2] if end != -1 else None
        elif re.search(r'^\s*\/\/', source):
            comment = re.search(r'^\s*\/\/', source).group(0)
        else:
            comment = None

        if comment:
            source = re.sub(r'^\s+', '', source[len(comment):])
            trailing_comments.append(comment)

    return '\n'.join(trailing_comments) + source
PRIORITY = 3

def detect(source):
    """Detects if source is likely to be eval() packed."""
    return source.strip().lower().startswith('eval(function(')

def unpack(source):
    """Runs source and return resulting code.

    Source that detect() rejects is returned unchanged."""
    if not detect(source):
        return source
    # detect() ignores surrounding whitespace, so strip it before cutting
    # off the leading 'eval'; slicing the raw text cut at the wrong offset.
    return jseval('print %s;' % source.strip()[4:])

# In case of failure, we'll just return the original, without crashing on user.
def jseval(script):
    """Run code in the JS interpreter and return output.

    The `js` executable is started and fed `script` on stdin.  `script`
    itself is returned when the interpreter cannot be started, exits with a
    non-zero status or writes to stderr.

    NOTE: this executes the given JavaScript; do not call it on untrusted
    input without sandboxing the interpreter."""
    try:
        # stderr must be piped, otherwise `errors` below is always None
        interpreter = Popen(['js'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    except OSError:
        return script
    # pipes carry bytes: encode text on the way in, decode on the way out
    is_bytes = isinstance(script, bytes)
    payload = script if is_bytes else script.encode('utf-8')
    result, errors = interpreter.communicate(payload)
    if interpreter.returncode or errors:
        return script
    return result if is_bytes else result.decode('utf-8', 'replace')
PRIORITY = 1

def smartsplit(code):
    """Split `code` at " symbol, only if it is not escaped.

    Returns the double-quoted strings found in `code`, each re-wrapped in
    double quotes with its escape sequences kept verbatim.  A string that
    is still open when the input ends is returned as far as it goes."""
    strings = []
    pos = 0
    while pos < len(code):
        if code[pos] == '"':
            word = '' # new word
            pos += 1
            while pos < len(code):
                if code[pos] == '"':
                    break
                if code[pos] == '\\':
                    word += '\\'
                    pos += 1
                    if pos >= len(code):
                        # backslash was the last character: nothing left
                        # to escape (used to raise IndexError)
                        break
                word += code[pos]
                pos += 1
            strings.append('"%s"' % word)
        pos += 1
    return strings

def detect(code):
    """Detects if `code` is JavascriptObfuscator.com packed."""
    # prefer `is not` idiom, so that a true boolean is returned
    return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None)

def unpack(code):
    """Unpacks JavascriptObfuscator.com packed code.

    The leading `var _0x... = [...];` string table is removed and every
    `_0x...[N]` reference in the rest is replaced by the N-th string.
    Code that detect() rejects is returned unchanged."""
    if detect(code):
        matches = re.search(r'var (_0x[a-f\d]+) ?\= ?\[(.*?)\];', code)
        if matches:
            variable = matches.group(1)
            dictionary = smartsplit(matches.group(2))
            code = code[len(matches.group(0)):]
            for key, value in enumerate(dictionary):
                code = code.replace(r'%s[%s]' % (variable, key), value)
    return code
unpackings, your script gets turned into something like this, 18 | # as of 2011-08-26: 19 | # 20 | # var _escape = 'your_script_escaped'; 21 | # var _111 = document.createElement('script'); 22 | # _111.src = 'http://api.www.myobfuscate.com/?getsrc=ok' + 23 | # '&ref=' + encodeURIComponent(document.referrer) + 24 | # '&url=' + encodeURIComponent(document.URL); 25 | # var 000 = document.getElementsByTagName('head')[0]; 26 | # 000.appendChild(_111); 27 | # document.write(unescape(_escape)); 28 | # 29 | 30 | """Deobfuscator for scripts messed up with MyObfuscate.com""" 31 | 32 | import re 33 | import base64 34 | 35 | # Python 2 retrocompatibility 36 | # pylint: disable=F0401 37 | # pylint: disable=E0611 38 | try: 39 | from urllib import unquote 40 | except ImportError: 41 | from urllib.parse import unquote 42 | 43 | from jsbeautifier.unpackers import UnpackingError 44 | 45 | PRIORITY = 1 46 | 47 | CAVEAT = """// 48 | // Unpacker warning: be careful when using myobfuscate.com for your projects: 49 | // scripts obfuscated by the free online version call back home. 
50 | // 51 | 52 | """ 53 | 54 | SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F' 55 | r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65' 56 | r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75' 57 | r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B' 58 | r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78' 59 | r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","' 60 | r'\x6C\x65\x6E\x67\x74\x68"]') 61 | 62 | def detect(source): 63 | """Detects MyObfuscate.com packer.""" 64 | return SIGNATURE in source 65 | 66 | def unpack(source): 67 | """Unpacks js code packed with MyObfuscate.com""" 68 | if not detect(source): 69 | return source 70 | payload = unquote(_filter(source)) 71 | match = re.search(r"^var _escape\='' 115 | JSStrings = ['var ', ';', ')', '(', 'function ', '=', '{', '}', 'if ', 'else', 'return', 'while ', 'for ', ',', 116 | 'eval', 'unescape', '.replace'] 117 | keyStrings = [';', '(', ')'] 118 | stringsFound = [] 119 | limit = 15 120 | minDistinctStringsFound = 5 121 | results = 0 122 | try: 123 | content = unescapeHTMLEntities(content) 124 | except UnicodeDecodeError: 125 | content = unescapeHTMLEntities(content.decode("latin1", errors="xmlcharrefreplace")) 126 | 127 | res = re.findall(reJSscript, content, re.DOTALL | re.IGNORECASE) 128 | if res: 129 | return "\n".join(res) 130 | 131 | for char in content: 132 | if (ord(char) < 32 and char not in ['\n', '\r', '\t', '\f', '\x00']) or ord(char) >= 127: 133 | return '' 134 | 135 | for string in JSStrings: 136 | cont = content.count(string) 137 | results += cont 138 | if cont > 0 and string not in stringsFound: 139 | stringsFound.append(string) 140 | elif cont == 0 and string in keyStrings: 141 | return '' 142 | 143 | if results > limit and len(stringsFound) >= minDistinctStringsFound: 144 | return content 145 | else: 146 | return '' 147 | 
-------------------------------------------------------------------------------- /xml-output/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'sei-mappel' 2 | --------------------------------------------------------------------------------