├── module ├── modulePDF.sym ├── Makefile.am ├── modulePDF.h └── modulePDF.c ├── tools ├── Makefile.am ├── armadito-pdf │ ├── Makefile.am │ └── main.c ├── cli_analyzer │ ├── Makefile │ ├── scandir.bat │ ├── scandir.sh │ └── main.c ├── perl_poc │ └── lib │ │ ├── conf │ │ └── Config.pm │ │ ├── analysis │ │ ├── CVEs.pm │ │ ├── ObjectAnalysis.pm │ │ └── DocumentStruct.pm │ │ └── utils │ │ └── CleanRewriting.pm └── cli_parser │ └── parser.c ├── autogen.sh ├── Makefile.am ├── .gitignore ├── sonar-project.properties ├── lib ├── libarmadito-pdf.pc.in ├── Makefile.am ├── TODO ├── src │ ├── log.c │ ├── osdeps.c │ ├── armaditopdf.c │ ├── pdfStructs.c │ ├── utils.c │ └── pdfStructAnalysis.c ├── includes │ ├── pdfParsing.h │ ├── pdfAnalysis.h │ ├── osdeps.h │ ├── log.h │ ├── utils.h │ ├── armaditopdf.h │ ├── filters.h │ └── pdfStructs.h └── Spec.txt ├── win32 └── ArmaditoPDF │ ├── ArmaditoPDF.sln │ └── ArmaditoPDF │ ├── ArmaditoPDF.vcxproj.filters │ └── ArmaditoPDF.vcxproj ├── README.md ├── CHANGES ├── configure.ac └── .travis.yml /module/modulePDF.sym: -------------------------------------------------------------------------------- 1 | module 2 | -------------------------------------------------------------------------------- /tools/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS=armadito-pdf -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -x 3 | aclocal --force 4 | libtoolize --force --automake --copy 5 | #autoheader --force 6 | automake --foreign --add-missing --force-missing --copy 7 | autoconf --force 8 | 9 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | if COND_LIBRARY 2 | LIB_DIR = lib 3 | endif 4 | 5 | if COND_MODULE 6 | MOD_DIR = 
module 7 | endif 8 | 9 | if COND_TOOLS 10 | TOOLS_DIR = tools 11 | endif 12 | 13 | SUBDIRS = $(LIB_DIR) $(TOOLS_DIR) $(MOD_DIR) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | Makefile.in 3 | autom4te.cache/* 4 | compile 5 | config.log 6 | config.status 7 | configure 8 | aclocal.m4 9 | config.guess 10 | config.sub 11 | depcomp 12 | install-sh 13 | ltmain.sh 14 | missing 15 | *~ 16 | version.m4 17 | *.lo 18 | *.o 19 | -------------------------------------------------------------------------------- /sonar-project.properties: -------------------------------------------------------------------------------- 1 | sonar.projectKey=armadito:mod-pdf:DEV 2 | sonar.projectName=armadito-mod-pdf 3 | sonar.projectVersion=1.0 4 | sonar.sources=. 5 | sonar.branch=DEV 6 | sonar.exclusions=armadito-av/**/*,cov-int/**/* 7 | sonar.cfamily.build-wrapper-output=build-wrapper-out 8 | -------------------------------------------------------------------------------- /tools/armadito-pdf/Makefile.am: -------------------------------------------------------------------------------- 1 | AUTOMAKE_OPTIONS=subdir-objects no-dependencies 2 | 3 | AM_CPPFLAGS= 4 | armadito_pdf_LDADD= @LIBARMADITO_PDF_LIBS@ 5 | armadito_pdf_CFLAGS= @LIBARMADITO_PDF_CFLAGS@ 6 | #armadito_pdf_CFLAGS= -I$(top_srcdir)/lib/includes 7 | 8 | 9 | 10 | bin_PROGRAMS = armadito-pdf 11 | armadito_pdf_SOURCES= main.c -------------------------------------------------------------------------------- /lib/libarmadito-pdf.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | includedir=@includedir@ 4 | libdir=@libdir@ 5 | 6 | Name: libarmadito-pdf 7 | Description: Armadito PDF library 8 | URL: https://github.com/armadito/armadito-mod-pdf 9 | Version: @PACKAGE_VERSION@ 10 | Cflags: -I${includedir} 11 | Libs: 
-L${libdir} -larmadito-pdf -------------------------------------------------------------------------------- /tools/cli_analyzer/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | gcc -g -o a6oPDFAnalyzer -Wall -Wextra main.c ../../lib/src/*.c -I../../lib/includes 3 | 4 | lib: 5 | gcc -fPIC -g -c -Wall -Wextra src/*.c -Iincludes 6 | gcc -shared -Wl,-soname,a6oPDFAnalyzer.so.1 -o a6oPDFAnalyzer-1.0.1.so *.o -lc 7 | 8 | 9 | clean: 10 | rm a6oPDFAnalyzer 11 | rm *.o 12 | -------------------------------------------------------------------------------- /tools/cli_analyzer/scandir.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | set ANALYZER="a6oPDFAnalyzer.exe" 4 | set DIRPATH=%1 5 | 6 | if [%1]==[] goto :help 7 | 8 | REM for /R in %DIRPATH%\ %%A do echo "entry = %%A" 9 | REM - FOR /R [[drive:]path] %%parameter IN (set) DO command 10 | FOR /R %DIRPATH% %%E IN (*) DO echo %%E && %ANALYZER% %%E >> result.txt 11 | 12 | goto :end 13 | 14 | :help 15 | echo syntax: scandir.bat [directoryPath] 16 | 17 | :end 18 | echo - Exiting... 
-------------------------------------------------------------------------------- /module/Makefile.am: -------------------------------------------------------------------------------- 1 | AUTOMAKE_OPTIONS=subdir-objects 2 | 3 | modulesdir=$(libdir)/armadito/modules 4 | modules_LTLIBRARIES=modulePDF.la 5 | 6 | 7 | modulePDF_la_SOURCES= \ 8 | modulePDF.c \ 9 | modulePDF.h 10 | 11 | 12 | AM_CPPFLAGS= 13 | modulePDF_la_LDFLAGS= -module -avoid-version -export-symbols "$(srcdir)/modulePDF.sym" 14 | modulePDF_la_CFLAGS= -I$(top_srcdir)/lib/includes 15 | modulePDF_la_LIBADD=../lib/libarmadito-pdf.la 16 | modulePDF_la_CFLAGS+= @LIBARMADITO_CFLAGS@ 17 | modulePDF_la_LIBADD+= @LIBARMADITO_LIBS@ 18 | 19 | 20 | install-exec-hook: 21 | -rm -f "$(DESTDIR)$(modulesdir)"/modulePDF.la "$(DESTDIR)$(modulesdir)"/modulePDF.a 22 | 23 | install-data-hook: 24 | -rm -f "$(DESTDIR)$(modulesdir)"/modulePDF.la "$(DESTDIR)$(modulesdir)"/modulePDF.a 25 | 26 | EXTRA_DIST=modulePDF.sym -------------------------------------------------------------------------------- /tools/cli_analyzer/scandir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script scan all pdf files in a directory given in parameter 4 | # return the results in another file given as second parameter 5 | # the stats are stored in the stat.txt file 6 | 7 | # check parameters 8 | 9 | # VARIABLES 10 | DIR=$1 11 | RES_FILE=$2 12 | EXE=./a6oPDFAnalyzer 13 | 14 | if [ -z "$1" ] 15 | then 16 | echo "Missing parameters"; 17 | echo "SYNTAX :: scandir.sh [directory] [result_file]"; 18 | exit -1; 19 | fi 20 | 21 | if [ -z "$2" ] 22 | then 23 | echo "Missing parameters" 24 | echo "SYNTAX :: scandir.sh [directory] [result_file]" 25 | ecit -2; 26 | fi 27 | 28 | for f in $DIR/* ; do 29 | echo "Processing $f ..."; 30 | $EXE "$f" >> $RES_FILE 31 | #mv "$f" $DIR/Treated/ 32 | done 33 | 34 | 35 | # Stats coef 36 | more $RES_FILE | grep -e 'Coef =' | sort | uniq -c > stats.txt 37 | 38 | 
39 | exit 0; 40 | -------------------------------------------------------------------------------- /module/modulePDF.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | #define MALICIOUS_COEF 70 -------------------------------------------------------------------------------- /win32/ArmaditoPDF/ArmaditoPDF.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ArmaditoPDF", "ArmaditoPDF\ArmaditoPDF.vcxproj", "{667A295C-61CD-47A7-AAFC-5B7F6088CDB5}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Release|Win32 = Release|Win32 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Debug|Win32.ActiveCfg = Debug|Win32 15 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Debug|Win32.Build.0 = 
Debug|Win32 16 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Release|Win32.ActiveCfg = Release|Win32 17 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Release|Win32.Build.0 = Release|Win32 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /tools/perl_poc/lib/conf/Config.pm: -------------------------------------------------------------------------------- 1 | package Config; 2 | 3 | use strict; 4 | 5 | # GLOBAL CONFIG 6 | our $DEBUG = "no"; 7 | our $ANALYSIS_TIMEOUT = 5; 8 | our $MAX_REP_DETECTION = 150; 9 | 10 | 11 | # PDF STRUCT TESTS coefs 12 | our $ENCRYPTED_PDF = "ENCRYPTED_PDF"; 13 | our $EMPTY_PAGES_WITH_ACTIVE_CONTENT = 99; 14 | our $EMPTY_PAGES_CONTENT = 70; 15 | our $OBJECT_COLLISION_PLUS_BAD_XREF = 90; 16 | our $OBJECT_COLLISION = 10; 17 | our $BAD_XREF_OFFSET = 30; 18 | our $TRAILER_NOT_FOUND = 30; 19 | our $BAD_TRAILER = 40; 20 | our $OBFUSCATED_OBJECTS = 40; 21 | our $MALICIOUS_URI = 50; 22 | our $MULTIPLE_HEADERS = 50; 23 | 24 | 25 | 26 | # OBJECT ANALYSIS TESTS coefs 27 | our $ACTIVE_CONTENT = 40; 28 | our $SHELLCODE = 40; 29 | our $PATTERN_REPETITION = 40; 30 | our $DANGEROUS_PATTERN_HIGH = 90; 31 | our $DANGEROUS_PATTERN_MEDIUM = 40; 32 | our $DANGEROUS_PATTERN_LOW = 20; 33 | our $TIME_EXCEEDED = 20; 34 | 35 | 36 | # CVEs TESTS 37 | our $CVE_2010_2883_DETECTED = 50; 38 | our $CVE_2010_2883_BAD_FONT_FILE_LENGTH = 40; 39 | 40 | 41 | our $MALWARE_DETECTION_COEF = 70; 42 | 43 | # 44 | 45 | 1;; 46 | -------------------------------------------------------------------------------- /lib/Makefile.am: -------------------------------------------------------------------------------- 1 | AUTOMAKE_OPTIONS=subdir-objects no-dependencies 2 | 3 | lib_LTLIBRARIES = libarmadito-pdf.la 4 | libarmadito_pdf_la_LDFLAGS = -version-number 0:12:6 5 | 6 | AM_CPPFLAGS= -I$(top_srcdir)/lib/includes 7 | 8 | 
libarmadito_pdf_la_SOURCES = \ 9 | $(top_srcdir)/lib/src/armaditopdf.c \ 10 | $(top_srcdir)/lib/src/filters.c \ 11 | $(top_srcdir)/lib/src/log.c \ 12 | $(top_srcdir)/lib/src/osdeps.c \ 13 | $(top_srcdir)/lib/src/pdfObjectsAnalysis.c \ 14 | $(top_srcdir)/lib/src/pdfParsing.c \ 15 | $(top_srcdir)/lib/src/pdfStructAnalysis.c \ 16 | $(top_srcdir)/lib/src/pdfStructs.c \ 17 | $(top_srcdir)/lib/src/utils.c 18 | 19 | armadito_pdfincludedir=$(includedir)/libarmadito-pdf 20 | 21 | armadito_pdfinclude_HEADERS =\ 22 | $(top_srcdir)/lib/includes/armaditopdf.h \ 23 | $(top_srcdir)/lib/includes/filters.h \ 24 | $(top_srcdir)/lib/includes/log.h \ 25 | $(top_srcdir)/lib/includes/miniz.c \ 26 | $(top_srcdir)/lib/includes/osdeps.h \ 27 | $(top_srcdir)/lib/includes/pdfAnalysis.h \ 28 | $(top_srcdir)/lib/includes/pdfParsing.h \ 29 | $(top_srcdir)/lib/includes/pdfStructs.h \ 30 | $(top_srcdir)/lib/includes/utils.h 31 | 32 | 33 | pkgconfigdir = $(libdir)/pkgconfig 34 | pkgconfig_DATA = libarmadito-pdf.pc 35 | 36 | libarmadito-pdf.pc: libarmadito-pdf.pc.in 37 | sed -e 's![@]prefix[@]!$(prefix)!g' \ 38 | -e 's![@]exec_prefix[@]!$(exec_prefix)!g' \ 39 | -e 's![@]includedir[@]!$(includedir)!g' \ 40 | -e 's![@]libdir[@]!$(libdir)!g' \ 41 | -e 's![@]PACKAGE_VERSION[@]!$(PACKAGE_VERSION)!g' \ 42 | $(top_srcdir)/lib/libarmadito-pdf.pc.in > $@ -------------------------------------------------------------------------------- /lib/TODO: -------------------------------------------------------------------------------- 1 | 2 | --------- 3 | TODO LIST 4 | --------- 5 | 6 | // TODO :: checkMagicNumber :: search the header in the 1024 first bytes. 7 | // TODO :: checkMagicNumber :: Thread XDP files. 8 | // TODO :: printAnalysisReport :: filter report informations by log level. 9 | // TODO :: getPDFContent :: set max_size limit. 10 | // TODO :: removeComments :: split this function (implement function get_line, etc.) 11 | // TODO :: check offset :: if the document has been uncommented. 
the offset will be incorrect. 12 | // TODO :: getObjectInfo :: fill obj->dico_len; 13 | // TODO :: replaceString :: replace all occurrences. 14 | // TODO :: Notation :: empty_doc_with_active_content (if no error). 15 | // TODO :: getActions :: get other potentially dangerous actions (OpenActions - GoToE - GoToR - etc.) 16 | // TODO :: decodeObjectStream :: check if the stream is encrypted. (/Encrypt in the dico) 17 | // TODO :: pdfParsing :: continue if the parsing failure is due to bad stream decode. 18 | // TODO :: getJSContentInXFA :: Check the keyword javascript 19 | // TODO :: decodeObjectStream :: do not try to decode an object twice. 20 | // TODO :: getEmbeddedFile :: Treat the case <> >> 21 | // TODO :: all :: set error codes. 22 | // TODO :: all :: use obj->dico_len instead of strlen(dico). 23 | // TODO :: documentStructureAnalysis :: check trailers. 24 | // TODO :: TOFIX :: filters implementations. 25 | // TODO :: TOFIX :: removeComments() function implementation. 26 | // TODO :: FlateDecode :: check if the stream is conformant (Ex: '\r') 27 | // TODO :: all :: declare a public API. (for version 1.0.0) 28 | // TODO :: all :: api documentation. -------------------------------------------------------------------------------- /lib/src/log.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 
16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | #include "log.h" 23 | 24 | 25 | static enum log_level current_max_level = default_max_level; 26 | 27 | 28 | void set_current_log_level(enum log_level level){ 29 | 30 | current_max_level = level; 31 | 32 | return; 33 | } 34 | 35 | char * lvl_tostring(enum log_level level){ 36 | 37 | switch (level){ 38 | case LOG_LEVEL_ERROR: 39 | return ""; 40 | case LOG_LEVEL_WARNING: 41 | return ""; 42 | case LOG_LEVEL_INFO: 43 | return ""; 44 | case LOG_LEVEL_DEBUG: 45 | return ""; 46 | default: 47 | return ""; 48 | } 49 | 50 | } 51 | 52 | void cli_log(enum log_level level, const char * fmt, ...){ 53 | 54 | va_list ap; 55 | 56 | if (level > current_max_level) 57 | return; 58 | 59 | printf("%s ", lvl_tostring(level)); 60 | 61 | va_start(ap, fmt); 62 | vprintf(fmt, ap); 63 | va_end(ap); 64 | 65 | 66 | return; 67 | } -------------------------------------------------------------------------------- /lib/includes/pdfParsing.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 
19 | 20 | ***/ 21 | 22 | 23 | 24 | #ifndef _pdf_Parsing_h_ 25 | #define _pdf_Parsing_h_ 26 | 27 | 28 | #include "pdfStructs.h" 29 | 30 | 31 | #define LARGE_FILE_SIZE 1500000 32 | 33 | 34 | /***** pdf Parsing functions prototypes *****/ 35 | 36 | int parsePDF(struct pdfDocument * pdf); 37 | int checkMagicNumber(struct pdfDocument * pdf); 38 | int getPDFContent(struct pdfDocument * pdf); 39 | int extractObjectFromObjStream(struct pdfDocument * pdf, struct pdfObject *obj); 40 | int getObjectInfos(struct pdfObject * obj, struct pdfDocument * pdf); 41 | int getPDFObjects(struct pdfDocument * pdf); 42 | int getPDFTrailers(struct pdfDocument * pdf); 43 | int getPDFTrailers_2(struct pdfDocument * pdf); 44 | int decodeObjectStream(struct pdfObject * obj); 45 | int removeComments(struct pdfDocument * pdf); 46 | char * getObjectDictionary(struct pdfObject * obj, struct pdfDocument * pdf); 47 | char * getObjectType(struct pdfObject * obj); 48 | char * getObjectStream(struct pdfObject * obj); 49 | char * getStreamFilters(struct pdfObject * obj); 50 | char * hexaObfuscationDecode(char * dico); 51 | char *removeCommentLine(char * src, int size, int * ret_len); 52 | 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /lib/includes/pdfAnalysis.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | 23 | 24 | #ifndef _pdf_Analysis_h_ 25 | #define _pdf_Analysis_h_ 26 | 27 | 28 | #include "pdfStructs.h" 29 | 30 | 31 | /***** pdf Structure analysis functions prototypes *****/ 32 | int documentStructureAnalysis(struct pdfDocument * pdf); 33 | int checkXRef(struct pdfDocument * pdf); 34 | int checkEmptyDocument(struct pdfDocument * pdf); 35 | int checkTrailer(struct pdfDocument * pdf); 36 | 37 | 38 | /***** pdf Objects analysis functions prototypes *****/ 39 | int getDangerousContent(struct pdfDocument* pdf); 40 | int getJavaScript(struct pdfDocument * pdf, struct pdfObject* obj); 41 | int getXFA(struct pdfDocument * pdf, struct pdfObject* obj); 42 | int getEmbeddedFile(struct pdfDocument * pdf, struct pdfObject* obj); 43 | int getInfoObject(struct pdfDocument * pdf); 44 | int unknownPatternRepetition(char * stream, int size, struct pdfDocument * pdf, struct pdfObject * obj); 45 | int findDangerousKeywords(char * stream, struct pdfDocument * pdf, struct pdfObject * obj); 46 | int getURI(struct pdfDocument * pdf, struct pdfObject * obj); 47 | int getJSContentInXFA(char * stream, int size, struct pdfObject * obj, struct pdfDocument * pdf); 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /lib/includes/osdeps.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 
11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | #ifndef _os_deps_h_ 23 | #define _os_deps_h_ 24 | 25 | #include 26 | 27 | typedef int(*dirent_scan_cb)(int fd, char * filename); 28 | int os_scan_dir(char * path, int recurse, dirent_scan_cb dirent_cb, void * data); 29 | 30 | #ifdef _WIN32 31 | 32 | #include 33 | #include 34 | #define os_strncat strncat_s 35 | #define os_sprintf sprintf_s 36 | #define os_sscanf sscanf_s 37 | #define os_strncpy strncpy_s 38 | #define os_strdup _strdup 39 | #define os_lseek _lseek 40 | #define os_read _read 41 | #define os_fileno _fileno 42 | FILE * os_fopen(const char * filename, const char * mode); 43 | 44 | 45 | #else 46 | 47 | #include 48 | #define os_fopen fopen 49 | #define os_sprintf snprintf 50 | #define os_sscanf sscanf 51 | #define os_strdup strdup 52 | #define os_lseek lseek 53 | #define os_read read 54 | #define os_fileno fileno 55 | //#define os_sprintf(buffer,sizeOfBuffer, format,...) sprintf(buffer, format,...) 56 | int os_strncat(char *strDest, size_t numberOfElements, const char *strSource, size_t count); 57 | int os_strncpy(char *strDest, size_t numberOfElements, const char *strSource, size_t count); 58 | 59 | #endif 60 | 61 | #endif -------------------------------------------------------------------------------- /lib/includes/log.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 
6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | #ifndef _log_h_ 23 | #define _log_h_ 24 | 25 | #include 26 | #include 27 | 28 | 29 | #define default_max_level LOG_LEVEL_WARNING 30 | #define print_report 1 // print the analysis report. 31 | 32 | enum log_level { 33 | LOG_LEVEL_ERROR = 1 << 1, 34 | LOG_LEVEL_WARNING = 1 << 2, 35 | LOG_LEVEL_INFO = 1 << 3, 36 | LOG_LEVEL_DEBUG = 1 << 4, 37 | LOG_LEVEL_NONE = 1 << 5, 38 | }; 39 | 40 | void cli_log(enum log_level level, const char * fmt, ...); 41 | void set_current_log_level(enum log_level level); 42 | 43 | #ifdef _WIN32 44 | 45 | #define err_log(fmt, ...) cli_log(LOG_LEVEL_ERROR,(fmt),__VA_ARGS__) 46 | #define warn_log(fmt, ...) cli_log(LOG_LEVEL_WARNING,(fmt),__VA_ARGS__) 47 | #define dbg_log(fmt, ...) cli_log(LOG_LEVEL_DEBUG,(fmt),__VA_ARGS__) 48 | #define info_log(fmt, ...) cli_log(LOG_LEVEL_INFO,(fmt),__VA_ARGS__) 49 | 50 | #else 51 | 52 | #define err_log(fmt, ...) cli_log(LOG_LEVEL_ERROR,(fmt),##__VA_ARGS__) 53 | #define warn_log(fmt, ...) cli_log(LOG_LEVEL_WARNING,(fmt),##__VA_ARGS__) 54 | #define dbg_log(fmt, ...) cli_log(LOG_LEVEL_DEBUG,(fmt),##__VA_ARGS__) 55 | #define info_log(fmt, ...) 
cli_log(LOG_LEVEL_INFO,(fmt),##__VA_ARGS__) 56 | 57 | #endif 58 | 59 | 60 | #endif -------------------------------------------------------------------------------- /lib/includes/utils.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 
19 | 20 | ***/ 21 | 22 | 23 | 24 | #ifndef _utils_h_ 25 | #define _utils_h_ 26 | 27 | 28 | #include "pdfStructs.h" 29 | 30 | /* Utils functions prototypes */ 31 | 32 | void * searchPattern(char* src, char* pat , int pat_size , int size); 33 | struct pdfObject * getPDFObjectByRef(struct pdfDocument * pdf, char * ref); 34 | struct pdfObject * getPDFNextObjectByRef(struct pdfDocument * pdf, struct pdfObject * obj, char * ref); 35 | void printObject(struct pdfObject * obj); 36 | void printObjectByRef(struct pdfDocument * pdf, char * ref); 37 | void printObjectInFile(struct pdfObject * obj); 38 | void printPDFObjects(struct pdfDocument * pdf); 39 | int getNumber(char* ptr, int size); 40 | char* getNumber_s(char* ptr, int size); 41 | char * getIndirectRef(char * ptr, int size); 42 | char * getDelimitedStringContent(char * src, char * delimiter1, char * delimiter2, int src_len); 43 | char * getIndirectRefInString(char * ptr, int size); 44 | char * getPattern(char * ptr, int size, int len); 45 | char * getUnicodeInString(char * stream, int size); 46 | char * getHexa(char * dico, int size); 47 | char * replaceInString(char * src, char * toReplace , char * pat); 48 | char * toBinary(char * stream, int size); 49 | char * binarytoChar(char * binary, int size, int * returned_size); 50 | void printStream(char * stream, int size); 51 | void debugPrint(char * stream, int len); // print in a debug file 52 | 53 | 54 | 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /lib/includes/armaditopdf.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 
6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | 23 | 24 | #ifndef _armadito_pdf_h_ 25 | #define _armadito_pdf_h_ 26 | 27 | #include "pdfStructs.h" 28 | 29 | #define a6o_pdf_ver "0.12.6" 30 | 31 | 32 | // Tests Coefficients 33 | 34 | //#define bad_header 35 | //#define encrypted 36 | #define EMPTY_PAGE_CONTENT 99 37 | #define OBJECT_COLLISION 10 38 | #define OBJECT_COLLISION_AND_BAD_XREF 60 39 | #define BAD_TRAILER 40 40 | #define BAD_XREF_OFFSET 30 41 | #define BAD_OBJ_OFFSET 20 42 | #define OBFUSCATED_OBJECT 50 43 | #define MULTIPLE_HEADERS 50 44 | #define MALICIOUS_COMMENTS 50 45 | 46 | #define ACTIVE_CONTENT 40 47 | #define SHELLCODE 40 48 | #define PATTERN_HIGH_REPETITION 40 49 | #define DANGEROUS_KEYWORD_HIGH 90 50 | #define DANGEROUS_KEYWORD_MEDIUM 40 51 | #define DANGEROUS_KEYWORD_LOW 20 52 | #define TIME_EXCEEDED 20 53 | 54 | //#define LARGE_FILE_SIZE 1500000 55 | 56 | 57 | enum err_code { 58 | 59 | unexpected_error = -1 << 0, 60 | bad_pdf_version = -1 << 1, 61 | bad_pdf_header = -1 << 2, 62 | unsupported_file = -1 << 3, 63 | bad_xref_format = -1 << 4, 64 | bad_trailer_format = -1 << 5, 65 | bad_object_format = -1 << 6, 66 | stream_not_decoded = -1 << 7 67 | 68 | }; 69 | 70 | 71 | /* Functions */ 72 | char * getVersion(); 73 | int analyzePDF(char * filename); 74 | int analyzePDF_fd(int fd, char * filename); 75 | int 
analyzePDF_ex(int fd, char * filename); 76 | int calcSuspiciousCoefficient(struct pdfDocument * pdf); 77 | void printAnalysisReport(struct pdfDocument * pdf); 78 | 79 | 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /lib/includes/filters.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | 23 | 24 | #ifndef _filters_h_ 25 | #define _filters_h_ 26 | 27 | 28 | #include "pdfStructs.h" 29 | 30 | 31 | // LZWDecode 32 | #define FIRST_CODE 258 33 | #define EOD_MARKER 257 34 | #define CLEAR_TABLE 256 35 | 36 | #define MAX_CODES 512 37 | 38 | struct LZWdico{ 39 | 40 | unsigned short code; 41 | char * entry; 42 | int entry_len; 43 | 44 | struct LZWdico * next; 45 | }; 46 | 47 | /* Functions prototypes */ 48 | 49 | char * FlateDecode(char * stream, struct pdfObject* obj); 50 | char * ASCIIHexDecode(char * stream, struct pdfObject * obj); 51 | char * LZWDecode(char* stream, struct pdfObject * obj); 52 | char * ASCII85Decode(char * stream, struct pdfObject * obj); 53 | char * CCITTFaxDecode(char* stream, struct pdfObject * obj); 54 | 55 | // LZWDdecode functions. 
package CVEs;

use strict;
use warnings;

# Set to anything other than "no" to trace the detection steps on STDOUT.
my $DEBUG = "no";

# CVE_2010_2883_Detection(\%pdfObjects)
#
# Scans the parsed objects of a PDF document for the markers of CVE-2010-2883
# in its embedded font files.
#
# Parameter:
#   \%pdfObjects - hash reference mapping an object reference ("12 0 obj") to a
#                  hash describing that object. The keys read here are "type",
#                  "ref", "fontfile", "stream_d" (decoded stream) and "length1".
#
# Returns one of:
#   "DETECTED"             - a font file stream contains the "SING" string
#   "BAD_FONT_FILE_LENGTH" - no SING string was seen, but at least one font file
#                            declares a /Length1 that differs from the real
#                            length of its decoded stream
#   "none"                 - nothing suspicious was found (or no table was given)
#
# "DETECTED" always wins over "BAD_FONT_FILE_LENGTH", so the result does not
# depend on the (random) order in which the hash is walked.
sub CVE_2010_2883_Detection {

    my $ref    = shift;
    my $status = "none";

    # Nothing to analyse without an object table.
    return $status unless ref $ref;

    print "\n\n:::CVE_2010_2883_Detection:::\n" unless $DEBUG eq "no";

    # Walk the table through the reference: no need to copy the whole hash.
    for my $obj (values %{$ref}) {

        next unless ref $obj;
        next unless defined($obj->{"type"}) && $obj->{"type"} eq "/FontDescriptor";

        print "Found FontDescriptor object :: $obj->{ref}\n" unless $DEBUG eq "no";

        # The descriptor points to its font file with an indirect reference
        # ("12 0 R"); turn it into the key used in the object table ("12 0 obj").
        next unless defined($obj->{"fontfile"}) && $obj->{"fontfile"} =~ /(\d+\s\d\sR)/;
        my $fontfile = $1;
        $fontfile =~ s/R/obj/;
        print "font File found :: $fontfile\n" unless $DEBUG eq "no";

        # Get the font file stream.
        my $font_obj = $ref->{$fontfile};
        unless (ref $font_obj
            && defined($font_obj->{"stream_d"})
            && length($font_obj->{"stream_d"}) > 0)
        {
            print "Warning :: CVE_2010_2883_Detection :: Font File Object $fontfile is not defined :\n" unless $DEBUG eq "no";
            next;
        }

        my $fontstream  = $font_obj->{"stream_d"};
        my $real_length = length($fontstream);
        my $has_length1 = exists($font_obj->{"length1"});

        print "Lenght1 = ".$font_obj->{"length1"}."\n" unless ($DEBUG eq "no" or !$has_length1);
        print "Real length = ".$real_length."\n" unless $DEBUG eq "no";

        # Check the declared length against the decoded stream.
        if ($has_length1) {
            my $declared = $font_obj->{"length1"};
            my $mismatch = do {
                # length1 comes straight from the parsed dictionary and may not
                # be a clean integer; compare numerically without warning.
                no warnings qw(numeric uninitialized);
                $declared != $real_length;
            };

            if ($mismatch) {
                print "Warning :: Font File decoded stream Length is Wrong :: ".$declared." :: ".$real_length."\n" unless $DEBUG eq "no";
                # Never downgrade a detection made on another font file.
                $status = "BAD_FONT_FILE_LENGTH" unless $status eq "DETECTED";
            }
        }

        # Detect the SING (Smart INdependent Glyphlets) table name.
        if ($fontstream =~ /SING/) {
            print "Warning :: Found SING (Smart INdependent Glyphlets) :: Possible CVE_2010_2883\n" unless $DEBUG eq "no";
            $status = "DETECTED";

            # TODO combine with previous test (bad_font_file_length) to detect CVE
        }
    }

    return $status;
}

1;
Status](https://travis-ci.org/armadito/armadito-mod-pdf.svg?branch=DEV)](https://travis-ci.org/armadito/armadito-mod-pdf) 4 | 5 | Coverity Scan Build Status 7 | 8 | 9 | Armadito module PDF is a heuristic module for PDF document analysis. 10 | 11 | Copyright (C) Teclib', 2015, 2016 12 | 13 | See the online documentation at: http://armadito-av.readthedocs.io/en/latest/ 14 | 15 | License : GPLv3 16 | 17 | What is it? 18 | ----------- 19 | 20 | Armadito PDF analyzer is a module for PDF document scanning that includes: 21 | 22 | * a PDF parser 23 | 24 | * a heuristic analyzer that computes the document confidence level 25 | 26 | Licensing 27 | --------- 28 | 29 | Armadito PDF analyzer is licensed under the GPLv3 https://www.gnu.org/licenses/license-list.html#GNUGPLv3 30 | 31 | Dependencies 32 | ------------ 33 | 34 | > miniz.c 35 | 36 | FEATURES 37 | -------- 38 | 39 | ==> Parsing <== 40 | 41 | * Remove PostScript comments in the content of the document. 42 | * Get PDF version in header (Ex: %PDF-1.7). 43 | * Get trailers and xref table or xref objects. 44 | * Get information about the objects described in the document (reference, dictionary, type, stream, filters, etc). 45 | * Extract objects embedded in stream objects. 46 | * Decode object streams encoded with filters : FlateDecode, ASCIIHexDecode, ASCII85Decode, LZWDecode, CCITTFaxDecode 47 | 48 | ==> Analysis <== 49 | 50 | * Tests based on PDF document structure (according to PDF specifications): 51 | - Check the PDF header version (from version 1.1 to 1.7). 52 | - Check if the content of the document is encrypted. 53 | - Check that the document contains non-empty pages. 54 | - Check object collision in object declaration. 55 | - Check trailers format. 56 | - Check xref table and xref object. 57 | - Check the presence of malicious PostScript comments (which could cause parsing errors). 
58 | 59 | 60 | * Tests based on PDF objects content: 61 | - Get potentially malicious active contents (JavaScripts, Embedded files, Forms, URI, etc.) 62 | - JavaScript content analysis (malicious keywords, pattern repetition, unicode strings, etc). 63 | - Info object content analysis (search for potentially malicious strings). 64 | - Check if the object dictionary is hex-obfuscated. 65 | 66 | 67 | ==> Notation <== 68 | 69 | * A suspicion coefficient is assigned to each test. 70 | * Compute the overall suspicion coefficient of the PDF document. 71 | 72 | 73 | LIMITATIONS 74 | ----------- 75 | 76 | - Supported PDF versions are: %PDF-1.1 to %PDF-1.7. 77 | - PDF documents with encrypted content are not supported. 78 | - Comment removal is skipped for documents larger than 2MB. 79 | -------------------------------------------------------------------------------- /module/modulePDF.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>. 
19 | 20 | ***/ 21 | 22 | #include "modulePDF.h" 23 | 24 | struct modulePDF_data { 25 | const char *tmp_dir; 26 | int late_days; 27 | int critical_days; 28 | }; 29 | 30 | static enum a6o_mod_status modulePDF_init(struct a6o_module *module) { 31 | 32 | // This module doesn't need initialization. 33 | a6o_log(A6O_LOG_MODULE, A6O_LOG_LEVEL_INFO, "module PDF initialized successfully!"); 34 | return A6O_MOD_OK; 35 | } 36 | 37 | static enum a6o_mod_status modulePDF_close(struct a6o_module *module) { 38 | 39 | // This modules doesn't need close instruction. 40 | return A6O_MOD_OK; 41 | } 42 | 43 | static enum a6o_update_status modulePDF_info(struct a6o_module *module, struct a6o_module_info *info){ 44 | 45 | time_t ts = 0; 46 | struct tm timeptr = {0, 30, 8, 8, 5, 116}; // 01/03/2016 9:30 47 | 48 | ts=mktime(&timeptr); 49 | info->mod_update_ts = ts; 50 | 51 | return A6O_UPDATE_OK; 52 | } 53 | 54 | 55 | static enum a6o_file_status modulePDF_scan(struct a6o_module *module, int fd, const char *path, const char *mime_type, char **pmod_report) { 56 | 57 | enum a6o_file_status status = A6O_FILE_CLEAN; 58 | int ret = 0; 59 | 60 | 61 | // launch analysis 62 | ret = analyzePDF_ex(fd,path); 63 | 64 | if (ret == -1) { 65 | status = A6O_FILE_IERROR; 66 | } 67 | else if (ret == -2) { 68 | status = A6O_FILE_UNDECIDED; // Not supported files (encrypted contents or bad header). 
69 | } 70 | else if (ret < MALICIOUS_COEF) { 71 | status = A6O_FILE_CLEAN; 72 | } 73 | else if (ret >= MALICIOUS_COEF) { 74 | status = A6O_FILE_SUSPICIOUS; 75 | 76 | *pmod_report = os_strdup("ModulePDF!SuspiciousPDF"); 77 | } 78 | 79 | return status; 80 | } 81 | 82 | 83 | static const char *modulePDF_mime_types[] = { 84 | "application/pdf", 85 | NULL, 86 | }; 87 | 88 | struct a6o_module module = { 89 | .init_fun = modulePDF_init, 90 | .conf_table = NULL, 91 | .post_init_fun = NULL, 92 | .scan_fun = modulePDF_scan, 93 | .close_fun = modulePDF_close, 94 | .info_fun = modulePDF_info, 95 | .supported_mime_types = modulePDF_mime_types, 96 | .name = "modulePDF", 97 | .size = sizeof(struct modulePDF_data), 98 | }; 99 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | :: Change Log :: 2 | All notable changes to the project Armadito-pdf will be documented in this file. 3 | This project adheres to [Semantic versionning](http://semver.org) 4 | 5 | 6 | Mon, 27 May 2016 :: 0.10.1 :: (ufausther) 7 | ----------------------------------------- 8 | *** New version 0.10.1 pushed in github (DEV branch). 9 | 10 | 11 | Wed, 1 June 2016 :: 0.11.0 :: (ufausther) 12 | ----------------------------------------- 13 | 14 | + add custom log function (cli_log with macro definition.) 15 | + add "filename" field to pdfDocument struct. 16 | + modif function printAnalysisReport(struct pdfDocument * pdf) prototype. 17 | + add errors count in analysis report. 18 | + add functions headers. 19 | + remove duplicated call to checkMagicNumber() function 20 | + rename function getNumber_a(...) => getNumber_s(...) 21 | + improve error handling. 22 | + FlateDecode filter : increase destination buffer size on MZ_BUFF_ERROR error. 
23 | 24 | Fri, 03 June 2016 :: 0.11.1 :: (ufausther) 25 | ------------------------------------------ 26 | 27 | + FlateDecode filter : increase destination buffer size on MZ_BUFF_ERROR error. 28 | 29 | 30 | Mon, 06 June 2016 :: 0.11.1 :: (ufausther) 31 | ------------------------------------------ 32 | 33 | + translate/update README.md 34 | 35 | Tue, 07 June 2016 :: 0.12.0 :: (ufausther) 36 | ------------------------------------------ 37 | 38 | + Add header files (armaditopdf.h - pdfStructs.h - pdfParsing.h - pdfAnalysis.h - utils.h ) and corresponding source files. 39 | + folders reorganisation. 40 | + define version string in header (#define a6o_pdf_ver) 41 | + Add change log file. 42 | 43 | Tue, 07 June 2016 :: 0.12.1 :: (ufausther) 44 | ------------------------------------------ 45 | 46 | + bug fix: getDelimitedStringContent() temporary buffer was not updated. 47 | 48 | 49 | Wed, 08 June 2016 :: 0.12.1 :: (ufausther) 50 | ------------------------------------------ 51 | 52 | + modifications for Linux compatibility. 53 | 54 | Thu, 09 June 2016 :: 0.12.2 :: (ufausther) 55 | ------------------------------------------ 56 | 57 | + fix some crashes detected with fuzzing 58 | 59 | Tue, 14 June 2016 :: 0.12.3 :: (ufausther) 60 | ------------------------------------------ 61 | 62 | + fix crash :: getStreamFilters :: malformed dictionary. 63 | + recursive scan fix :: scan was stopped on error. 64 | + checkXref :: skip white spaces at the end of the line. 65 | 66 | Wed, 15 June 2016 :: 0.12.3 :: (ufausther) 67 | ------------------------------------------ 68 | 69 | + fix crashes :: extractObjectFromObjStream() :: invalid offset (/First) and number of objects (/N) in dictionary 70 | 71 | 72 | Fri, 19 Aug 2016 :: 0.12.4 :: (ufausther) 73 | ------------------------------------------ 74 | 75 | + fix warning in function getTrailers :: test file = clam.pdf 76 | + getEmbeddedFile :: looking for FileSpec object is not necessary for embedded file detection. 
77 | 78 | 79 | Thu, 9 Feb 2017 :: 0.12.5 :: (ufausther) 80 | ------------------------------------------ 81 | 82 | + Fix some memory leaks. 83 | + Fix default log level to warn. 84 | + Minors changes 85 | + pdfParsing: improve code. 86 | 87 | 88 | Mon, 20 Feb 2017 :: 0.12.6 :: (ufausther) 89 | ------------------------------------------ 90 | 91 | + Minors changes 92 | + Fix resource leaks 93 | + Get library version with getVersion() function. 94 | -------------------------------------------------------------------------------- /win32/ArmaditoPDF/ArmaditoPDF/ArmaditoPDF.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | {de5d87ae-3c70-4a71-877f-f5cef05b237e} 18 | 19 | 20 | {2cf89655-f2a4-4131-a986-3db75eda9ff0} 21 | 22 | 23 | 24 | 25 | Fichiers sources 26 | 27 | 28 | Fichiers sources 29 | 30 | 31 | Fichiers sources 32 | 33 | 34 | Fichiers sources 35 | 36 | 37 | Fichiers sources 38 | 39 | 40 | Fichiers sources 41 | 42 | 43 | Fichiers sources 44 | 45 | 46 | Fichiers sources 47 | 48 | 49 | Fichiers sources 50 | 51 | 52 | Fichiers sources\osdeps 53 | 54 | 55 | 56 | 57 | Fichiers d%27en-tête 58 | 59 | 60 | Fichiers d%27en-tête 61 | 62 | 63 | Fichiers d%27en-tête 64 | 65 | 66 | Fichiers d%27en-tête 67 | 68 | 69 | Fichiers d%27en-tête 70 | 71 | 72 | Fichiers d%27en-tête 73 | 74 | 75 | Fichiers d%27en-tête 76 | 77 | 78 | Fichiers d%27en-tête\osdeps 79 | 80 | 81 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | 2 | AC_INIT(armadito-pdf, [0.12.6], 
ufausther@teclib.com) 3 | AM_INIT_AUTOMAKE([foreign]) 4 | 5 | # collect arguments 6 | # debug 7 | AC_ARG_ENABLE([debug], 8 | AS_HELP_STRING([--enable-debug],[enable debugging @<:@default is yes@:>@]), 9 | , 10 | enable_debug="yes") 11 | 12 | # armadito pdf library 13 | AC_ARG_ENABLE([lib], 14 | AS_HELP_STRING([--enable-lib],[enable armadito-pdf library @<:@default is no@:>@]), 15 | enable_lib="yes", 16 | enable_lib="no") 17 | 18 | # module 19 | AC_ARG_ENABLE([module], 20 | AS_HELP_STRING([--enable-module],[enable armadito-av module @<:@default is no@:>@]), 21 | enable_module="yes", 22 | enable_module="no") 23 | 24 | 25 | # cli tools 26 | AC_ARG_ENABLE([tools], 27 | AS_HELP_STRING([--enable-tools],[enable armadito-pdf cli tools @<:@default is no@:>@]), 28 | [enable_tools="yes"], 29 | [enable_tools="no"]) 30 | 31 | PKG_PROG_PKG_CONFIG 32 | 33 | # check for analysis module sources 34 | AC_MSG_CHECKING(for armadito-pdf library sources) 35 | AC_CONFIG_SRCDIR(lib/src/armaditopdf.c) 36 | AC_MSG_RESULT(yes) 37 | 38 | # check for needed programs 39 | AC_PROG_CC([gcc]) 40 | AC_PROG_LIBTOOL 41 | 42 | # check for debug arg 43 | AC_MSG_CHECKING(for debug) 44 | AC_MSG_RESULT($enable_debug) 45 | AM_CONDITIONAL([DEBUG], [test "$enable_debug" = "yes"]) 46 | if test "$enable_debug" = "yes"; then 47 | AC_DEFINE(DEBUG,1,[Define DEBUG to enable debug messages]) 48 | CFLAGS="${CFLAGS} -g" 49 | fi 50 | 51 | 52 | # check for library arg 53 | AC_MSG_CHECKING(for armadito PDF library) 54 | AC_MSG_RESULT($enable_lib) 55 | AM_CONDITIONAL([COND_LIBRARY], [test "$enable_lib" = "yes"]) 56 | 57 | # check for module arg 58 | AC_MSG_CHECKING(for armadito module) 59 | AC_MSG_RESULT($enable_module) 60 | AM_CONDITIONAL([COND_MODULE], [test "$enable_module" = "yes"]) 61 | 62 | # check for libarmadito if module is enabled. 
63 | if test "$enable_module" = "yes"; then 64 | # check for libarmadito 65 | PKG_CHECK_MODULES(LIBARMADITO, libarmadito, [HAVE_LIBARMADITO=yes], [HAVE_LIBARMADITO=no]) 66 | # check for libarmadito/armadito.h in source tree 67 | if test "$HAVE_LIBARMADITO" = "no"; then 68 | OLD_CPPFLAGS="$CPPFLAGS" 69 | CPPFLAGS=-I$srcdir/../../armadito-core/libmodule/include 70 | AC_CHECK_HEADER([libarmadito/armadito.h], [], [AC_MSG_ERROR([cannot find Armadito module library])], []) 71 | CPPFLAGS="$OLD_CPPFLAGS" 72 | LIBARMADITO_CFLAGS='-I$(top_srcdir)/../../armadito-core/libmodule/include' 73 | AC_SUBST([LIBARMADITO_CFLAGS]) 74 | fi 75 | fi 76 | 77 | # check for tools arg 78 | AC_MSG_CHECKING(for cli tools) 79 | AC_MSG_RESULT($enable_tools) 80 | AM_CONDITIONAL([COND_TOOLS], [test "$enable_tools" = "yes"]) 81 | 82 | # check for libarmadito-pdf if module is enabled. 83 | if test "$enable_tools" = "yes"; then 84 | 85 | # check for libarmadito-pdf 86 | PKG_CHECK_MODULES(LIBARMADITO_PDF, libarmadito-pdf, [HAVE_LIBARMADITO_PDF=yes], [HAVE_LIBARMADITO_PDF=no]) 87 | if test "$HAVE_LIBARMADITO_PDF" = "no"; then 88 | AC_MSG_ERROR("cannot find ARMADITO-PDF library: skipping armadito-pdf tool build.") 89 | fi 90 | fi 91 | 92 | 93 | # Output files 94 | AC_CONFIG_FILES([ 95 | Makefile 96 | ]) 97 | 98 | # conditional makefile for library. 99 | AM_COND_IF([COND_LIBRARY], 100 | AC_CONFIG_FILES([ 101 | lib/Makefile 102 | ])) 103 | 104 | # conditional makefile for tools. 105 | AM_COND_IF([COND_TOOLS], 106 | AC_CONFIG_FILES([ 107 | tools/Makefile 108 | tools/armadito-pdf/Makefile 109 | ])) 110 | 111 | # conditional makefile for module. 
112 | AM_COND_IF([COND_MODULE], 113 | AC_CONFIG_FILES([ 114 | module/Makefile 115 | ])) 116 | 117 | AC_OUTPUT 118 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: C 2 | sudo: required 3 | dist: trusty 4 | addons: 5 | apt: 6 | packages: 7 | - libtool 8 | - libglib2.0-dev 9 | - libmagic-dev 10 | - libxml2-dev 11 | - libmicrohttpd-dev 12 | - libcurl4-openssl-dev 13 | - libjansson-dev 14 | - libjansson4 15 | - autoconf-archive 16 | coverity_scan: 17 | project: 18 | name: armadito/armadito-mod-pdf 19 | description: Build submitted via Travis CI 20 | notification_email: vhamon@teclib.com 21 | build_command_prepend: "./configure --enable-lib PKG_CONFIG_PATH=/home/travis/build/armadito-core/lib/pkgconfig; 22 | make clean" 23 | build_command: make -j 4 24 | branch_pattern: coverity_scan 25 | sonarqube: 26 | branches : 27 | - DEV 28 | deploy: 29 | skip_cleanup: true 30 | provider: launchpad 31 | slug: "~armadito/armadito-mod-pdf/+git/main" 32 | oauth_token: 33 | secure: d8X9y9hEPB0zgibcvVL2Q2qrDoM3o2/Wh28bZcD/A4TH5pgDK9oYbgtU5ywozK/CScpHEHmRTjZhwq9Mh4xVNni7XIKigpHbAKH0NhR6wKryQwR3VwUgRBKxp/UAnWFSv9RiPT7fG5tOQpHaD+8O8N06vLXZ0p4xV29jwnxPEvYEUt0fKmo4SWI18HQLpoFCuQLchgzvPs1X+iixWms5BmBlDIhnwx79geaMLF6sCc9f4TcvJ8yT9s4VbH/qYhsbCHB8lkRiBY1qQqw4sN67gQhJ9oY5J/AbGMJSdb4nimMhhUfkiME8iQVOx07YOaQJ6pMz5VYWpF7dOLlXdvEgc5caVs/gENmpb270aQHUSILn2A+3NI+LnSW3R3dkrtLU+HX9zVtMvnus/8T1f5KSecLvH+mRU4J6RTl9+MEDYA6sD59Ie2sBPqgljymgq8DJ8yoVqBzPzVCk95n5KTu21xobHgqXn4QfB/vGqQbE3U9XBFloALzsd4kMp+W37JtztVgaTlG+YNslCzWUP+f8pO6wHaRzJeVHC9/h24b5fk87TtvwTtVrxinNFXzj4i42A+6zlsW8BZXJ9I8jgaNhKGiWpHUy/iH9EADo2Y2rtYd09adAelEsIIJ4X+/N2+QfRjnDMGUV+8v3xUbLayQT3nb2G05X8YA19NlDoXZATYU= 34 | oauth_token_secret: 35 | secure: 
MnRk4gTNMwpSUodIwrEvdPg5mrufFjodZElpWomHg7gNBOfp/rsbzk4MEeu8sqxq1s0lUQUPzw4uiMy7tKy0x3Gn4XAhFMW9WDOtanDQV3qfPiFxlxHpfg2p+Tu5Ol6TnSYRI0l/vC3rN/pEJPOJoWJu6md6LEXX9U5Itub+rcmiMyygz11bMY46jY+vSZhLcT1LmgK6sojyCdIBFOjd+ACpvK+Lu/6Kj/QYhz3txOjMbsEgz+L2giSgs3j9zmpbg4zPFCL2Cq04muXVr3mOW1fmAXWop6DEs0rc9ovkqJwKaNcPWc1Inm5QDaAcR5ckeY/Oy4L/mHe2zv3/d7RpubrdudhFJzYogsHY2r7kGtQYNCfXaj5QHVRxCnvHhvMCiBI4lrqFZEMZii9eXSQJZJk32Cz20RiTvHeJwQsuukizfuHIzCNLwbb4aumR6m3uAL2U3QJE7j2ZfgkSF1boOvCQplCRiBL6kexNLHvTlVGHbt8a+Eptp0KE+7lKXk9o7CONWxLaCVGOe+2zTms7b5/4FYLD4gajoBRUmaTe0KZVlvvKaoQanj1+ay9SA7pIlBMHoTmnoHHZVu8l2lOERXd4VM1bE+zWH5MULy76cSIhuerwJg5rPUbizVpFsAVrk346gUKcGzWkibY2uSvUyVEoef2jiX62uJ9FUmVUgFo= 36 | on: 37 | tags: true 38 | env: 39 | global: 40 | - secure: ZqqOmdvcZ/tUhR87wlAhnDeFgkCywwYlAchKMIIGJPjGpsMEZU/m+kKi49YJEZrC6i7QO/WexCi3TVIFWaIGtWXCyri0rJIIDPdJtiJ3SNvDhwjcB+eNOsNVnv3Hg8OllZV3+GvoKNKNrwYtYraill4C3+8PeDLLpOI6wQmNJWUsLMRHlSXM6ZDS9bxILrNHFobvTdtuM9wkfwh0JfoIWMdYnzmqrRrhT+bEZaDHMHp7GTSiFBL2lvqlutMWMn8fvTLn35M8839WyFPcr/lnFZZZcSKJBqM8GySq1yNr9NNAoUUhhmP0Oope8iks59mkDQRyRbVpkOYQGkJChhGt2/LQE5W0WjpmWS+5cAqYFlqJKs2AFPNI9a94HI/XZjO0RwQqkpbNHc06wbPIYf0h1aPi75xYsC07NhdwvS40NYou82IjnpffdRDrqNcVP4yBASqaQlaDCrr35nPGm993vPCVa7i0NxrqWmtf5KFo/Nk6Lxh3nWFLABLZI8DEIogOCnVF2W+Q9LaSmbUcmsovV2XNdLNYoKETVlZp5F0h7mXjyYsRN8dexPZb8DBYNWCI5PbJS4gf98JjDRxKVuIbWqRg8W/KbMdU78d4aetznVeLkh7SnVyFtR4ImYEod1xFVdulh1F09VAfLBk/J6HTs84uqdW65w1TKHAtZyO/77Q= 41 | - secure: 
eFWWkuzR9vYPgXPz1f0JyZQFFdipAu5b5fOKXeDyzlb27DG0jQYUeRou6xdtXvH9jKgAtRxwa0IdtV+2cEW+deTr/bxvWlGGLHHnQSgtgvq/BgbDptuTsSVWfRhuNJogNGB5BaMd33lyX5nKrH6Jbz8+qp8DnE2l96kThSbByyamVG/7Q9sXAHEfCUI4UdRSuGijB18ezzzxloDuA3pZPaa6laDXxXSCVRyaY+qREL6qwhWK9LwNdqtSuqcFs5ppIYhz3c0KCMMKb86cZrtJTXolSK6Oe6sjXy+Pj+E1D895jZF2k1YDQ7YLFNhYyFClC85l2NvJBitr5l7FUHhQGsuLfFk3z2yMkJE9UbDOog98EjiRH/VRICReFrEN78D8k8JSziyIXB0FGHoIKEUXV3n9Og++AkFF4qdwxPC/VGGComZBAGC7VMEg67AIuG2vKoCgl7GHvPu/lW0cCZUE0wt+RZOXB6mJv6Fr9qmXWibarv/wA7gItDj1PP8pgxSWn69b4U8aNwoYHNYyBldqK7YRevqwAnaFfohMUmYPJnE80pWLDe2i/4T+Wl8XhuOubG9k3uncXZ6uB4mzVooITYiAzTzesqkU4ix62bm3C0g6h0XawQjzHbGpK0Je2AX7DMlCLyjef+R/4Cj6hP2rsK1IrJbh5McwoSTErydzQU8= 42 | before_install: 43 | - echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END 44 | CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca- 45 | - wget https://sonarqube.com/static/cpp/build-wrapper-linux-x86.zip 46 | - unzip build-wrapper-linux-x86.zip 47 | - export PATH=$PWD/build-wrapper-linux-x86:$PATH 48 | compiler: 49 | - gcc 50 | before_script: 51 | - git clone git://github.com/armadito/armadito-av -b DEV 52 | - cd armadito-av/ 53 | - "./autogen.sh" 54 | - mkdir -p /home/travis/build/armadito-core 55 | - "./configure --prefix=/home/travis/build/armadito-core" 56 | - make 57 | - make install 58 | - cd /home/travis/build/armadito/armadito-mod-pdf 59 | - "./autogen.sh" 60 | script: 61 | - "./configure --enable-lib PKG_CONFIG_PATH=/home/travis/build/armadito-core/lib/pkgconfig" 62 | - build-wrapper-linux-x86-64 --out-dir build-wrapper-out make clean all 63 | - sonar-scanner -X -Dsonar.host.url=https://sonarqube.com -Dsonar.login=$SONAR_TOKEN 64 | -------------------------------------------------------------------------------- /lib/src/osdeps.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 
6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | 23 | 24 | #include "osdeps.h" 25 | #include 26 | #include "log.h" 27 | 28 | 29 | #ifdef _WIN32 30 | 31 | FILE * os_fopen(const char * filename, const char * mode) { 32 | 33 | FILE * f = NULL; 34 | 35 | fopen_s(&f, filename,mode); 36 | 37 | return f; 38 | 39 | 40 | } 41 | 42 | 43 | int os_scan_dir(char * path, int recurse, dirent_scan_cb dirent_cb, void * data){ 44 | 45 | char * rpath = NULL, *entryPath = NULL; 46 | char * escapedPath = NULL; 47 | int ret = 0; 48 | int size = 0; 49 | HANDLE fh = NULL; 50 | WIN32_FIND_DATAA fdata; 51 | WIN32_FIND_DATAA tmp; 52 | int fd = -1; 53 | 54 | if (path == NULL || dirent_cb == NULL){ 55 | err_log("scan_dir :: invalid parameter\n"); 56 | return -1; 57 | } 58 | 59 | dbg_log("scan_dir :: path = %s\n", path); 60 | 61 | // Check if it is a directory // TODO :: os_scan_dir :: scan a file. 
62 | if (!(GetFileAttributesA(path) & FILE_ATTRIBUTE_DIRECTORY)) { 63 | err_log("scan_dir :: (%s) is not a directory\n", path); 64 | return -2; 65 | } 66 | 67 | size = strlen(path) + 3; 68 | rpath = (char*)calloc(size + 1, sizeof(char)); 69 | rpath[size] = '\0'; 70 | sprintf_s(rpath, size, "%s\\*", path); 71 | 72 | dbg_log("scan_dir :: rpath = %s\n",rpath); 73 | 74 | /* 75 | FindFirstFile note 76 | Be aware that some other thread or process could create or delete a file with this name between the time you query for the result and the time you act on the information. If this is a potential concern for your application, one possible solution is to use the CreateFile function with CREATE_NEW (which fails if the file exists) or OPEN_EXISTING (which fails if the file does not exist). 77 | */ 78 | fh = FindFirstFile(rpath, &fdata); 79 | if (fh == INVALID_HANDLE_VALUE) { 80 | warn_log("scan_dir :: FindFirstFileA call failed :: err= [%d]\n", GetLastError()); 81 | goto clean; 82 | } 83 | 84 | while (fh != INVALID_HANDLE_VALUE && FindNextFile(fh, &tmp) != FALSE) { 85 | 86 | // exclude paths "." and ".." 87 | if (!strcmp(tmp.cFileName, ".") || !strcmp(tmp.cFileName, "..")) 88 | continue; 89 | 90 | // build the entry complete path. 
91 | size = strlen(path) + strlen(tmp.cFileName) + 2; 92 | 93 | entryPath = (char*)calloc(size + 1, sizeof(char)); 94 | entryPath[size] = '\0'; 95 | sprintf_s(entryPath, size, "%s\\%s", path, tmp.cFileName); 96 | dbg_log("scan_dir :: cfilename = %s\n", &tmp.cFileName); 97 | dbg_log("scan_dir :: entryPath = %s\n", entryPath); 98 | 99 | 100 | // If it is a directory and we do recursive scan 101 | if ((GetFileAttributesA(entryPath) & FILE_ATTRIBUTE_DIRECTORY) && recurse >= 1) { 102 | 103 | ret = os_scan_dir(entryPath, recurse, dirent_cb, data); 104 | if (ret != 0){ 105 | free(entryPath); 106 | break; 107 | } 108 | } 109 | else { 110 | 111 | (*dirent_cb)(fd,entryPath); 112 | } 113 | 114 | free(entryPath); 115 | entryPath = NULL; 116 | } 117 | 118 | 119 | // TODO :: os_scan_dir :: get stats. 120 | 121 | clean: 122 | if (rpath != NULL){ 123 | free(rpath); 124 | rpath = NULL; 125 | } 126 | FindClose(fh); 127 | 128 | return ret; 129 | 130 | } 131 | 132 | 133 | 134 | #else 135 | 136 | // Linux part for compatibility. 137 | int os_strncat(char *strDest, size_t numberOfElements, const char *strSource, size_t count) { 138 | 139 | 140 | if( strncat(strDest, strSource, count) == NULL){ 141 | return -1; 142 | } 143 | 144 | 145 | return 0; 146 | 147 | } 148 | 149 | int os_strncpy(char *strDest, size_t numberOfElements, const char *strSource, size_t count) { 150 | 151 | 152 | if( strncpy(strDest , strSource, count) == NULL){ 153 | return -1; 154 | } 155 | 156 | return 0; 157 | } 158 | 159 | 160 | int os_scan_dir(char * path, int recurse, dirent_scan_cb dirent_cb, void * data){ 161 | 162 | int ret = 0; 163 | 164 | return ret; 165 | } 166 | 167 | 168 | #endif -------------------------------------------------------------------------------- /tools/cli_analyzer/main.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 
6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | 23 | 24 | #include "armaditopdf.h" 25 | #include "osdeps.h" 26 | #include "log.h" 27 | #include "getopt.h" 28 | 29 | 30 | struct scan_options { 31 | char *path_to_scan; 32 | enum log_level log_level; 33 | }; 34 | 35 | 36 | static struct option cli_option_def[] = { 37 | {"help", no_argument, 0, 'h'}, 38 | {"version", no_argument, 0, 'v'}, 39 | {"log-level", required_argument, 0, 'l'}, 40 | {0, 0, 0, 0} 41 | }; 42 | 43 | 44 | void Usage(){ 45 | 46 | fprintf(stderr, "usage: armadito-pdf [options] FILE|DIR\n"); 47 | fprintf(stderr, "\n"); 48 | fprintf(stderr, "Armadito PDF scanner\n"); 49 | fprintf(stderr, "\n"); 50 | fprintf(stderr, "Options:\n"); 51 | fprintf(stderr, " --help -h print help and quit\n"); 52 | fprintf(stderr, " --version -V print program version\n"); 53 | fprintf(stderr, " --log-level=LEVEL | -l LEVEL set log level [debug=X; warning=Y; error=Z]\n"); 54 | fprintf(stderr, "\n"); 55 | 56 | exit(-1); 57 | 58 | } 59 | 60 | void Version(){ 61 | 62 | printf("armadito-pdf v%s (c) 2015 - 2017 by Teclib\n",a6o_pdf_ver); 63 | exit(1); 64 | } 65 | 66 | 67 | int parse_options(int argc, char ** argv, struct scan_options * opts){ 68 | 69 | while(1){ 70 | 71 | int c, option_index = 0; 72 | 73 | c = getopt_long (argc, argv, "hvil:", cli_option_def, &option_index); 74 | 75 | /* Detect the end of 
the options. */ 76 | if (c == -1){ 77 | break; 78 | } 79 | 80 | switch(c){ 81 | 82 | case 'h': 83 | Usage(); 84 | break; 85 | 86 | case 'v': 87 | Version(); 88 | break; 89 | 90 | case 'l': 91 | 92 | if(!strcmp("error",optarg)) 93 | opts->log_level = LOG_LEVEL_ERROR; 94 | else if(!strcmp("warn",optarg)) 95 | opts->log_level = LOG_LEVEL_WARNING; 96 | else if(!strcmp("info",optarg)) 97 | opts->log_level = LOG_LEVEL_INFO; 98 | else if(!strcmp("debug",optarg)) 99 | opts->log_level = LOG_LEVEL_DEBUG; 100 | else if(!strcmp("none",optarg)) 101 | opts->log_level = LOG_LEVEL_NONE; 102 | else{ 103 | fprintf(stderr, "Option Error: Bad log level value\n"); 104 | Usage(); 105 | abort(); 106 | } 107 | break; 108 | 109 | default: 110 | abort(); 111 | break; 112 | } 113 | 114 | } 115 | 116 | if (optind < argc){ 117 | 118 | opts->path_to_scan = strdup(argv[optind]); 119 | 120 | }else{ 121 | fprintf(stderr, "Argument Error: Missing file or directory path\n"); 122 | Usage(); 123 | } 124 | 125 | return 0; 126 | } 127 | 128 | 129 | // Launch a scan directory 130 | int do_scan(struct scan_options * opts){ 131 | 132 | int ret; 133 | FILE * f = NULL; 134 | int fd = -1; 135 | 136 | // analysis with opened file descriptor. 137 | if(!(f = os_fopen(opts->path_to_scan,"rb"))){ 138 | err_log("Can't open file %s\n", opts->path_to_scan); 139 | return -1; 140 | } 141 | 142 | fd = os_fileno(f); 143 | ret = analyzePDF_ex(fd, opts->path_to_scan); 144 | fclose(f); 145 | 146 | return ret; 147 | } 148 | 149 | 150 | // launch a task according to options and parameters. 
151 | int process_opts(struct scan_options * opts){ 152 | 153 | 154 | if(opts == NULL || opts->path_to_scan == NULL){ 155 | return -1; 156 | } 157 | 158 | // Set log level 159 | if(opts->log_level > 0) 160 | set_current_log_level(opts->log_level); 161 | 162 | return do_scan(opts); 163 | 164 | } 165 | 166 | 167 | int main (int argc, char ** argv){ 168 | 169 | int ret = 0; 170 | struct scan_options * opts = NULL; 171 | 172 | if( !(opts = (struct scan_options*)calloc(1,sizeof(struct scan_options)))){ 173 | err_log("Memory allocation failed!\n"); 174 | return -1; 175 | } 176 | 177 | opts->log_level = -1; 178 | opts->path_to_scan = NULL; 179 | 180 | parse_options(argc,argv,opts); 181 | 182 | ret = process_opts(opts); 183 | 184 | if(opts->path_to_scan != NULL){ 185 | free(opts->path_to_scan); 186 | opts->path_to_scan = NULL; 187 | } 188 | 189 | free(opts); 190 | opts = NULL; 191 | 192 | return ret; 193 | 194 | } -------------------------------------------------------------------------------- /tools/cli_parser/parser.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 
19 | 20 | ***/ 21 | 22 | 23 | 24 | #include "armaditopdf.h" 25 | 26 | 27 | 28 | void Banner(){ 29 | 30 | printf("----------------------------\n"); 31 | printf("-- ARMADITO PDF ANALYZER --\n"); 32 | printf("----------------------------\n\n"); 33 | 34 | return; 35 | } 36 | 37 | 38 | void Helper(){ 39 | 40 | printf("ARMADITO PDF ANALYZER :: No file in parameter\n"); 41 | printf("Command : ./pdfAnalyzer [filename]\n\n"); 42 | 43 | return; 44 | } 45 | 46 | void Commands(){ 47 | 48 | Banner(); 49 | 50 | printf("Commands list:\n"); 51 | printf("- avscan :: launch a complete analysis and display report\n"); 52 | printf("- decode [obj_ref] :: decode object stream\n"); 53 | printf("- dump [obj_ref] :: dump object stream\n"); 54 | printf("- exit :: exit the parser.\n"); 55 | printf("- object [obj_ref] :: display object infos\n"); 56 | printf("- quit :: exit the parser.\n"); 57 | printf("\n"); 58 | printf("objects actions:\n"); 59 | printf("- decode [obj_ref] :: decode object stream\n"); 60 | printf("- object [obj_ref] :: display object infos\n"); 61 | printf("\n"); 62 | 63 | return; 64 | } 65 | 66 | 67 | 68 | 69 | int main (int argc, char ** argv){ 70 | 71 | int ret; 72 | FILE * f = NULL; 73 | int fd = -1; 74 | struct pdfDocument * pdf = NULL; 75 | struct pdfObject * obj = NULL; 76 | char cmd[512] = {0}; 77 | char params[512] = {0}; 78 | 79 | 80 | #ifdef DEBUG 81 | Banner(); 82 | #endif 83 | 84 | if(argc < 2){ 85 | Helper(); 86 | return (-1); 87 | } 88 | 89 | //printf ("Analyzing file : %s\n",argv[1]); 90 | if(!(f = os_fopen(argv[1],"rb"))){ 91 | printf("[-] Error :: main :: Error while opening file %s\n",argv[1]); 92 | return -1; 93 | } 94 | 95 | 96 | // Initialize the pdfDocument struct 97 | if(!(pdf = initPDFDocument())){ 98 | printf("[-] Error :: analyzePDF :: Error while allocating memory for pdfDocument structure\n"); 99 | fclose(f); 100 | return -1; 101 | } 102 | pdf->fh = f; 103 | 104 | if ( parsePDF(pdf)< 0){ 105 | printf("[-] Error :: parsing error\n"); 106 | return 
-2; 107 | } 108 | 109 | 110 | 111 | while(1){ 112 | 113 | printf("enter a command:\nUHPDF>"); 114 | scanf("%s",&cmd); 115 | //scanf("%[^\t\r\n]",&cmd); 116 | 117 | if(strncmp(cmd,"quit",4) == 0 || strncmp(cmd,"exit",4) == 0 ){ 118 | 119 | break; 120 | 121 | }else if(strncmp(cmd,"help",4) == 0){ 122 | 123 | Commands(); 124 | 125 | }else if(strncmp(cmd,"avscan",6) == 0){ 126 | 127 | printf("[TODO] :: av scan\n"); 128 | 129 | }else if(strncmp(cmd,"info",4) == 0){ 130 | 131 | printf("[TODO] :: display infos.\n"); 132 | 133 | }else if(strncmp(cmd,"obj",3) == 0){ 134 | 135 | //printf("[TODO] :: display object. %s \n",params); 136 | printf("Enter an object reference: UHPDF>"); 137 | scanf("%10s",params); 138 | //printf("params = %s\n",params ); 139 | sprintf(ref, "%s 0 obj",params ); 140 | printf("object = %s\n",ref ); 141 | 142 | //printf("Decoding object :: %s\n","83 0 obj"); 143 | obj = getPDFObjectByRef(pdf,ref); 144 | 145 | if(obj == NULL){ 146 | printf("[-] Error :: Object [%s] not found!\n",ref); 147 | continue; 148 | } 149 | 150 | printf("Display object :: %s\n","83 0 obj"); 151 | 152 | printObject(obj); 153 | 154 | 155 | 156 | }else if(strncmp(cmd,"decode",6) == 0){ 157 | 158 | printf("[TODO] :: display object.\n"); 159 | 160 | printf("Enter object reference: UHPDF>"); 161 | scanf("%s",params); 162 | //printf("params = %s\n",params ); 163 | sprintf(ref, "%s 0 obj",params ); 164 | printf("object = %s\n",ref ); 165 | 166 | obj = getPDFObjectByRef(pdf,ref); 167 | 168 | if(obj == NULL){ 169 | printf("[-] Error :: Object [%s] not found!\n",ref); 170 | continue; 171 | } 172 | decodeObjectStream(obj); 173 | 174 | 175 | } 176 | else{ 177 | printf("Command [%s] not found. 
See Help (command: help)!\n",cmd); 178 | } 179 | 180 | 181 | 182 | } 183 | 184 | //fclose(f); 185 | freePDFDocumentStruct(pdf); 186 | 187 | //system("pause"); 188 | 189 | return ret; 190 | } -------------------------------------------------------------------------------- /tools/armadito-pdf/main.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015 - 2017 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 
19 | 20 | ***/ 21 | 22 | 23 | #include "libarmadito-pdf/armaditopdf.h" 24 | #include "libarmadito-pdf/osdeps.h" 25 | #include "libarmadito-pdf/log.h" 26 | #include "getopt.h" 27 | 28 | #define TOOL_VERSION "0.13.2" 29 | 30 | 31 | struct scan_options { 32 | char *path_to_scan; 33 | enum log_level log_level; 34 | }; 35 | 36 | 37 | static struct option cli_option_def[] = { 38 | {"help", no_argument, 0, 'h'}, 39 | {"version", no_argument, 0, 'v'}, 40 | {"log-level", required_argument, 0, 'l'}, 41 | {0, 0, 0, 0} 42 | }; 43 | 44 | 45 | void Usage(){ 46 | 47 | fprintf(stderr, "usage: armadito-pdf [options] FILE\n"); 48 | fprintf(stderr, "\n"); 49 | fprintf(stderr, "Armadito PDF scanner\n"); 50 | fprintf(stderr, "\n"); 51 | fprintf(stderr, "Options:\n"); 52 | fprintf(stderr, " --help -h print help and quit\n"); 53 | fprintf(stderr, " --version -V print program version\n"); 54 | fprintf(stderr, " --log-level=LEVEL | -l LEVEL set log level [error, warn, info, debug, none]\n"); 55 | fprintf(stderr, "\n"); 56 | 57 | exit(-1); 58 | 59 | } 60 | 61 | void Version(){ 62 | 63 | printf("armadito-pdf v%s (using libarmadito-pdf v%s) \nCopyright (C) 2015 - 2017 by Teclib.\n",TOOL_VERSION,getVersion()); 64 | exit(1); 65 | } 66 | 67 | 68 | int parse_options(int argc, char ** argv, struct scan_options * opts){ 69 | 70 | while(1){ 71 | 72 | int c, option_index = 0; 73 | 74 | c = getopt_long (argc, argv, "hvil:", cli_option_def, &option_index); 75 | 76 | /* Detect the end of the options. 
*/ 77 | if (c == -1){ 78 | break; 79 | } 80 | 81 | switch(c){ 82 | 83 | case 'h': 84 | Usage(); 85 | break; 86 | 87 | case 'v': 88 | Version(); 89 | break; 90 | 91 | case 'l': 92 | 93 | if(!strcmp("error",optarg)) 94 | opts->log_level = LOG_LEVEL_ERROR; 95 | else if(!strcmp("warn",optarg)) 96 | opts->log_level = LOG_LEVEL_WARNING; 97 | else if(!strcmp("info",optarg)) 98 | opts->log_level = LOG_LEVEL_INFO; 99 | else if(!strcmp("debug",optarg)) 100 | opts->log_level = LOG_LEVEL_DEBUG; 101 | else if(!strcmp("none",optarg)) 102 | opts->log_level = LOG_LEVEL_NONE; 103 | else{ 104 | fprintf(stderr, "Option Error: Bad log level value\n"); 105 | Usage(); 106 | abort(); 107 | } 108 | break; 109 | 110 | default: 111 | abort(); 112 | break; 113 | } 114 | 115 | } 116 | 117 | if (optind < argc){ 118 | 119 | opts->path_to_scan = strdup(argv[optind]); 120 | 121 | }else{ 122 | fprintf(stderr, "Argument Error: Missing file or directory path\n"); 123 | Usage(); 124 | } 125 | 126 | return 0; 127 | } 128 | 129 | 130 | // Launch a scan directory 131 | int do_scan(struct scan_options * opts){ 132 | 133 | int ret; 134 | FILE * f = NULL; 135 | int fd = -1; 136 | 137 | // analysis with opened file descriptor. 138 | if(!(f = os_fopen(opts->path_to_scan,"rb"))){ 139 | err_log("Can't open file %s\n", opts->path_to_scan); 140 | return -1; 141 | } 142 | 143 | fd = os_fileno(f); 144 | ret = analyzePDF_ex(fd, opts->path_to_scan); 145 | fclose(f); 146 | 147 | return ret; 148 | } 149 | 150 | 151 | // launch a task according to options and parameters. 
152 | int process_opts(struct scan_options * opts){ 153 | 154 | 155 | if(opts == NULL || opts->path_to_scan == NULL){ 156 | return -1; 157 | } 158 | 159 | // Set log level 160 | if(opts->log_level > 0) 161 | set_current_log_level(opts->log_level); 162 | 163 | return do_scan(opts); 164 | 165 | } 166 | 167 | 168 | int main (int argc, char ** argv){ 169 | 170 | int ret = 0; 171 | struct scan_options * opts = NULL; 172 | 173 | if( !(opts = (struct scan_options*)calloc(1,sizeof(struct scan_options)))){ 174 | err_log("Memory allocation failed!\n"); 175 | return -1; 176 | } 177 | 178 | opts->log_level = -1; 179 | opts->path_to_scan = NULL; 180 | 181 | parse_options(argc,argv,opts); 182 | 183 | ret = process_opts(opts); 184 | 185 | if(opts->path_to_scan != NULL){ 186 | free(opts->path_to_scan); 187 | opts->path_to_scan = NULL; 188 | } 189 | 190 | free(opts); 191 | opts = NULL; 192 | 193 | return ret; 194 | 195 | } -------------------------------------------------------------------------------- /lib/includes/pdfStructs.h: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 
19 | 20 | ***/



#ifndef _pdf_Structs_h_
#define _pdf_Structs_h_


// NOTE(review): the header names of the three includes below are missing from this copy of the file;
// struct pdfDocument uses FILE, so <stdio.h> is presumably one of them - confirm against the repository.
#include
#include
#include


// PDF object structure (one node of the document's singly linked object list)
struct pdfObject{

	char * reference;	// reference of the object Ex : 12 0 obj
	char * content;		// The content of the object obj...endobj
	char * dico;		// The dictionary (if any)
	char * type;		// The type of the object (if any)
	char * stream;		// The content stream. stream...endstream
	char * filters;		// NOTE(review): presumably the filters declared for the stream - confirm
	char * decoded_stream;	// NOTE(review): presumably the stream once decoded - confirm
	int offset;		// offset (in byte) in the file
	int stream_size;	// Size in byte of the object's stream
	int tmp_stream_size;	// temp size of the stream (between two decoding process)
	int decoded_stream_size;	// Size in byte of the object's decoded stream
	int content_size;	// size in byte of the object's content
	int errors;		// errors in parsing

	struct pdfObject* next;	// next object in the list.


};


// PDF Trailer structure (one node of the document's singly linked trailer list)
struct pdfTrailer{

	int offset;		// offset in the document
	char * content;		// content of the trailer
	char * dico;		// NOTE(review): presumably the trailer dictionary - confirm
	struct pdfTrailer* next;	// next trailer in the document

};


// PDF Cross-reference table structure
struct pdfXRef{

	int offset;		// offset in the document
	char * content;		// content of the XRef
	struct pdfXRef* next;	// next cross-reference table in the document

};


// Suite of tests according to the PDF structure specifications.
struct testsPDFStruct{

	int bad_header;		// when the PDF header is incorrect
	int encrypted;		// when the document is encrypted
	int empty_page_content;	// when all pages are empty of content
	int object_collision;	// when two objects have the same reference in the document.
	int bad_trailer;	// when the trailer is in an incorrect form
	int bad_xref_offset;	// when the offset of the xref table is incorrect;
	int bad_obj_offset;	// When at least an object's offset in the reference table is incorrect
	int obfuscated_object;	// when an object dictionary is obfuscated within hexa
	int multiple_headers;	// when several headers are found in the document.
	int large_file;		// NOTE(review): presumably set when the file exceeds a size limit - confirm
	int comments;		// If PostScript comments are found in pdf.
	int malicious_comments;	// Malicious comments found (potentially defeat pdf parsers).

};


// Suite of tests for PDF objects content
struct testsPDFObjAnalysis{

	int active_content;		// presence of js, embedded files, or forms.
	int shellcode;			// presence of shellcode in an object stream content
	int pattern_high_repetition;	// high scale repetition of a pattern in a stream content
	int dangerous_keyword_high;	// potentially dangerous keyword (high level)
	int dangerous_keyword_medium;	// potentially dangerous keyword (medium level)
	int dangerous_keyword_low;	// potentially dangerous keyword (low level)
	int time_exceeded;		// when the analysis of an object stream exceed a given duration.

	int js;		// number of js content
	int xfa;	// number of xfa objects
	int ef;		// number of ef objects


};


// PDF Document structure
struct pdfDocument{

	FILE * fh;			// File handle of the document
	int fd;				// File descriptor of the document
	char * fname;			// File name of the document
	char * content;			// NOTE(review): presumably the raw content of the file - confirm
	struct pdfObject * objects;	// List of objects
	int coef;			// Suspicious coefficient
	int size;			// size in bytes of the PDF
	char * version;			// PDF specification version
	struct pdfTrailer* trailers;	// List of trailers
	struct pdfXRef* xref;		// List of cross-reference tables
	struct testsPDFStruct * testStruct;		// results of the document structure tests
	struct testsPDFObjAnalysis * testObjAnalysis;	// results of the object content tests
	double scan_time;		// time elapsed in second for parse or scan.
	int errors;			// treatment errors

};



/* pdf structures functions prototypes */

/* structure allocation */
struct pdfDocument* initPDFDocument();
struct pdfObject* initPDFObject();
struct pdfTrailer* initPDFTrailer();
struct testsPDFStruct * initTestsPDFStruct();
struct testsPDFObjAnalysis * initTestsPDFObjAnalysisStruct();

/* structure release */
void freePDFDocumentStruct(struct pdfDocument * pdf);
void freePDFObjectStruct(struct pdfObject * obj);
void freePDFTrailerStruct(struct pdfTrailer * trailer);

/* list insertion */
int addObjectInList(struct pdfObject* obj, struct pdfDocument* pdf);
int addTrailerInList(struct pdfDocument * pdf, struct pdfTrailer * trailer);


#endif
-------------------------------------------------------------------------------- /win32/ArmaditoPDF/ArmaditoPDF/ArmaditoPDF.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5} 15 | Win32Proj 16 | ArmaditoPDF 17 | 18 | 19 | 20 | Application 21 | true 22 | v120 23 | MultiByte 24 | 25 | 26 | Application 27 | false 28 | v120 29 | true 30 | MultiByte 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | true 44 | 45 | 46 | false 47 | 48 | 49 | 50 | 51 | 52 | Level3 53 | Disabled 54 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 55 | true 56 | $(SolutionDir)..\..\lib\includes;%(AdditionalIncludeDirectories) 57 | 58 | 59 | Console 60 | true 61 | 62 | 63 | 64 | 65 | Level3 66 | 67 | 68 | MaxSpeed 69 | true 70 | true 71 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) 72 | true 73 | $(SolutionDir)..\..\includes;%(AdditionalIncludeDirectories) 74 | 75 | 76 | Console 77 | true 78 | true 79 | true 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 |
-------------------------------------------------------------------------------- /lib/Spec.txt: -------------------------------------------------------------------------------- 1 | ::::::::::::::::::: PDF ANALYZER SPECIFICATIONS ::::::::::::::::::::::: 2 | 3 | Author: Ulrich FAUSTHER 4 | Modification date: 21/01/2015 5 | ----------------------------------------------------------------------- 6 | 7 | Plan: 8 | 9 | I- Description 10 | 11 | II- Parsing du PDF 12 | II-a PDF Header 13 | II-b Objets 14 | II-c Trailer 15 | II-d Cross-reference table 16 | 17 | 18 | III- Analyse Anti-malware 19 | 20 | III-a Format PDF 21 | III-b Analyse des objets 22 | III-c CVEs detection 23 | III-d Coefficient de suspicion 24 | III-e Cas non pris en compte 25 | 26 | IV- Réécriture de PDF 27 | 28 | IV-a Principes de réécriture de PDF 29 | IV-b Cas non pris en compte 30 | 31 | ----------------------------------------------------------------------- 32 | 33 | I- Description 34 | 35 | 36 | II- Parsing du PDF 37 | 38 | Cette étape a pour but d'extraire toutes les informations nécessaires à l'analyse antivirale du PDF. 39 | 40 | ------- II-a PDF Header 41 | 42 | 43 | Vérification de l'entête du document à analyser. 44 | L'entête du document doit être de la forme: %PDF-x.y (où x.y représente la version du PDF) Exemple : %PDF-1.7 (version 1.7 des spécifications). 45 | 46 | /!\NB: L'analyseur prend également en compte le format XDP (XML Data Package), qui représente la forme XML d'un PDF. Dans ce format, le contenu du PDF est encodé dans le XML en base64. 47 | L'analyseur va alors extraire le PDF packagé dans le XML, puis vérifier le header. A noter que la suite de l'analyse se fera sur le PDF extrait. 48 | 49 | Dans le cas où l'entête serait incorrecte, l'analyse s'arrête en produisant un rapport d'erreur. 50 | 51 | ------- II-b Objets 52 | 53 | 54 | Récupération de tous les objets qui constituent le document PDF et parsing des attributs.
55 | Les attributs de l'objet récupérés sont rangés dans une structure (table de hachage). 56 | Les principaux attributs d'un objet qui sont récupérés sont: 57 | * La référence (Ex: 16 0 obj); 58 | * L'offset en octet du début de l'objet dans le document; 59 | * Le contenu intégral de l'objet (11 0 obj......endobj); 60 | 61 | Si présent: 62 | * Le dictionnaire de l'objet (<<...>>); 63 | * Le type de l'objet. 64 | * Puis d'autres attributs spécifiques au type d'objet. etc... 65 | 66 | /!\NB: A partir de la version 1.5 de PDF : Récupération des objets intégrés dans des objets de type /ObjStm (object stream). 67 | Dans ce cas, des attributs sont rajoutés à la structure de l'objet à savoir ( objstm = l'objet stream dans lequel il est intégré; objStmOff = l'offset dans l'object stream ); 68 | 69 | 70 | Décodage des streams présents dans les objets; Les filtres implémentés sont: 71 | * FlateDecode 72 | * ASCIIHexDecode 73 | * ASCII85Decode 74 | * LZWDecode 75 | * CCITTFaxDecode 76 | * DCTDecode 77 | 78 | 79 | 80 | 81 | ------- II-c Trailer 82 | 83 | 84 | Récupération des trailers du PDF. (TODO:: Récupérer seulement le "dernier" Trailer mis à jour) 85 | Conformément aux spécifications PDF (jusqu'à la version 1.4 incluse), le trailer du document est défini sous la forme: 86 | trailer 87 | <<...>> 88 | startxref 89 | xref_table_offset 90 | %%EOF 91 | A partir de la version 1.5 de PDF, le trailer peut être de la forme: 92 | startxref 93 | xref_stream_offset 94 | %%EOF 95 | 96 | Dans le cas où la première forme du trailer n'aurait pas été retrouvée, essayer de l'obtenir sous la deuxième forme. 97 | 98 | /!\NB: Pour les documents PDF ayant été mis à jour, on peut retrouver plusieurs trailers. 99 | 100 | 101 | ------- II-d Cross-reference table 102 | 103 | 104 | Récupération de la table de référence des objets. 105 | La récupération de la table de référence des objets se fera lors de la vérification de sa conformité (voir partie III-a).
106 | 107 | Conformément aux spécifications PDF (jusqu'à la version 1.4 incluse); la table de reference des objets est de la forme: 108 | xref 109 | 0 3 110 | 0000000000 65535 f 111 | 0000000009 00000 n 112 | 0000000098 00000 n 113 | 114 | A partir de la version 1.5, la table de référence peut etre représentée via des objets de type (/XRef). 115 | /!\NB: Pour les documents PDF ayant été mis à jour, on peut retrouver plusieurs trailers. 116 | 117 | 118 | 119 | III- Analyse Anti-malware 120 | 121 | Objectif: Déterminer le niveau de suspicion (ou de dangerosité) du document analysé. 122 | Pour se faire, plusieurs tests divisés en 3 catégories seront effectuées sur le document. 123 | * Tests portant sur la structure du document. 124 | * Tests analysant le contenu des objets. 125 | * Tests d'identification de exploitation de vulnérabilités (CVE). 126 | 127 | 128 | ------- III-a Format PDF 129 | 130 | 131 | L'objectif de cette suite de tests est de déterminer si le document respecte bien les spécifications sur la structure du PDF. 132 | 133 | * Vérification du trailer 134 | Vérifier qu'un trailer a bien été trouvé dans le document. 135 | Si oui, vérifier les paramètres du dictionaire du trailer (Ex: /root 1 0 R :: vérifier que l'objet '1 0 obj' référencé est bien de type /Catalog). 136 | 137 | * Vérification de la table de reference des objets (Xref table). 138 | Vérifier que l'offset de la table de reference (ou de l'objet de type XRef) défini dans le trailer. 139 | Si ok, vérifier les entrées de la table de réference (par rapport aux offsets des objets). 140 | 141 | * Vérification du contenu du document. 142 | Vérifier que le document ne contienne pas uniquement que des pages vides. 143 | /!\NB: Un document PDF dont toutes les pages sont vides est considéré comme suspect. 144 | 145 | * Collisions d'objets 146 | Vérifier qu'un object ne soit pas défini plusieurs fois dans le PDF (avec une table de reference incorrecte). 
147 | 148 | 149 | * Detection de PDF dont le contenu est chiffré. 150 | /!\NB: Dans ce cas, ne pas continuer le traitement du document. 151 | 152 | 153 | 154 | ------- III-b Analyse des objets 155 | 156 | L'objectif de cette suite de tests est de detecter des éléments potentiellement dangereux dans le contenu des objects du PDF. 157 | 158 | * Recherche d'objets potentiellement dangereux. 159 | -> javascript, 160 | -> embedded file, 161 | -> formulaire XFA, 162 | -> action de type /Launch, 163 | -> URI 164 | -> etc. 165 | 166 | * Recherche de pattern potentiellement dangereux dans les objects. 167 | -> shellcode, 168 | -> pattern à forte répetition, 169 | -> unicode strings, 170 | -> mots clés potentiellement dangereux (heapSpray, payload, shellcode, etc.), 171 | -> javascript fonctions (StringfromChar, byteToChar, eval, unescape, split, etc.), 172 | -> path traversal URI. 173 | 174 | 175 | ------- III-c CVEs exploit detection 176 | 177 | 178 | L'objectif de ces tests est de determiner une possible exploitation d'une vulnérabilité CVE. 179 | 180 | Les vulnérabilités implémentées sont: 181 | * CVE-2010-2883 182 | 183 | 184 | ------- III-d Coefficient de suspicion 185 | 186 | Afin de définir un coefficient de suspicion du document analysé, un coefficient va etre attribué au resultat de chaque test: 187 | * $EMPTY_PAGES_WITH_ACTIVE_CONTENT = 99; 188 | * $EMPTY_PAGES_CONTENT = 70; 189 | * $OBJECT_COLLISION_PLUS_BAD_XREF = 90; 190 | * $OBJECT_COLLISION = 10; 191 | * $BAD_XREF_OFFSET = 30; 192 | * $TRAILER_NOT_FOUND = 30; 193 | * $BAD_TRAILER = 40; 194 | * $OBFUSCATED_OBJECTS = 40; 195 | * $ACTIVE_CONTENT = 40; 196 | * $SHELLCODE = 40; 197 | * $PATTERN_REPETITION = 40; 198 | * $DANGEROUS_PATTERN_HIGH = 90; 199 | * $DANGEROUS_PATTERN_MEDIUM = 40; 200 | * $DANGEROUS_PATTERN_LOW = 20; 201 | * $TIME_EXCEEDED = 20; 202 | * $MALICIOUS_URI = 50; 203 | * $MULTIPLE_HEADERS = 50; 204 | 205 | 206 | NB: Ces coefficients doivent etre calibrés afin d'obtenir de meilleurs resultats. 
207 | 208 | Le coefficient de suspicion est la somme de tous les résultats des tests positifs. Plus le coefficient est élevé, plus le document est suspect. 209 | 210 | 211 | III-e Cas non pris en compte 212 | 213 | IV- Réécriture de PDF 214 | 215 | IV-a Principes de réécriture de PDF 216 | IV-b Cas non pris en compte 217 | -------------------------------------------------------------------------------- /lib/src/armaditopdf.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 19 | 20 | ***/ 21 | 22 | 23 | 24 | #include "armaditopdf.h" 25 | #include "pdfParsing.h" 26 | #include "pdfAnalysis.h" 27 | #include "osdeps.h" 28 | #include "log.h" 29 | #include 30 | 31 | 32 | 33 | char * getVersion(){ 34 | return a6o_pdf_ver; 35 | } 36 | 37 | 38 | /* 39 | printAnalysisReport() :: print a report of the analysis (debug only). 40 | parameters: 41 | - struct pdfDocument * pdf 42 | returns: 43 | - none. 44 | */ 45 | // TODO :: printAnalysisReport :: filter report informations by log level. 
46 | void printAnalysisReport(struct pdfDocument * pdf){ 47 | 48 | 49 | if (!print_report || pdf == NULL){ 50 | return; 51 | } 52 | 53 | printf("\n\n"); 54 | printf("----------------------------------\n"); 55 | printf("-- ARMADITO PDF ANALYZER REPORT --\n"); 56 | printf("----------------------------------\n\n"); 57 | 58 | printf("Filename = %s\n",pdf->fname); 59 | if (pdf->version) 60 | printf("PDF version = %s\n",pdf->version); 61 | 62 | printf("size = %d bytes\n", pdf->size); 63 | 64 | printf("\n\n"); 65 | printf("::: PDF Document Structure Tests :::\n\n"); 66 | 67 | 68 | printf("bad_header = %d\n", pdf->testStruct->bad_header); 69 | printf("encrypted = %d\n", pdf->testStruct->encrypted); 70 | printf("empty_page_content = %d\n", pdf->testStruct->empty_page_content); 71 | printf("object_collision = %d\n", pdf->testStruct->object_collision); 72 | printf("bad_trailer = %d\n", pdf->testStruct->bad_trailer); 73 | printf("bad_xref_offset = %d\n", pdf->testStruct->bad_xref_offset); 74 | printf("bad_obj_offset = %d\n", pdf->testStruct->bad_obj_offset); 75 | printf("obfuscated_object = %d\n", pdf->testStruct->obfuscated_object); 76 | printf("multiple_headers = %d\n", pdf->testStruct->multiple_headers); 77 | printf("postscript_comments = %d\n", pdf->testStruct->comments); 78 | printf("malicious_comments = %d\n", pdf->testStruct->malicious_comments); 79 | 80 | printf("\n\n"); 81 | printf("::: PDF Object Analysis Tests :::\n\n"); 82 | 83 | printf("active_content = %d\n", pdf->testObjAnalysis->active_content); 84 | printf(" - js content = %d\n", pdf->testObjAnalysis->js); 85 | printf(" - xfa content = %d\n", pdf->testObjAnalysis->xfa); 86 | printf(" - ef content = %d\n", pdf->testObjAnalysis->ef); 87 | printf("shellcode = %d\n", pdf->testObjAnalysis->shellcode); 88 | printf("pattern_high_repetition = %d\n", pdf->testObjAnalysis->pattern_high_repetition); 89 | printf("dangerous_keyword_high = %d\n", pdf->testObjAnalysis->dangerous_keyword_high); 90 | 
printf("dangerous_keyword_medium = %d\n", pdf->testObjAnalysis->dangerous_keyword_medium); 91 | printf("dangerous_keyword_low = %d\n", pdf->testObjAnalysis->dangerous_keyword_low); 92 | printf("time_exceeded = %d\n", pdf->testObjAnalysis->time_exceeded); 93 | 94 | 95 | printf("\n\n"); 96 | printf("::: Suspicious Coefficient :::\n\n"); 97 | printf("errors = %d\n", pdf->errors); 98 | 99 | if(pdf->testStruct->bad_header > 0) 100 | printf("Coef = BAD_HEADER\n"); 101 | else 102 | if(pdf->testStruct->large_file > 0) 103 | printf("Coef = %d (LARGE_FILE)\n",pdf->coef); 104 | else 105 | if(pdf->testStruct->encrypted > 0) 106 | printf("Coef = Encrypted_PDF\n"); 107 | else 108 | printf("Coef = %d\n",pdf->coef); 109 | 110 | 111 | printf("-------------------------------------------------------\n"); 112 | //printf("-------------------------------------------------------\n"); 113 | printf("Execution time : %.2lf sec \n",pdf->scan_time); 114 | printf("-------------------------------------------------------\n"); 115 | printf("-------------------------------------------------------\n\n"); 116 | 117 | return; 118 | 119 | } 120 | 121 | 122 | // This function calc the suspicious coefficient according to the tests results 123 | // TODO Improve this fucntion by calc the coef with the operation coef += test_result * test_coef 124 | int calcSuspiciousCoefficient(struct pdfDocument * pdf){ 125 | 126 | // check parameters 127 | if(pdf == NULL){ 128 | return -1; 129 | } 130 | 131 | // PDF Document Structure tests 132 | /* 133 | EMPTY_PAGE_CONTENT 99 134 | OBJECT_COLLISION 10 135 | BAD_TRAILER 40 136 | BAD_XREF_OFFSET 30 137 | BAD_OBJ_OFFSET 20 138 | OBFUSCATED_OBJECT 50 139 | MULTIPLE_HEADERS 50 140 | */ 141 | 142 | pdf->coef = 0; 143 | 144 | if(pdf->testStruct->encrypted > 0 ){ 145 | pdf->coef = -2; 146 | return -2; 147 | } 148 | 149 | if(pdf->testStruct->empty_page_content > 0){ 150 | pdf->coef = EMPTY_PAGE_CONTENT; 151 | return 0; 152 | } 153 | 154 | if(pdf->testStruct->object_collision > 
0 && ( pdf->testStruct->bad_obj_offset > 0 || pdf->testStruct->bad_xref_offset > 0 )){ 155 | pdf->coef += OBJECT_COLLISION_AND_BAD_XREF; 156 | }else{ 157 | 158 | if(pdf->testStruct->object_collision > 0){ 159 | pdf->coef += OBJECT_COLLISION; 160 | } 161 | 162 | if(pdf->testStruct->bad_obj_offset > 0){ 163 | pdf->coef += BAD_OBJ_OFFSET; 164 | } 165 | 166 | if( pdf->testStruct->bad_xref_offset > 0){ 167 | pdf->coef += BAD_XREF_OFFSET; 168 | } 169 | } 170 | 171 | if(pdf->testStruct->bad_trailer > 0){ 172 | pdf->coef += BAD_TRAILER; 173 | } 174 | 175 | if(pdf->testStruct->multiple_headers > 0){ 176 | pdf->coef += MULTIPLE_HEADERS; 177 | } 178 | 179 | if(pdf->testStruct->obfuscated_object > 0){ 180 | pdf->coef += OBFUSCATED_OBJECT; 181 | } 182 | 183 | if(pdf->testStruct->malicious_comments > 0){ 184 | pdf->coef += MALICIOUS_COMMENTS; 185 | } 186 | 187 | 188 | // PDF Objects Analysis tests 189 | /* 190 | ACTIVE_CONTENT 40 191 | SHELLCODE 40 192 | PATTERN_HIGH_REPETITION 40 193 | DANGEROUS_KEYWORD_HIGH 90 194 | DANGEROUS_KEYWORD_MEDIUM 40 195 | DANGEROUS_KEYWORD_LOW 20 196 | TIME_EXCEEDED 20 197 | */ 198 | 199 | 200 | if(pdf->testObjAnalysis->active_content > 0){ 201 | pdf->coef += ACTIVE_CONTENT; 202 | } 203 | 204 | if(pdf->testObjAnalysis->shellcode > 0){ 205 | pdf->coef += SHELLCODE; 206 | } 207 | 208 | if(pdf->testObjAnalysis->pattern_high_repetition > 0){ 209 | pdf->coef += PATTERN_HIGH_REPETITION; 210 | } 211 | 212 | if(pdf->testObjAnalysis->dangerous_keyword_high > 0){ 213 | pdf->coef += DANGEROUS_KEYWORD_HIGH; 214 | } 215 | 216 | if(pdf->testObjAnalysis->dangerous_keyword_medium > 0){ 217 | pdf->coef += DANGEROUS_KEYWORD_MEDIUM; 218 | } 219 | 220 | if(pdf->testObjAnalysis->dangerous_keyword_low > 0){ 221 | pdf->coef += DANGEROUS_KEYWORD_LOW; 222 | } 223 | 224 | if(pdf->testObjAnalysis->time_exceeded > 0){ 225 | pdf->coef += TIME_EXCEEDED; 226 | } 227 | 228 | 229 | return 0; 230 | 231 | } 232 | 233 | 234 | /* 235 | analyzePDF_ex() :: Analyze pdf extension function 
236 | parameters: 237 | - int fd (file descriptor of the file to analyze) 238 | - char * filename (file name of the file). 239 | returns: 240 | - the suspicious coefficient (>=0) on success. 241 | - an error code (<0) on error. 242 | */ 243 | int analyzePDF_ex(int fd, char * filename){ 244 | 245 | int ret = 0; 246 | struct pdfDocument * pdf = NULL; 247 | time_t start_time =0, end_time = 0; 248 | double time_elapsed = 0; 249 | int res = 0; 250 | FILE * fh = NULL; 251 | 252 | 253 | if (fd < 0 && filename == NULL){ 254 | err_log("analyzePDF_ex :: invalid parameters!",0); 255 | return -1; 256 | } 257 | 258 | dbg_log("analyzePDF_ex :: Analyzing file :: [%s]\n", filename); 259 | 260 | // open the file if fd is invalid 261 | if (fd < 0 && !(fh = os_fopen(filename, "rb"))){ 262 | err_log("analyzePDF_ex :: Can't open file %s\n", filename); 263 | return -1; 264 | } 265 | 266 | 267 | // Initialize pdfDocument struct 268 | if (!(pdf = initPDFDocument())){ 269 | err_log("analyzePDF_ex :: pdfDocument initialization failed!\n"); 270 | 271 | if(fh != NULL) 272 | fclose(fh); 273 | 274 | return -1; 275 | } 276 | 277 | pdf->fh = fh; 278 | pdf->fd = fd; 279 | pdf->fname = os_strdup(filename); 280 | 281 | // start time initialization. 282 | time(&start_time); 283 | 284 | // Parse pdf document content. 285 | if ((ret = parsePDF(pdf)) < 0){ 286 | err_log("analyzePDF_ex :: parsing PDF document failed\n"); 287 | goto clean; 288 | } 289 | 290 | /* this is for debug purpose only */ 291 | // printPDFObjects(pdf); 292 | // printObjectReferences(pdf); 293 | 294 | 295 | // PDF objects analysis. 
296 | if ((ret = getDangerousContent(pdf)) < 0){ 297 | err_log("analyzePDF_ex :: get dangerous content failed\n"); 298 | goto clean; 299 | } 300 | 301 | 302 | // Document structure analysis 303 | if((ret = documentStructureAnalysis(pdf))< 0){ 304 | err_log("analyzePDF_ex :: document structure Analysis failed\n"); 305 | goto clean; 306 | } 307 | 308 | 309 | clean: 310 | 311 | time(&end_time); 312 | time_elapsed = difftime(end_time, start_time); 313 | 314 | pdf->scan_time = time_elapsed; 315 | 316 | // calc supicious coefficient of the document. 317 | calcSuspiciousCoefficient(pdf); 318 | 319 | // print report. (debug only) 320 | printAnalysisReport(pdf); 321 | 322 | if (ret >= 0){ 323 | ret = pdf->coef; 324 | dbg_log("[armaditoPDF] Coef = %d\n", ret); 325 | } 326 | 327 | if (pdf != NULL){ 328 | freePDFDocumentStruct(pdf); 329 | } 330 | 331 | 332 | return ret; 333 | 334 | 335 | } -------------------------------------------------------------------------------- /lib/src/pdfStructs.c: -------------------------------------------------------------------------------- 1 | /*** 2 | 3 | Copyright (C) 2015, 2016 Teclib' 4 | 5 | This file is part of Armadito module PDF. 6 | 7 | Armadito module PDF is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | Armadito module PDF is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with Armadito module PDF. If not, see . 
19 | 20 | ***/ 21 | 22 | 23 | #include "pdfStructs.h" 24 | #include "log.h" 25 | 26 | 27 | /* 28 | freePDFObjectStruct() :: free the allocated memory PDF object structure. 29 | parameters: 30 | - struct pdfObject * pdf (the pdf object list pointer). 31 | returns: 32 | - none 33 | */ 34 | void freePDFObjectStruct(struct pdfObject * obj){ 35 | 36 | struct pdfObject * tmp = NULL; 37 | 38 | if(obj == NULL){ 39 | return ; 40 | } 41 | 42 | while(obj != NULL){ 43 | 44 | tmp = obj; 45 | obj = obj->next; 46 | 47 | // free all elements 48 | free(tmp->reference); 49 | free(tmp->content); 50 | free(tmp->dico); 51 | free(tmp->type); 52 | free(tmp->stream); 53 | free(tmp->filters); 54 | free(tmp->decoded_stream); 55 | 56 | free(tmp); 57 | tmp = NULL; 58 | 59 | } 60 | 61 | return; 62 | 63 | } 64 | 65 | 66 | /* 67 | freePDFTrailerStruct() :: free the allocated memory PDF trailer structure. 68 | parameters: 69 | - struct pdfTrailer * pdf (the pdf trailer list pointer). 70 | returns: 71 | - none 72 | */ 73 | void freePDFTrailerStruct(struct pdfTrailer * trailer){ 74 | 75 | struct pdfTrailer * tmp = NULL; 76 | 77 | if(trailer == NULL){ 78 | return ; 79 | } 80 | 81 | while(trailer!= NULL){ 82 | 83 | tmp = trailer; 84 | trailer = trailer->next; 85 | 86 | free(tmp->dico); 87 | free(tmp->content); 88 | 89 | free(tmp); 90 | tmp = NULL; 91 | 92 | } 93 | 94 | return; 95 | 96 | } 97 | 98 | 99 | /* 100 | freePDFDocumentStruct() :: free the allocated memory PDF document structure. 101 | parameters: 102 | - struct pdfDocument * pdf (the pdf document pointer). 
103 | returns: 104 | - none 105 | */ 106 | void freePDFDocumentStruct(struct pdfDocument * pdf){ 107 | 108 | 109 | if(pdf == NULL){ 110 | return ; 111 | } 112 | 113 | if (pdf->fname != NULL){ 114 | free(pdf->fname); 115 | pdf->fname = NULL; 116 | } 117 | 118 | // Free objects 119 | if (pdf->objects != NULL){ 120 | freePDFObjectStruct(pdf->objects); 121 | } 122 | 123 | // Free trailer 124 | if (pdf->trailers != NULL){ 125 | freePDFTrailerStruct(pdf->trailers); 126 | } 127 | 128 | if (pdf->fh != NULL){ 129 | fclose(pdf->fh); 130 | pdf->fh = NULL; 131 | } 132 | 133 | if (pdf->version != NULL){ 134 | free(pdf->version); 135 | pdf->version = NULL; 136 | } 137 | 138 | if (pdf->content != NULL){ 139 | free(pdf->content); 140 | pdf->content = NULL; 141 | } 142 | 143 | if (pdf->testStruct != NULL){ 144 | free(pdf->testStruct); 145 | pdf->testStruct = NULL; 146 | } 147 | 148 | if (pdf->testObjAnalysis != NULL){ 149 | free(pdf->testObjAnalysis); 150 | pdf->testObjAnalysis = NULL; 151 | } 152 | 153 | 154 | free(pdf); 155 | pdf = NULL; 156 | 157 | 158 | return ; 159 | 160 | } 161 | 162 | 163 | /* 164 | addObjectInList() :: add an object in the pdf document object list 165 | parameters: 166 | - struct pdfObject * obj (pdf object pointer) 167 | - struct pdfDocument * pdf (pdf document pointer) 168 | returns: (int) 169 | - 0 on success. 170 | - an error code (<0) on error. 
171 | */ 172 | int addObjectInList(struct pdfObject* obj, struct pdfDocument* pdf){ 173 | 174 | struct pdfObject* tmp = NULL; 175 | 176 | if(obj == NULL || pdf == NULL){ 177 | err_log("addObjectInList :: invalid parameter\n"); 178 | return -1; 179 | } 180 | 181 | if(pdf->objects == NULL){ 182 | pdf->objects = obj; 183 | }else{ 184 | 185 | tmp = pdf->objects; 186 | 187 | // Object collision detection 188 | if(strncmp(tmp->reference,obj->reference,strlen(tmp->reference)) == 0 && strncmp(tmp->reference,obj->reference,strlen(obj->reference)) == 0){ 189 | 190 | warn_log("addObjectInList :: Object collision :: %s\n", obj->reference); 191 | pdf->testStruct->object_collision ++; 192 | } 193 | 194 | 195 | while(tmp->next != NULL){ 196 | 197 | // Object collision detection 198 | tmp = tmp->next; 199 | 200 | if(strncmp(tmp->reference,obj->reference,strlen(tmp->reference)) == 0 && strncmp(tmp->reference,obj->reference,strlen(obj->reference)) == 0){ 201 | warn_log("addObjectInList :: Object collision :: %s\n", obj->reference); 202 | pdf->testStruct->object_collision ++; 203 | } 204 | } 205 | tmp->next = obj; 206 | 207 | } 208 | 209 | return 0; 210 | } 211 | 212 | 213 | /* 214 | initTestsPDFStruct() :: Initialize PDF Tests structure. 215 | parameters: 216 | - none 217 | returns: (struct testsPDFStruct *) 218 | - the testsPDFStruct pointer on success. 219 | - NULL on error. 
220 | */ 221 | struct testsPDFStruct * initTestsPDFStruct(){ 222 | 223 | struct testsPDFStruct * testStruct = NULL; 224 | 225 | if( !(testStruct = (struct testsPDFStruct *)calloc(1,sizeof(struct testsPDFStruct)) ) ){ 226 | err_log("initTestsPDFStruct :: memory allocation failed\n"); 227 | return NULL; 228 | } 229 | 230 | testStruct->bad_header = 0; 231 | testStruct->encrypted = 0; 232 | testStruct->empty_page_content = 0; 233 | testStruct->object_collision = 0; 234 | testStruct->bad_trailer = 0; 235 | testStruct->bad_xref_offset = 0; 236 | testStruct->bad_obj_offset = 0; 237 | testStruct->obfuscated_object = 0; 238 | testStruct->multiple_headers = 0; 239 | testStruct->large_file = 0; 240 | testStruct->comments = 0; 241 | testStruct->malicious_comments = 0; 242 | 243 | return testStruct; 244 | } 245 | 246 | 247 | /* 248 | initTestsPDFObjAnalysisStruct() :: Initialize PDF Tests structure. 249 | parameters: 250 | - none 251 | returns: (struct testsPDFObjAnalysis *) 252 | - the testsPDFObjAnalysis pointer on success. 253 | - NULL on error. 254 | */ 255 | struct testsPDFObjAnalysis * initTestsPDFObjAnalysisStruct(){ 256 | 257 | struct testsPDFObjAnalysis * testObjAnalysis = NULL; 258 | 259 | if( !(testObjAnalysis = (struct testsPDFObjAnalysis *)calloc(1,sizeof(struct testsPDFObjAnalysis)) ) ){ 260 | err_log("initTestsPDFObjAnalysisStruct :: memory allocation failed\n"); 261 | return NULL; 262 | } 263 | 264 | testObjAnalysis->active_content = 0; 265 | testObjAnalysis->shellcode = 0; 266 | testObjAnalysis->pattern_high_repetition = 0; 267 | testObjAnalysis->dangerous_keyword_high = 0; 268 | testObjAnalysis->dangerous_keyword_medium = 0; 269 | testObjAnalysis->dangerous_keyword_low = 0; 270 | testObjAnalysis->time_exceeded = 0; 271 | 272 | testObjAnalysis->js = 0; 273 | testObjAnalysis->xfa = 0; 274 | testObjAnalysis->ef = 0; 275 | 276 | return testObjAnalysis; 277 | } 278 | 279 | 280 | /* 281 | initPDFDocument() :: Initialize pdfDocument structure. 
282 | parameters: 283 | - none 284 | returns: (struct pdfDocument *) 285 | - the pdfDocument pointer on success. 286 | - NULL on error. 287 | */ 288 | struct pdfDocument* initPDFDocument(){ 289 | 290 | struct pdfDocument* pdf = NULL; 291 | int err = 0; 292 | 293 | if( (pdf = (struct pdfDocument *)calloc(1,sizeof(struct pdfDocument))) == NULL ){ 294 | err_log("initPDFDocument :: memory allocation failed\n"); 295 | err++; 296 | goto clean; 297 | } 298 | 299 | if( (pdf->testStruct = initTestsPDFStruct()) == NULL ){ 300 | err_log("initPDFDocument :: testsPDFstruct initialization failed!\n"); 301 | err++; 302 | goto clean; 303 | } 304 | 305 | if( (pdf->testObjAnalysis = initTestsPDFObjAnalysisStruct()) == NULL ){ 306 | err_log("initPDFDocument :: testsPDFObjAnalysisStruct initialization failed!\n"); 307 | err++; 308 | goto clean; 309 | } 310 | 311 | // Initialize entries 312 | pdf->fh = NULL; 313 | pdf->fd = -1; 314 | pdf->fname = NULL; 315 | pdf->content = NULL; 316 | pdf->objects =NULL; 317 | pdf->coef = 0; 318 | pdf->size = 0; 319 | pdf->version = NULL; 320 | pdf->trailers = NULL; 321 | pdf->xref = NULL; 322 | pdf->errors = 0; 323 | pdf->scan_time=0; 324 | 325 | clean: 326 | if (err != 0){ 327 | if (pdf != NULL){ 328 | freePDFDocumentStruct(pdf); 329 | pdf = NULL; 330 | } 331 | } 332 | 333 | return pdf; 334 | 335 | } 336 | 337 | 338 | /* 339 | initPDFObject() :: Initialize pdfObject object structure. 340 | parameters: 341 | - none 342 | returns: (struct pdfObject *) 343 | - the pdfObject pointer on success. 344 | - NULL on error. 
345 | */ 346 | struct pdfObject* initPDFObject(){ 347 | 348 | struct pdfObject* obj = NULL; 349 | 350 | 351 | if( !(obj = (struct pdfObject*)calloc(1,sizeof(struct pdfObject)) ) ){ 352 | err_log("initPDFObject :: memory allocation failed\n"); 353 | return NULL; 354 | } 355 | 356 | // Initialize entries 357 | obj->reference = NULL; 358 | obj->content = NULL; 359 | obj->dico = NULL; 360 | obj->type = NULL; 361 | obj->stream = NULL; 362 | obj->filters = NULL; 363 | obj->decoded_stream = NULL; 364 | obj->offset = 0; 365 | obj->next = NULL; 366 | obj->stream_size = 0; 367 | obj->tmp_stream_size = 0; 368 | obj->content_size = 0; 369 | obj->decoded_stream_size = 0; 370 | obj->errors = 0; 371 | 372 | return obj; 373 | 374 | } 375 | 376 | 377 | /* 378 | initPDFTrailer() :: Initialize pdf trailer structure 379 | parameters: 380 | - none 381 | returns: (struct pdfTrailer *) 382 | - the pdfTrailer pointer on success. 383 | - NULL on error. 384 | */ 385 | struct pdfTrailer* initPDFTrailer(){ 386 | 387 | struct pdfTrailer* trailer = NULL; 388 | 389 | if( !(trailer = (struct pdfTrailer *)calloc(1,sizeof(struct pdfTrailer)) ) ){ 390 | err_log("initPDFTrailer :: memory allocation failed\n"); 391 | return NULL; 392 | } 393 | 394 | // Initialize entries 395 | trailer->offset = 0; 396 | trailer->content = NULL; 397 | trailer->dico = NULL; 398 | trailer->next = NULL; 399 | 400 | return trailer; 401 | 402 | } 403 | 404 | 405 | /* 406 | addTrailerInList() :: add a trailer in the list of trailers 407 | parameters: 408 | - struct pdfDocument * pdf 409 | - struct pdfTrailer * trailer 410 | returns: (int) 411 | - 0 on success 412 | - -1 on error. 
*/
int addTrailerInList(struct pdfDocument * pdf, struct pdfTrailer * trailer){

	struct pdfTrailer * tmp = NULL;

	if(pdf == NULL || trailer == NULL){
		err_log("addTrailerInList :: invalid parameters\n");
		return -1;
	}


	if(pdf->trailers == NULL){
		pdf->trailers = trailer;
	}else{

		tmp = pdf->trailers;
		while(tmp->next != NULL){
			tmp = tmp->next;
		}
		tmp->next = trailer;

	}

	return 0;
}
--------------------------------------------------------------------------------
/tools/perl_poc/lib/analysis/ObjectAnalysis.pm:
--------------------------------------------------------------------------------
package ObjectAnalysis;

use strict;

use lib::conf::Config;

my $DEBUG = "no";


# URI_analysis($obj_ref)
# Looks for suspicious patterns in the URI of an object (hash reference with
# the "uri" and "ref" keys): path traversal ("../") and "system32".
# Each finding prints a warning and increments $main::TESTS_CAT_2{"Malicious URI"}.
# Objects without URI are ignored.
# TODO to improve
sub URI_analysis{

	my $obj_ref = shift;

	my $uri = $obj_ref->{uri};
	return unless defined $uri;

	# Path traversal detection
	if($uri =~ /(\.\.\/)+/){
		print "Warning :: URI_analysis :: Found path traversal in $obj_ref->{ref} URI :: $obj_ref->{uri}\n";

		# ++ on a missing key creates it with the value 1.
		$main::TESTS_CAT_2{"Malicious URI"} ++;
	}

	# potential dangerous pattern :: system32
	if($uri =~ /(system32)+/){
		print "Warning :: URI_analysis :: Found potentially dangerous pattern in $obj_ref->{ref} URI :: $obj_ref->{uri}\n";

		$main::TESTS_CAT_2{"Malicious URI"} ++;
	}

	return;
}



# DangerousKeywordsResearch($obj_ref, $content)
# Scans the content of an object for potentially dangerous patterns.
# Returns one of the strings "High", "Medium" or "none":
#   - "High"   : an exploit related keyword, or more than 10 "%uXXXX" unicode escapes.
#   - "Medium" : a JavaScript string manipulation / evaluation keyword.
#   - "none"   : nothing found, or empty content.
# $obj_ref is only used to print the reference of the object in the report.
sub DangerousKeywordsResearch{

	my ($obj_ref,$content) = @_;

	if(!$content){
		return "none";
	}

	# keywords (HIGH) :: HeapSpray - heap - spray - hack - shellcode - shell - Execute - exe - exploit - pointers - memory - exportDataObject -app.LaunchURL -byteToChar - system32 - payload
	if( $content =~ /(HeapSpray|heap|spray|hack|shellcode|shell|Execute|pointers|byteToChar|system32|payload|console)/si ){
		print "Dangerous Pattern \(High\) found :: $1 :: in $obj_ref->{ref} \n";
		return "High";
	}

	# Unicode detection
	my @rep_unicode = ($content =~ /(\%u[a-f0-9]{4})/gi);
	my $count = @rep_unicode;
	print "unicode string = $count :: @rep_unicode\n" if ($count > 0);

	if($count > 10){
		print "Warning :: DangerousKeywordsResearch :: Found unicode strings :: @rep_unicode\n";
		return "High";
	}

	# TODO combination between unicode and medium

	# Javascript keywords (MEDIUM) :: substring - toSring - split - eval - String.replace - unescape - exportDataObject - StringfromChar - util.print
	if( $content =~ /(toString|substring|split|eval|addToolButton|String\.replace|unescape|exportDataObject|StringfromChar|util\.print)/si ){
		print "Dangerous Pattern \(Medium\) found :: $1 :: in $obj_ref->{ref} \n";
		return "Medium";
	}

	# javascript keywords ::
	#
	#
	# NOP detection "90"
	#
	# %u... like %u4141%u4141%u63a5%u4a80%u0000

	# TODO Look for JavaScript in XFA block Ex:

	return "none";
}


# This function detect the wide repetition of an unknown pattern
# Test1 files :: unknown pattern repetition
# 618b5fcf762bc7397a22e568753858c9
# 6254e7e17d9796028bdc56ba81022617
# 6bffa8f1f0155a554fcdca6a1839576e
# 8e88d64028093d2ef6a633c83ee28e44
# b400e8d3635f91176e1d56a38e6aa590
# c8c39082dfca15d5ded02ca050a96112
# de8bcc90ecd0049a1ab4e5a5087359b4
# fa2ddb10d9184dba0f90c88b7786f6ec
#
# Unknown_Pattern_Repetition_Detection($objcontent)
# Slides a 5 characters window over the content (white characters removed)
# and counts the occurrences of every distinct pattern.
# Returns:
#   - 1 as soon as one pattern is repeated more than $Config::MAX_REP_DETECTION times.
#   - otherwise the number of patterns repeated more than 200 times and more
#     than twice the standard deviation of the repetition counts.
#   - 0 when there is nothing to analyze.
#   - -1 when the analysis lasts more than $Config::ANALYSIS_TIMEOUT seconds.
sub Unknown_Pattern_Repetition_Detection{

	my $objcontent = shift;

	my $result = 0;
	my %h;			# pattern => number of occurrences in the content
	my $cpt = 5;		# number of characters of the pattern to detect
	my $nb_rep_max = 200;	# minimal number of repetitions reported by the statistical pass

	if(!$objcontent){
		return 0;
	}

	my $start_time = time;

	# Remove white characters for a better processing
	$objcontent =~ s/\s//g;

	my $last_index = length($objcontent) - 1;

	# NOTE(review): this bound stops one window before the end of the content
	# (the last $cpt characters never start a pattern); kept as is so that
	# the detection results do not change.
	for (my $i = 0 ; $i <= $last_index - $cpt ; $i++){

		my $pat = substr($objcontent, $i, $cpt);

		# Count a pattern only the first time it is met.
		if(!exists($h{$pat})){

			# number of (non overlapping) repetitions in the content
			my @rep = ($objcontent =~ /\Q$pat\E/g);
			my $count = @rep;
			$h{$pat} = $count;

			if($count > $Config::MAX_REP_DETECTION){
				print "FOUND = $pat => $count\n\n" unless $DEBUG eq "yes";
				$result ++;
				return $result;
			}
		}

		if(time - $start_time > $Config::ANALYSIS_TIMEOUT){
			print "TIME_EXCEEDED\n";
			return -1;
		}
	}

	my @counts = values %h;
	my $nb = @counts;

	if($nb == 0){
		return 0;
	}

	# Standard deviation of the repetition counts
	my $sum = 0;
	$sum += $_ for @counts;

	my $moyenne = $sum/$nb;	# mean

	print "100% => $sum :: cpt => $cpt :: m => $moyenne \n" unless $DEBUG eq "no";

	my $var = 0;		# variance
	$var += ($_-$moyenne)*($_-$moyenne) for @counts;
	$var = $var/$nb;

	my $et = sqrt($var);	# standard deviation

	print "moyenne = $moyenne :: nb = $nb :: variance = $var :: ecartype = $et\n" unless $DEBUG eq "no";

	for my $key (keys %h){

		my $value = $h{$key};

		if($value > 2*$et && $value > $nb_rep_max){
			print "FOUND = $key => $value :: \n\n" unless $DEBUG eq "yes";
			$result ++ ;
		}
	}

	return $result;

}


# Unknown_Pattern_Repetition_Detection__($objcontent)
# Variant of the detection above which records, for each repeated 5 characters
# pattern, the list of offsets where it appears (quadratic scan).
# Returns the number of patterns found more than 30 times and more than twice
# the standard deviation of the repetition counts; returns nothing (empty
# list / undef) when the content is empty.
sub Unknown_Pattern_Repetition_Detection__{

	my $objcontent = shift;

	my $result = 0;
	my %h;		# pattern => reference to the array of offsets of the pattern
	my $cpt = 5;	# number of characters of the pattern to detect

	if(!$objcontent){
		return;
	}

	# Remove white characters for a better processing
	$objcontent =~ s/\s//g;

	my $last_index = length($objcontent) - 1;

	for (my $i = 0 ; $i <= $last_index - $cpt ; $i++){

		my $pat = substr($objcontent, $i, $cpt);

		# All the offsets of a pattern are collected the first time it is met.
		if(exists($h{$pat})){
			next;
		}

		for (my $j = $i+$cpt ; $j <= $last_index - $cpt ; $j++){

			next if substr($objcontent, $j, $cpt) ne $pat;

			# First repetition: record the offset of the pattern itself too.
			$h{$pat} = [ $i ] unless exists($h{$pat});

			# $j only grows, so an offset is never recorded twice.
			# push needs a real array: push on a reference is a compile error since Perl 5.24.
			push @{ $h{$pat} }, $j;
		}
	}

	my @counts = map { scalar @{$_} } values %h;
	my $nb = @counts;

	my $sum = 0;
	$sum += $_ for @counts;

	# Standard deviation of the repetition counts
	my $moyenne = 0;	# mean
	my $var = 0;		# variance
	my $et = 0;		# standard deviation

	if($nb > 0){
		$moyenne = $sum/$nb;
	}
	print "100% => $sum :: cpt => $cpt :: m => $moyenne \n" unless $DEBUG eq "no";

	$var += ($_-$moyenne)*($_-$moyenne) for @counts;

	if($nb > 0){
		$var = $var/$nb;
		$et = sqrt($var);
	}

	print "moyenne = $moyenne :: nb = $nb :: variance = $var :: ecartype = $et\n" unless $DEBUG eq "no";

	for my $key (keys %h){

		my $rep = scalar @{ $h{$key} };

		if($rep > 2*$et && $rep > 30){
			print "FOUND = $key => $rep\n\n" unless $DEBUG eq "yes";
			$result ++ ;
		}
	}

	return $result;

}




# This function detect a shellcode or suite of hexa insertion
# Test2 files :: shellcode or hexa insertion
# 5c08ea688165940008949a86805ff1d0
# 5f27adfa55628ea4674348351e241be8
# 73b0e8c5a7e5814c723295313ce0262d
# 75c1ae242d07bb738a5d9a9766c2a7de
# 7bcb4c9c35e01bd985f74aec66c19876
# 84d860a4c9e8d2baec983ef35789449a
# ab3f72df228715e6265cb222c586254e
# b823473c7206d64fa3ce20c4669b707d
# d785f43c523bf36d1678da84fa84617f
# edab6ed2809f739b67667e8fed689992
#
# Shellcode_Detection($objcontent)
# Returns the number of patterns (0 to 3) matched in the content once the
# white characters are removed; 0 for an empty content.
# Each pattern is searched from the beginning of the content: the matches use
# no /g modifier, because in scalar context /g would make the next match on
# the same string start where the previous one ended.
sub Shellcode_Detection{

	my $objcontent = shift;
	my $res = 0;

	if(!$objcontent){
		return 0;
	}

	# Remove white space for a better processing
	$objcontent =~ s/\s//g;

	# Shellcode detection, or repetition of numbers separated by an element (,_\-...)

	# 73b0e8c5a7e5814c723295313ce0262d
	# 5f27adfa55628ea4674348351e241be8
	# 5c08ea688165940008949a86805ff1d0
	# 73b0e8c5a7e5814c723295313ce0262d
	# 7bcb4c9c35e01bd985f74aec66c19876
	# d785f43c523bf36d1678da84fa84617f
	# 75c1ae242d07bb738a5d9a9766c2a7de
	# ab3f72df228715e6265cb222c586254e
	# b823473c7206d64fa3ce20c4669b707d
	if( $objcontent =~ /(([\d]{1,2}[\/,%\$@^_]{1,2}){100})/i ){
		print "\n\n:::TEST 2:::\n" unless $DEBUG eq "no";
		print "DANGEROUS PATTERN 1 FOUND !!\n" unless $DEBUG eq "no";
		$res ++;

		# TODO look for "split" pattern (or medium dangerous pattern)
	}

	if( $objcontent =~ /(([\d]{1,}[\/,%\$@^_-]{1,2}){100})/i ){
		print "\n\n:::TEST 2:::\n" unless $DEBUG eq "no";
		print "DANGEROUS PATTERN 1.1 FOUND !!\n" unless $DEBUG eq "no";
		$res ++;
		print "$1\n" unless $DEBUG eq "no";

		# TODO look for "split" pattern (or medium dangerous pattern)
	}

	#pat = 9804c-9686c7351c-7254c27757c-27643c18532c-18500c32447c-32352c28309c-28201c10773c-10724c12582c-12521c
	# 84d860a4c9e8d2baec983ef35789449a
	if( $objcontent =~ /(([\dABCDEF]{2,}[-]){100})/i ){
		print "\n\n:::TEST 2:::\n" unless $DEBUG eq "no";
		print "DANGEROUS PATTERN 2 FOUND !!\n" unless $DEBUG eq "no";
		$res ++;
		print "$1\n" unless $DEBUG eq "no";
	}

	# edab6ed2809f739b67667e8fed689992
	#if( $objcontent =~ /([\d\/A-z,]{100})/ig){

	return $res;

}



1;

--------------------------------------------------------------------------------
/tools/perl_poc/lib/analysis/DocumentStruct.pm:
-------------------------------------------------------------------------------- 1 | package DocumentStruct; 2 | 3 | use strict; 4 | use MIME::Base64 (); 5 | 6 | my $DEBUG = "no"; 7 | 8 | 9 | # Check the magic number of a PDF file 10 | sub CheckMagicNumber{ 11 | 12 | my $file_ref= shift; 13 | my $file = $file_ref; 14 | 15 | my $len=8; 16 | my $offset=0; 17 | my $ver="undef"; 18 | 19 | 20 | 21 | seek ($file, 0, 0); 22 | read $file, $ver, $len, $offset or print "read failed :: $!\n"; 23 | 24 | 25 | if( $ver =~ /\%PDF-\d\.\d/){ 26 | print "PDF header : OK\n" unless $DEBUG eq "no"; 27 | 28 | 29 | # Check if there is several headers in file 30 | seek ($file, 0, 0); 31 | my $content = do { local $/; <$file>}; 32 | 33 | my @pdf_headers = $content =~ /\%PDF-\d\.\d/sg; 34 | my $num = @pdf_headers; 35 | 36 | if($num > 1){ 37 | print "Warning :: CheckMagicNumber :: There are $num pdf headers in this file\n"; 38 | $main::TESTS_CAT_1{"Multiple Headers"} = $num; 39 | } 40 | 41 | return ($ver,"OK"); 42 | } 43 | 44 | # Check string 45 | seek ($file, 0, 0); # rewind file 46 | my $content = do { local $/; <$file>}; 47 | #print "$content\n"; 48 | if($content =~ /(.*)<\/chunk><\/document><\/pdf>/si){ 49 | #if($content =~ /(.*)<\/chunk>/si){ 50 | print "This document is an XML Data Package (XDP)\n" unless $DEBUG eq "no"; 51 | my $chunkContent = $1; 52 | #print "chunkContent = $chunkContent\n"; 53 | 54 | #decode base64 content 55 | my $decodedContent = MIME::Base64::decode($chunkContent) or print "Error while decoding base64 :: $!\n"; 56 | #print "decoded content = $decodedContent\n"; 57 | 58 | # write content in a new file 59 | close($file); 60 | open $file, ">tmp.pdf" or die "open failed in tmp.pdf : $! 
"; 61 | binmode $file; 62 | print $file $decodedContent; 63 | #print "file handle = $file\n"; 64 | close($file); 65 | open $file, "{"type"}) && $_->{"type"} eq "/Pages" ){ 110 | 111 | print "FOUND Pages object :: $_->{ref} :: \n" unless $DEBUG eq "yes"; 112 | 113 | # Get kid node pages 114 | my @pages = $_->{"kids"} =~ /(\d+\s\d\sR)/sg; 115 | #print @pages; 116 | 117 | foreach(@pages){ 118 | my $page_ref = $_; 119 | $page_ref =~ s/R/obj/; 120 | print "page ref = $page_ref\n"; 121 | 122 | # if the page exists and the /Content parameter is set 123 | if(exists($pdfObjects->{$page_ref}) && exists($pdfObjects->{$page_ref}->{"pagecontent"}) ){ 124 | 125 | # Check if it's not an empty content 126 | #my $p_content = $pdfObjects{$page_ref}->{"pagecontent"}; 127 | 128 | 129 | # If the Contents fiels is an array 130 | my @pcontents = $pdfObjects->{$page_ref}->{"pagecontent"} =~ /(\d+\s\d\sR)/sg; 131 | 132 | foreach (@pcontents){ 133 | 134 | my $content_page_obj = $_; 135 | $content_page_obj =~ s/R/obj/; 136 | 137 | print ":: page content = $content_page_obj :: \n";#" $pdfObjects{$contentp}->{content}\n"; 138 | 139 | if(exists($pdfObjects->{$content_page_obj}) && exists($pdfObjects->{$content_page_obj}->{"stream"}) && length($pdfObjects->{$content_page_obj}->{"stream"}) > 0 ){ 140 | $ret ++; 141 | print "Page $page_ref is not empty => OK\n"unless $DEBUG eq "no"; 142 | 143 | }elsif(! 
exists($pdfObjects->{$content_page_obj})){ 144 | print "Warning : Content Object ($content_page_obj) of page $page_ref doesn\'t exist\n" unless $DEBUG eq "no"; 145 | 146 | }elsif( exists($pdfObjects->{$content_page_obj}->{content}) ){ 147 | 148 | # Trigger the case when the object represents an array of objects Ex: [422 0 R 423 0 R 424 0 R 425 0 R 426 0 R 427 0 R 428 0 R 429 0 R] 149 | 150 | my @content_page_array = $pdfObjects->{$content_page_obj}->{"content"} =~ /(\d+\s\d\sR)/sg; 151 | 152 | foreach(@content_page_array){ 153 | 154 | my $content_page_obj_2 = $_; 155 | $content_page_obj_2 =~ s/R/obj/; 156 | #print " Found obj :: $content_page_obj_2\n"; 157 | 158 | if(exists($pdfObjects->{$content_page_obj_2})){ 159 | 160 | # TODEBUG print ""; 161 | my $test = $pdfObjects->{$content_page_obj_2} ; 162 | 163 | print "DEBUG :: ".$pdfObjects->{$content_page_obj_2}->{stream}."\n"; 164 | 165 | if( exists($pdfObjects->{$content_page_obj_2}->{"stream"}) && length($pdfObjects->{$content_page_obj_2}->{"stream"}) > 0 ){ 166 | $ret ++; 167 | print "Found content of the page $page_ref in obj $content_page_obj_2 => OK\n"unless $DEBUG eq "no"; 168 | }else{ 169 | print "Warning :: Page content Object ($content_page_obj_2) is empty !!!!\n" unless $DEBUG eq "yes"; 170 | } 171 | 172 | }else{ 173 | print "Warning :: Empty_Pages_Document_detection :: Page content Object ($content_page_obj_2) is not defined\n" unless $DEBUG eq "yes"; 174 | } 175 | } 176 | 177 | 178 | }else{ 179 | print "Warning :: Empty_Pages_Document_detection :: The Stream of the Content Object is empty\n" unless $DEBUG eq "yes"; 180 | 181 | } 182 | 183 | } 184 | 185 | 186 | }elsif(! 
exists($pdfObjects->{$page_ref})){ 187 | print "Warning :: Empty_Pages_Document_detection :: Page $page_ref does\'nt exist.\n" unless $DEBUG eq "o"; 188 | }else{ 189 | print "Warning :: Empty_Pages_Document_detection :: Page $page_ref is empty\n" unless $DEBUG eq "o"; 190 | } 191 | 192 | 193 | } 194 | 195 | } 196 | 197 | # TODO Verify that the number of treated pages is the number of pages in the document. 198 | 199 | } 200 | 201 | return $ret; 202 | 203 | } 204 | 205 | 206 | sub Empty_Pages_Document_detection{ 207 | 208 | #my $ref = shift; 209 | my $pdfObjects = shift; 210 | 211 | #print "DEBUG = $ref\n"; 212 | 213 | #my %pdfObjects = %{$ref}; 214 | 215 | 216 | my $ret=0; 217 | my $numPages =0; # Number of pages found 218 | my $active_content =0; # Number of js, embedded files 219 | 220 | print "\n\n ::: Empty Pages With Active Content detection ::: \n" unless $DEBUG eq "no"; 221 | 222 | my @objs = values(%{$pdfObjects}); 223 | foreach(@objs){ 224 | 225 | 226 | if( exists($_->{"type"}) && $_->{"type"} eq "/Pages" ){ 227 | 228 | print "FOUND Pages object :: $_->{ref} :: \n" unless $DEBUG eq "no"; 229 | 230 | # Get kid node pages 231 | my @pages = $_->{"kids"} =~ /(\d+\s\d\sR)/sg; 232 | #print @pages; 233 | 234 | foreach(@pages){ 235 | my $page_ref = $_; 236 | $page_ref =~ s/R/obj/; 237 | #print "page ref = $page_ref\n"; 238 | 239 | # if the page exists and the /Content parameter is set 240 | if($pdfObjects->{$page_ref}->{"type"} eq "/Page" && exists($pdfObjects->{$page_ref}) && exists($pdfObjects->{$page_ref}->{"pagecontent"}) ){ 241 | 242 | # Check if it's not an empty content 243 | #my $p_content = $pdfObjects{$page_ref}->{"pagecontent"}; 244 | 245 | 246 | # If the Contents fiels is an array 247 | my @pcontents = $pdfObjects->{$page_ref}->{"pagecontent"} =~ /(\d+\s\d\sR)/sg; 248 | 249 | foreach (@pcontents){ 250 | 251 | my $content_page_obj = $_; 252 | $content_page_obj =~ s/R/obj/; 253 | 254 | #print ":: page content = $content_page_obj :: \n";#" 
$pdfObjects{$contentp}->{content}\n"; 255 | 256 | if(exists($pdfObjects->{$content_page_obj}) && exists($pdfObjects->{$content_page_obj}->{"stream"}) && length($pdfObjects->{$content_page_obj}->{"stream"}) > 0 ){ 257 | $ret ++; 258 | print "Page $page_ref is not empty => OK\n"unless $DEBUG eq "no"; 259 | 260 | }elsif(! exists($pdfObjects->{$content_page_obj})){ 261 | print "Warning : Content Object ($content_page_obj) of page $page_ref doesn\'t exist\n" unless $DEBUG eq "yes"; 262 | 263 | }elsif( exists($pdfObjects->{$content_page_obj}->{content}) ){ 264 | 265 | # Trigger the case when the object represents an array of objects Ex: [422 0 R 423 0 R 424 0 R 425 0 R 426 0 R 427 0 R 428 0 R 429 0 R] 266 | 267 | my @content_page_array = $pdfObjects->{$content_page_obj}->{"content"} =~ /(\d+\s\d\sR)/sg; 268 | 269 | foreach(@content_page_array){ 270 | 271 | my $content_page_obj_2 = $_; 272 | $content_page_obj_2 =~ s/R/obj/; 273 | #print " Found obj :: $content_page_obj_2\n"; 274 | 275 | if(exists($pdfObjects->{$content_page_obj_2})){ 276 | 277 | # TODEBUG print ""; 278 | #my $test = $pdfObjects->{$content_page_obj_2}; 279 | 280 | #print "DEBUG :: ".$test->{stream}."\n"; 281 | 282 | if( exists($pdfObjects->{$content_page_obj_2}->{"stream"}) && length($pdfObjects->{$content_page_obj_2}->{"stream"}) > 0 ){ 283 | $ret ++; 284 | print "Found content of the page $page_ref in obj $content_page_obj_2 => OK\n"unless $DEBUG eq "no"; 285 | }else{ 286 | print "Warning :: Page content Object ($content_page_obj_2) is empty \n" unless $DEBUG eq "yes"; 287 | } 288 | 289 | }else{ 290 | print "Warning :: Empty_Pages_Document_detection :: Page content Object ($content_page_obj_2) is not defined\n" unless $DEBUG eq "yes"; 291 | } 292 | } 293 | 294 | 295 | }else{ 296 | print "Warning :: Empty_Pages_Document_detection :: The Stream of the Content Object is empty\n" unless $DEBUG eq "yes"; 297 | 298 | } 299 | 300 | } 301 | 302 | 303 | }elsif(! 
# Check that the cross-reference data designated by the trailer is consistent
# with the content of the file.
#
# Parameters:
#   $trailer        - trailer text; the offset is taken from its
#                     "startxref <offset> %%EOF" part
#   $fh             - seekable read handle on the PDF file
#   $pdfObjects_ref - hash ref of objects keyed by reference ("12 0 obj");
#                     the "offset" field of every in-use object reached
#                     through the table is filled in
#
# Returns 1 when the offset leads either to a classic xref table whose in-use
# entries all point at the matching "<id> <gen> obj" header, or to an object
# present in $pdfObjects_ref with type "/XRef". Returns 0 otherwise.
#
# Uses the file-level $DEBUG flag, which is declared outside this block.
# TODO return the error status along with the result
sub Check_xref{

    my ($trailer, $fh, $pdfObjects_ref) = @_;

    # Get the startxref offset in the trailer
    my ($xref_offset) = defined($trailer)
        ? $trailer =~ /startxref\s*(\d+)\s*%%EOF/
        : ();
    return 0 unless defined $xref_offset;
    print "\nxref_offset = $xref_offset\n" unless $DEBUG eq "no";

    # Test XRef keyword
    my $res = "";
    seek ($fh, $xref_offset, 0) or return 0;
    read ($fh, $res, 4) or print "Check_xref :: read failed :: $!\n";
    $res = "" unless defined $res;
    print "res = $res\n" unless $DEBUG eq "no";

    if($res ne "xref"){
        # Not a classic table: the offset may designate a cross-reference
        # stream object. Read more than the 10 bytes that are printed so that
        # object numbers of five digits and more are still recognised.
        seek ($fh, $xref_offset, 0) or return 0;
        read ($fh, $res, 32) or print "Check_xref :: read failed :: $!\n";
        $res = "" unless defined $res;
        print "res2 = ".substr($res, 0, 10)."\n" unless $DEBUG eq "no";

        # TODO decode xref stream.
        if($res =~ /^(\d+\s\d\sobj)/){
            my $obj_ref = $1;

            # Check that the object really is a XRef type object
            my $is_xref = exists($pdfObjects_ref->{$obj_ref})
                && defined($pdfObjects_ref->{$obj_ref}->{"type"})
                && $pdfObjects_ref->{$obj_ref}->{"type"} eq "/XRef";
            return $is_xref ? 1 : 0;
        }

        return 0;
    }

    # Get xref entries: read byte by byte up to the "trailer" keyword.
    # Stop at end of file instead of looping forever on a truncated table.
    my $xref_content = $res;
    until($xref_content =~ /trailer\z/){
        my $byte = "";
        my $got = read ($fh, $byte, 1);
        if(!$got){
            my $why = defined($got) ? "trailer keyword not found" : $!;
            print "Check_xref :: read failed :: $why\n";
            return 0;
        }
        $xref_content .= $byte;
    }

    print "$xref_content\n" unless $DEBUG eq "no";

    # A table is made of subsections. Each one starts with a header line
    # "first_object_number number_of_entries" followed by its entries:
    #   nnnnnnnnnn ggggg n eol
    #   nnnnnnnnnn is a 10-digit byte offset
    #   ggggg is a 5-digit generation number
    #   n marks an in-use entry, f a free one
    my $xref_item = qr{
          (\d{10}) \s (\d{5}) \s ([fn])
        | (?<![^\r\n]) [ \t]* (\d+) [ \t]+ (\d+) [ \t]* (?=[\r\n])
    }x;

    # Check object's offsets
    my $id = 0;
    while($xref_content =~ /$xref_item/g){

        # Copy the captures before any other match overwrites them
        my ($off, $gen, $free, $first_obj, $number_of_entries) = ($1, $2, $3, $4, $5);

        if(defined $first_obj){
            # Subsection header: numbering restarts at its first object
            $id = $first_obj + 0;
            print "$first_obj :: $number_of_entries\n\n" unless $DEBUG eq "no";
            next;
        }

        if($free eq "n"){

            # The object header is expected at the recorded offset. The
            # generation of the entry is accepted as well as generation 0,
            # which was the only one recognised before.
            my $gen_num = $gen + 0;
            my $len = length($id) + length($gen_num) + 5;
            my $header = "";

            if(seek ($fh, $off, 0)){
                read ($fh, $header, $len) or print "Check_xref :: read failed :: off=$off :: len=$len\n";
            }
            $header = "" unless defined $header;

            if($header =~ /^($id\s(?:0|$gen_num)\sobj)/){

                my $obj_ref = $1;

                # save the object's offset
                if(exists($pdfObjects_ref->{$obj_ref})){
                    print "object $obj_ref is at offset $off\n" unless $DEBUG eq "no";
                    $pdfObjects_ref->{$obj_ref}->{"offset"} = $off ;
                }

            }else{
                print "WRONG Object offset :: $id $gen obj :: offset $off\n"unless $DEBUG eq "yes";
                return 0;
            }
        }

        # Free entries hold no object: nothing to verify, only count them
        $id ++;
    }

    return 1;
}


# From tools/perl_poc/lib/utils/CleanRewriting.pm
#
# This function removes the JavaScript content of an object (not an object
# stream): the "js" text found in the "content" field of the object is
# replaced by "(", padding spaces and ")" so that the content keeps its length
# whenever the "js" text is at least two characters long.
#
# Parameters:
#   $obj        - reference of the object to clean ("12 0 obj")
#   $pdfObjects - hash ref of objects; the fields "objStmOff", "js" and
#                 "content" of $pdfObjects->{$obj} are read, "content" is
#                 modified in place
#
# The content is left untouched when there is no "js" text or when that text
# does not occur in the content. Always returns 0.
sub RemoveJSContentFromObj{

    my ($obj,$pdfObjects) = @_;

    print "The object is at offset $pdfObjects->{$obj}->{objStmOff} in object stream\n";

    my $js = $pdfObjects->{$obj}->{js};
    my $len = defined($js) ? length($js) : 0;

    print "js len = $len\n";

    # Get the offset of the js content (-1 when there is nothing to replace)
    my $off = -1;
    if($len > 0 && defined($pdfObjects->{$obj}->{content})){
        $off = index($pdfObjects->{$obj}->{content}, $js);
    }

    print "verif1 :: $pdfObjects->{$obj}->{content}\n";

    # Replace js content by empty string.
    # A negative offset would make substr count from the end of the content
    # and overwrite unrelated bytes, so only a real match is rewritten.
    if($off >= 0){
        my $padding = $len > 2 ? $len - 2 : 0;
        my $comment = "(".(" " x $padding).")";
        substr($pdfObjects->{$obj}->{content},$off,$len,$comment);
    }

    print "verif2 :: $pdfObjects->{$obj}->{content}\n";

    return 0;
}
53 | 54 | # 55 | # 56 | #my @js_content = $pdfObjects->{$obj}->{stream_d} =~ /(javascript)/gi ; 57 | #my @js_content = $pdfObjects->{$obj}->{stream_d} =~ /(