├── module
├── modulePDF.sym
├── Makefile.am
├── modulePDF.h
└── modulePDF.c
├── tools
├── Makefile.am
├── armadito-pdf
│ ├── Makefile.am
│ └── main.c
├── cli_analyzer
│ ├── Makefile
│ ├── scandir.bat
│ ├── scandir.sh
│ └── main.c
├── perl_poc
│ └── lib
│ │ ├── conf
│ │ └── Config.pm
│ │ ├── analysis
│ │ ├── CVEs.pm
│ │ ├── ObjectAnalysis.pm
│ │ └── DocumentStruct.pm
│ │ └── utils
│ │ └── CleanRewriting.pm
└── cli_parser
│ └── parser.c
├── autogen.sh
├── Makefile.am
├── .gitignore
├── sonar-project.properties
├── lib
├── libarmadito-pdf.pc.in
├── Makefile.am
├── TODO
├── src
│ ├── log.c
│ ├── osdeps.c
│ ├── armaditopdf.c
│ ├── pdfStructs.c
│ ├── utils.c
│ └── pdfStructAnalysis.c
├── includes
│ ├── pdfParsing.h
│ ├── pdfAnalysis.h
│ ├── osdeps.h
│ ├── log.h
│ ├── utils.h
│ ├── armaditopdf.h
│ ├── filters.h
│ └── pdfStructs.h
└── Spec.txt
├── win32
└── ArmaditoPDF
│ ├── ArmaditoPDF.sln
│ └── ArmaditoPDF
│ ├── ArmaditoPDF.vcxproj.filters
│ └── ArmaditoPDF.vcxproj
├── README.md
├── CHANGES
├── configure.ac
└── .travis.yml
/module/modulePDF.sym:
--------------------------------------------------------------------------------
1 | module
2 |
--------------------------------------------------------------------------------
/tools/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS=armadito-pdf
--------------------------------------------------------------------------------
/autogen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -x
3 | aclocal --force
4 | libtoolize --force --automake --copy
5 | #autoheader --force
6 | automake --foreign --add-missing --force-missing --copy
7 | autoconf --force
8 |
9 |
--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
1 | if COND_LIBRARY
2 | LIB_DIR = lib
3 | endif
4 |
5 | if COND_MODULE
6 | MOD_DIR = module
7 | endif
8 |
9 | if COND_TOOLS
10 | TOOLS_DIR = tools
11 | endif
12 |
13 | SUBDIRS = $(LIB_DIR) $(TOOLS_DIR) $(MOD_DIR)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | Makefile.in
3 | autom4te.cache/*
4 | compile
5 | config.log
6 | config.status
7 | configure
8 | aclocal.m4
9 | config.guess
10 | config.sub
11 | depcomp
12 | install-sh
13 | ltmain.sh
14 | missing
15 | *~
16 | version.m4
17 | *.lo
18 | *.o
19 |
--------------------------------------------------------------------------------
/sonar-project.properties:
--------------------------------------------------------------------------------
1 | sonar.projectKey=armadito:mod-pdf:DEV
2 | sonar.projectName=armadito-mod-pdf
3 | sonar.projectVersion=1.0
4 | sonar.sources=.
5 | sonar.branch=DEV
6 | sonar.exclusions=armadito-av/**/*,cov-int/**/*
7 | sonar.cfamily.build-wrapper-output=build-wrapper-out
8 |
--------------------------------------------------------------------------------
/tools/armadito-pdf/Makefile.am:
--------------------------------------------------------------------------------
1 | AUTOMAKE_OPTIONS=subdir-objects no-dependencies
2 |
3 | AM_CPPFLAGS=
4 | armadito_pdf_LDADD= @LIBARMADITO_PDF_LIBS@
5 | armadito_pdf_CFLAGS= @LIBARMADITO_PDF_CFLAGS@
6 | #armadito_pdf_CFLAGS= -I$(top_srcdir)/lib/includes
7 |
8 |
9 |
10 | bin_PROGRAMS = armadito-pdf
11 | armadito_pdf_SOURCES= main.c
--------------------------------------------------------------------------------
/lib/libarmadito-pdf.pc.in:
--------------------------------------------------------------------------------
1 | prefix=@prefix@
2 | exec_prefix=@exec_prefix@
3 | includedir=@includedir@
4 | libdir=@libdir@
5 |
6 | Name: libarmadito-pdf
7 | Description: Armadito PDF library
8 | URL: https://github.com/armadito/armadito-mod-pdf
9 | Version: @PACKAGE_VERSION@
10 | Cflags: -I${includedir}
11 | Libs: -L${libdir} -larmadito-pdf
--------------------------------------------------------------------------------
/tools/cli_analyzer/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | gcc -g -o a6oPDFAnalyzer -Wall -Wextra main.c ../../lib/src/*.c -I../../lib/includes
3 | # Compile position-independent objects, then link them into a shared object.
4 | lib:
5 | gcc -fPIC -g -c -Wall -Wextra src/*.c -Iincludes
6 | gcc -shared -Wl,-soname,a6oPDFAnalyzer.so.1 -o a6oPDFAnalyzer-1.0.1.so *.o -lc
7 | # Targets are commands, not files; -f keeps clean from aborting when nothing was built.
8 | .PHONY: all lib clean
9 | clean:
10 | rm -f a6oPDFAnalyzer
11 | rm -f *.o
12 |
--------------------------------------------------------------------------------
/tools/cli_analyzer/scandir.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | REM Run the analyzer on every file below the given directory (recursive); its output is appended to result.txt.
3 | set ANALYZER="a6oPDFAnalyzer.exe"
4 | set "DIRPATH=%~1"
5 | REM %~1 strips any surrounding quotes so the path can be re-quoted safely below.
6 | if "%~1"=="" goto :help
7 |
8 | REM for /R in %DIRPATH%\ %%A do echo "entry = %%A"
9 | REM - FOR /R [[drive:]path] %%parameter IN (set) DO command
10 | FOR /R "%DIRPATH%" %%E IN (*) DO echo %%E && %ANALYZER% "%%E" >> result.txt
11 |
12 | goto :end
13 |
14 | :help
15 | echo syntax: scandir.bat [directoryPath]
16 |
17 | :end
18 | echo - Exiting...
--------------------------------------------------------------------------------
/module/Makefile.am:
--------------------------------------------------------------------------------
1 | AUTOMAKE_OPTIONS=subdir-objects
2 |
3 | modulesdir=$(libdir)/armadito/modules
4 | modules_LTLIBRARIES=modulePDF.la
5 |
6 |
7 | modulePDF_la_SOURCES= \
8 | modulePDF.c \
9 | modulePDF.h
10 |
11 |
12 | AM_CPPFLAGS=
13 | modulePDF_la_LDFLAGS= -module -avoid-version -export-symbols "$(srcdir)/modulePDF.sym"
14 | modulePDF_la_CFLAGS= -I$(top_srcdir)/lib/includes
15 | modulePDF_la_LIBADD=../lib/libarmadito-pdf.la
16 | modulePDF_la_CFLAGS+= @LIBARMADITO_CFLAGS@
17 | modulePDF_la_LIBADD+= @LIBARMADITO_LIBS@
18 |
19 |
20 | install-exec-hook:
21 | -rm -f "$(DESTDIR)$(modulesdir)"/modulePDF.la "$(DESTDIR)$(modulesdir)"/modulePDF.a
22 |
23 | install-data-hook:
24 | -rm -f "$(DESTDIR)$(modulesdir)"/modulePDF.la "$(DESTDIR)$(modulesdir)"/modulePDF.a
25 |
26 | EXTRA_DIST=modulePDF.sym
--------------------------------------------------------------------------------
/tools/cli_analyzer/scandir.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Run the analyzer on every entry found directly inside the directory given
4 | # as first parameter and append its output to the file given as second
5 | # parameter. Coefficient statistics are written to stats.txt.
6 |
7 | # Exit codes: 255 = directory missing, 254 = result file missing (the values
8 | # bash reports for "exit -1" / "exit -2"; POSIX sh rejects negative values).
9 | # VARIABLES
10 | DIR=$1
11 | RES_FILE=$2
12 | EXE=./a6oPDFAnalyzer
13 |
14 | if [ -z "$1" ]
15 | then
16 | echo "Missing parameters";
17 | echo "SYNTAX :: scandir.sh [directory] [result_file]";
18 | exit 255;
19 | fi
20 |
21 | if [ -z "$2" ]
22 | then
23 | echo "Missing parameters"
24 | echo "SYNTAX :: scandir.sh [directory] [result_file]"
25 | exit 254;
26 | fi
27 |
28 | for f in "$DIR"/* ; do
29 | echo "Processing $f ...";
30 | "$EXE" "$f" >> "$RES_FILE"
31 | #mv "$f" $DIR/Treated/
32 | done
33 |
34 |
35 | # Stats coef: count identical 'Coef =' lines of the result file
36 | grep -e 'Coef =' "$RES_FILE" | sort | uniq -c > stats.txt
37 |
38 |
39 | exit 0;
40 |
--------------------------------------------------------------------------------
/module/modulePDF.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | #define MALICIOUS_COEF 70
--------------------------------------------------------------------------------
/win32/ArmaditoPDF/ArmaditoPDF.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 2013
4 | VisualStudioVersion = 12.0.31101.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ArmaditoPDF", "ArmaditoPDF\ArmaditoPDF.vcxproj", "{667A295C-61CD-47A7-AAFC-5B7F6088CDB5}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Win32 = Debug|Win32
11 | Release|Win32 = Release|Win32
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Debug|Win32.ActiveCfg = Debug|Win32
15 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Debug|Win32.Build.0 = Debug|Win32
16 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Release|Win32.ActiveCfg = Release|Win32
17 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}.Release|Win32.Build.0 = Release|Win32
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | EndGlobal
23 |
--------------------------------------------------------------------------------
/tools/perl_poc/lib/conf/Config.pm:
--------------------------------------------------------------------------------
1 | package Config;
2 | # Global settings and scoring coefficients declared as package variables.
3 | use strict;
4 | use warnings;
5 | # GLOBAL CONFIG
6 | our $DEBUG = "no";
7 | our $ANALYSIS_TIMEOUT = 5;
8 | our $MAX_REP_DETECTION = 150;
9 |
10 |
11 | # PDF STRUCT TESTS coefs
12 | our $ENCRYPTED_PDF = "ENCRYPTED_PDF";
13 | our $EMPTY_PAGES_WITH_ACTIVE_CONTENT = 99;
14 | our $EMPTY_PAGES_CONTENT = 70;
15 | our $OBJECT_COLLISION_PLUS_BAD_XREF = 90;
16 | our $OBJECT_COLLISION = 10;
17 | our $BAD_XREF_OFFSET = 30;
18 | our $TRAILER_NOT_FOUND = 30;
19 | our $BAD_TRAILER = 40;
20 | our $OBFUSCATED_OBJECTS = 40;
21 | our $MALICIOUS_URI = 50;
22 | our $MULTIPLE_HEADERS = 50;
23 |
24 |
25 |
26 | # OBJECT ANALYSIS TESTS coefs
27 | our $ACTIVE_CONTENT = 40;
28 | our $SHELLCODE = 40;
29 | our $PATTERN_REPETITION = 40;
30 | our $DANGEROUS_PATTERN_HIGH = 90;
31 | our $DANGEROUS_PATTERN_MEDIUM = 40;
32 | our $DANGEROUS_PATTERN_LOW = 20;
33 | our $TIME_EXCEEDED = 20;
34 |
35 |
36 | # CVEs TESTS
37 | our $CVE_2010_2883_DETECTED = 50;
38 | our $CVE_2010_2883_BAD_FONT_FILE_LENGTH = 40;
39 |
40 |
41 | our $MALWARE_DETECTION_COEF = 70;
42 |
43 | # NOTE(review): the package name 'Config' is also the name of Perl's core
44 | # Config module; confirm that callers load this file and not the core one.
45 | 1;
46 |
--------------------------------------------------------------------------------
/lib/Makefile.am:
--------------------------------------------------------------------------------
1 | AUTOMAKE_OPTIONS=subdir-objects no-dependencies
2 |
3 | lib_LTLIBRARIES = libarmadito-pdf.la
4 | libarmadito_pdf_la_LDFLAGS = -version-number 0:12:6
5 |
6 | AM_CPPFLAGS= -I$(top_srcdir)/lib/includes
7 |
8 | libarmadito_pdf_la_SOURCES = \
9 | $(top_srcdir)/lib/src/armaditopdf.c \
10 | $(top_srcdir)/lib/src/filters.c \
11 | $(top_srcdir)/lib/src/log.c \
12 | $(top_srcdir)/lib/src/osdeps.c \
13 | $(top_srcdir)/lib/src/pdfObjectsAnalysis.c \
14 | $(top_srcdir)/lib/src/pdfParsing.c \
15 | $(top_srcdir)/lib/src/pdfStructAnalysis.c \
16 | $(top_srcdir)/lib/src/pdfStructs.c \
17 | $(top_srcdir)/lib/src/utils.c
18 |
19 | armadito_pdfincludedir=$(includedir)/libarmadito-pdf
20 |
21 | armadito_pdfinclude_HEADERS =\
22 | $(top_srcdir)/lib/includes/armaditopdf.h \
23 | $(top_srcdir)/lib/includes/filters.h \
24 | $(top_srcdir)/lib/includes/log.h \
25 | $(top_srcdir)/lib/includes/miniz.c \
26 | $(top_srcdir)/lib/includes/osdeps.h \
27 | $(top_srcdir)/lib/includes/pdfAnalysis.h \
28 | $(top_srcdir)/lib/includes/pdfParsing.h \
29 | $(top_srcdir)/lib/includes/pdfStructs.h \
30 | $(top_srcdir)/lib/includes/utils.h
31 |
32 |
33 | pkgconfigdir = $(libdir)/pkgconfig
34 | pkgconfig_DATA = libarmadito-pdf.pc
35 |
36 | libarmadito-pdf.pc: libarmadito-pdf.pc.in
37 | sed -e 's![@]prefix[@]!$(prefix)!g' \
38 | -e 's![@]exec_prefix[@]!$(exec_prefix)!g' \
39 | -e 's![@]includedir[@]!$(includedir)!g' \
40 | -e 's![@]libdir[@]!$(libdir)!g' \
41 | -e 's![@]PACKAGE_VERSION[@]!$(PACKAGE_VERSION)!g' \
42 | $(top_srcdir)/lib/libarmadito-pdf.pc.in > $@
--------------------------------------------------------------------------------
/lib/TODO:
--------------------------------------------------------------------------------
1 |
2 | ---------
3 | TODO LIST
4 | ---------
5 |
6 | // TODO :: checkMagicNumber :: search the header in the 1024 first bytes.
7 | // TODO :: checkMagicNumber :: Handle XDP files.
8 | // TODO :: printAnalysisReport :: filter report information by log level.
9 | // TODO :: getPDFContent :: set max_size limit.
10 | // TODO :: removeComments :: split this function (implement function get_line, etc.)
11 | // TODO :: check offset :: if comments have been removed from the document, the offsets will be incorrect.
12 | // TODO :: getObjectInfo :: fill obj->dico_len;
13 | // TODO :: replaceString :: replace all occurrences.
14 | // TODO :: Notation :: empty_doc_with_active_content (if no error).
15 | // TODO :: getActions :: get other potentially dangerous actions (OpenActions - GoToE - GoToR - etc.)
16 | // TODO :: decodeObjectStream :: check if the stream is encrypted. (/Encrypt in the dico)
17 | // TODO :: pdfParsing :: continue if the parsing failure is due to bad stream decode.
18 | // TODO :: getJSContentInXFA :: Check the keyword javascript
19 | // TODO :: decodeObjectStream :: do not try to decode an object twice.
20 | // TODO :: getEmbeddedFile :: Handle the case <> >>
21 | // TODO :: all :: set error codes.
22 | // TODO :: all :: use obj->dico_len instead of strlen(dico).
23 | // TODO :: documentStructureAnalysis :: check trailers.
24 | // TODO :: TOFIX :: filters implementations.
25 | // TODO :: TOFIX :: removeComments() function implementation.
26 | // TODO :: FlateDecode :: check if the stream is conform (Ex: '\r')
27 | // TODO :: all :: declare a public API. (for version 1.0.0)
28 | // TODO :: all :: api documentation.
--------------------------------------------------------------------------------
/lib/src/log.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 | #include "log.h"
23 |
24 |
25 | static enum log_level current_max_level = default_max_level;
26 |
27 |
28 | /* Set the threshold used by cli_log(): messages above `level` are dropped. */
29 | void set_current_log_level(enum log_level level)
30 | {
31 | /* file-scope threshold, read on every cli_log() call */
32 | current_max_level = level;
33 | }
34 |
35 | char * lvl_tostring(enum log_level level){
36 | /* Map a log level to the label that cli_log() prints before each message. */
37 | switch (level){
38 | case LOG_LEVEL_ERROR:
39 | return "";
40 | case LOG_LEVEL_WARNING:
41 | return "";
42 | case LOG_LEVEL_INFO:
43 | return "";
44 | case LOG_LEVEL_DEBUG:
45 | return "";
46 | default:
47 | return "";
48 | }
49 | /* NOTE(review): every branch returns an empty string here; the labels may have been angle-bracketed text lost in extraction - confirm against the upstream file. */
50 | }
51 |
52 | /*
53 |  * Print a formatted message on stdout, prefixed with the label of its level.
54 |  * Messages whose level is numerically above the current threshold are dropped.
55 |  */
56 | void cli_log(enum log_level level, const char * fmt, ...)
57 | {
58 | va_list args;
59 |
60 | if (level <= current_max_level) {
61 | printf("%s ", lvl_tostring(level));
62 |
63 | va_start(args, fmt);
64 | vprintf(fmt, args);
65 | va_end(args);
66 | }
67 | }
--------------------------------------------------------------------------------
/lib/includes/pdfParsing.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #ifndef _pdf_Parsing_h_
25 | #define _pdf_Parsing_h_
26 |
27 |
28 | #include "pdfStructs.h"
29 |
30 |
31 | #define LARGE_FILE_SIZE 1500000
32 |
33 |
34 | /***** pdf Parsing functions prototypes *****/
35 |
36 | int parsePDF(struct pdfDocument * pdf);
37 | int checkMagicNumber(struct pdfDocument * pdf);
38 | int getPDFContent(struct pdfDocument * pdf);
39 | int extractObjectFromObjStream(struct pdfDocument * pdf, struct pdfObject *obj);
40 | int getObjectInfos(struct pdfObject * obj, struct pdfDocument * pdf);
41 | int getPDFObjects(struct pdfDocument * pdf);
42 | int getPDFTrailers(struct pdfDocument * pdf);
43 | int getPDFTrailers_2(struct pdfDocument * pdf);
44 | int decodeObjectStream(struct pdfObject * obj);
45 | int removeComments(struct pdfDocument * pdf);
46 | char * getObjectDictionary(struct pdfObject * obj, struct pdfDocument * pdf);
47 | char * getObjectType(struct pdfObject * obj);
48 | char * getObjectStream(struct pdfObject * obj);
49 | char * getStreamFilters(struct pdfObject * obj);
50 | char * hexaObfuscationDecode(char * dico);
51 | char *removeCommentLine(char * src, int size, int * ret_len);
52 |
53 |
54 | #endif
55 |
--------------------------------------------------------------------------------
/lib/includes/pdfAnalysis.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #ifndef _pdf_Analysis_h_
25 | #define _pdf_Analysis_h_
26 |
27 |
28 | #include "pdfStructs.h"
29 |
30 |
31 | /***** pdf Structure analysis functions prototypes *****/
32 | int documentStructureAnalysis(struct pdfDocument * pdf);
33 | int checkXRef(struct pdfDocument * pdf);
34 | int checkEmptyDocument(struct pdfDocument * pdf);
35 | int checkTrailer(struct pdfDocument * pdf);
36 |
37 |
38 | /***** pdf Objects analysis functions prototypes *****/
39 | int getDangerousContent(struct pdfDocument* pdf);
40 | int getJavaScript(struct pdfDocument * pdf, struct pdfObject* obj);
41 | int getXFA(struct pdfDocument * pdf, struct pdfObject* obj);
42 | int getEmbeddedFile(struct pdfDocument * pdf, struct pdfObject* obj);
43 | int getInfoObject(struct pdfDocument * pdf);
44 | int unknownPatternRepetition(char * stream, int size, struct pdfDocument * pdf, struct pdfObject * obj);
45 | int findDangerousKeywords(char * stream, struct pdfDocument * pdf, struct pdfObject * obj);
46 | int getURI(struct pdfDocument * pdf, struct pdfObject * obj);
47 | int getJSContentInXFA(char * stream, int size, struct pdfObject * obj, struct pdfDocument * pdf);
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/lib/includes/osdeps.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 | #ifndef _os_deps_h_
23 | #define _os_deps_h_
24 |
25 | #include
26 |
27 | typedef int(*dirent_scan_cb)(int fd, char * filename);
28 | int os_scan_dir(char * path, int recurse, dirent_scan_cb dirent_cb, void * data);
29 |
30 | #ifdef _WIN32
31 |
32 | #include
33 | #include
34 | #define os_strncat strncat_s
35 | #define os_sprintf sprintf_s
36 | #define os_sscanf sscanf_s
37 | #define os_strncpy strncpy_s
38 | #define os_strdup _strdup
39 | #define os_lseek _lseek
40 | #define os_read _read
41 | #define os_fileno _fileno
42 | FILE * os_fopen(const char * filename, const char * mode);
43 |
44 |
45 | #else
46 |
47 | #include
48 | #define os_fopen fopen
49 | #define os_sprintf snprintf
50 | #define os_sscanf sscanf
51 | #define os_strdup strdup
52 | #define os_lseek lseek
53 | #define os_read read
54 | #define os_fileno fileno
55 | //#define os_sprintf(buffer,sizeOfBuffer, format,...) sprintf(buffer, format,...)
56 | int os_strncat(char *strDest, size_t numberOfElements, const char *strSource, size_t count);
57 | int os_strncpy(char *strDest, size_t numberOfElements, const char *strSource, size_t count);
58 |
59 | #endif
60 |
61 | #endif
--------------------------------------------------------------------------------
/lib/includes/log.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 | #ifndef _log_h_
23 | #define _log_h_
24 |
25 | #include
26 | #include
27 |
28 |
29 | #define default_max_level LOG_LEVEL_WARNING
30 | #define print_report 1 // print the analysis report.
31 |
32 | enum log_level { /* ordered values: cli_log() drops a message whose level is greater than the current threshold */
33 | LOG_LEVEL_ERROR = 1 << 1, /* 2 */
34 | LOG_LEVEL_WARNING = 1 << 2, /* 4; this is default_max_level */
35 | LOG_LEVEL_INFO = 1 << 3, /* 8 */
36 | LOG_LEVEL_DEBUG = 1 << 4, /* 16 */
37 | LOG_LEVEL_NONE = 1 << 5, /* 32; NOTE(review): being the largest value, using it as threshold lets every level through - confirm intent */
38 | };
39 |
40 | void cli_log(enum log_level level, const char * fmt, ...);
41 | void set_current_log_level(enum log_level level);
42 |
43 | #ifdef _WIN32
44 |
45 | #define err_log(fmt, ...) cli_log(LOG_LEVEL_ERROR,(fmt),__VA_ARGS__)
46 | #define warn_log(fmt, ...) cli_log(LOG_LEVEL_WARNING,(fmt),__VA_ARGS__)
47 | #define dbg_log(fmt, ...) cli_log(LOG_LEVEL_DEBUG,(fmt),__VA_ARGS__)
48 | #define info_log(fmt, ...) cli_log(LOG_LEVEL_INFO,(fmt),__VA_ARGS__)
49 |
50 | #else
51 |
52 | #define err_log(fmt, ...) cli_log(LOG_LEVEL_ERROR,(fmt),##__VA_ARGS__)
53 | #define warn_log(fmt, ...) cli_log(LOG_LEVEL_WARNING,(fmt),##__VA_ARGS__)
54 | #define dbg_log(fmt, ...) cli_log(LOG_LEVEL_DEBUG,(fmt),##__VA_ARGS__)
55 | #define info_log(fmt, ...) cli_log(LOG_LEVEL_INFO,(fmt),##__VA_ARGS__)
56 |
57 | #endif
58 |
59 |
60 | #endif
--------------------------------------------------------------------------------
/lib/includes/utils.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #ifndef _utils_h_
25 | #define _utils_h_
26 |
27 |
28 | #include "pdfStructs.h"
29 |
30 | /* Utils functions prototypes */
31 |
32 | void * searchPattern(char* src, char* pat , int pat_size , int size);
33 | struct pdfObject * getPDFObjectByRef(struct pdfDocument * pdf, char * ref);
34 | struct pdfObject * getPDFNextObjectByRef(struct pdfDocument * pdf, struct pdfObject * obj, char * ref);
35 | void printObject(struct pdfObject * obj);
36 | void printObjectByRef(struct pdfDocument * pdf, char * ref);
37 | void printObjectInFile(struct pdfObject * obj);
38 | void printPDFObjects(struct pdfDocument * pdf);
39 | int getNumber(char* ptr, int size);
40 | char* getNumber_s(char* ptr, int size);
41 | char * getIndirectRef(char * ptr, int size);
42 | char * getDelimitedStringContent(char * src, char * delimiter1, char * delimiter2, int src_len);
43 | char * getIndirectRefInString(char * ptr, int size);
44 | char * getPattern(char * ptr, int size, int len);
45 | char * getUnicodeInString(char * stream, int size);
46 | char * getHexa(char * dico, int size);
47 | char * replaceInString(char * src, char * toReplace , char * pat);
48 | char * toBinary(char * stream, int size);
49 | char * binarytoChar(char * binary, int size, int * returned_size);
50 | void printStream(char * stream, int size);
51 | void debugPrint(char * stream, int len); // print in a debug file
52 |
53 |
54 |
55 |
56 | #endif
57 |
--------------------------------------------------------------------------------
/lib/includes/armaditopdf.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #ifndef _armadito_pdf_h_
25 | #define _armadito_pdf_h_
26 |
27 | #include "pdfStructs.h"
28 |
29 | #define a6o_pdf_ver "0.12.6"
30 |
31 |
32 | // Tests Coefficients
33 |
34 | //#define bad_header
35 | //#define encrypted
36 | #define EMPTY_PAGE_CONTENT 99
37 | #define OBJECT_COLLISION 10
38 | #define OBJECT_COLLISION_AND_BAD_XREF 60
39 | #define BAD_TRAILER 40
40 | #define BAD_XREF_OFFSET 30
41 | #define BAD_OBJ_OFFSET 20
42 | #define OBFUSCATED_OBJECT 50
43 | #define MULTIPLE_HEADERS 50
44 | #define MALICIOUS_COMMENTS 50
45 |
46 | #define ACTIVE_CONTENT 40
47 | #define SHELLCODE 40
48 | #define PATTERN_HIGH_REPETITION 40
49 | #define DANGEROUS_KEYWORD_HIGH 90
50 | #define DANGEROUS_KEYWORD_MEDIUM 40
51 | #define DANGEROUS_KEYWORD_LOW 20
52 | #define TIME_EXCEEDED 20
53 |
54 | //#define LARGE_FILE_SIZE 1500000
55 |
56 |
57 | enum err_code {
58 | /* Negative error codes whose magnitudes are single bits: -1, -2, -4, ... -128. */
59 | unexpected_error = -(1 << 0),
60 | bad_pdf_version = -(1 << 1),
61 | bad_pdf_header = -(1 << 2),
62 | unsupported_file = -(1 << 3),
63 | bad_xref_format = -(1 << 4),
64 | bad_trailer_format = -(1 << 5),
65 | bad_object_format = -(1 << 6),
66 | stream_not_decoded = -(1 << 7)
67 | /* Written as -(1 << n): left-shifting a negative value is undefined behaviour in C. */
68 | };
69 |
70 |
71 | /* Functions */
72 | char * getVersion();
73 | int analyzePDF(char * filename);
74 | int analyzePDF_fd(int fd, char * filename);
75 | int analyzePDF_ex(int fd, char * filename);
76 | int calcSuspiciousCoefficient(struct pdfDocument * pdf);
77 | void printAnalysisReport(struct pdfDocument * pdf);
78 |
79 |
80 |
81 | #endif
82 |
--------------------------------------------------------------------------------
/lib/includes/filters.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #ifndef _filters_h_
25 | #define _filters_h_
26 |
27 |
28 | #include "pdfStructs.h"
29 |
30 |
31 | // LZWDecode
32 | #define FIRST_CODE 258
33 | #define EOD_MARKER 257
34 | #define CLEAR_TABLE 256
35 |
36 | #define MAX_CODES 512
37 |
38 | struct LZWdico{
39 | /* One node of a singly linked chain of LZW dictionary entries (see the initDico/addInDico/freeDico prototypes below). */
40 | unsigned short code; /* presumably the LZW code this entry is registered under - confirm in filters.c */
41 | char * entry; /* presumably the byte sequence associated with `code` - TODO confirm ownership */
42 | int entry_len; /* presumably the length of `entry` in bytes - confirm */
43 |
44 | struct LZWdico * next; /* following node in the chain */
45 | };
46 |
47 | /* Functions prototypes */
48 |
49 | char * FlateDecode(char * stream, struct pdfObject* obj);
50 | char * ASCIIHexDecode(char * stream, struct pdfObject * obj);
51 | char * LZWDecode(char* stream, struct pdfObject * obj);
52 | char * ASCII85Decode(char * stream, struct pdfObject * obj);
53 | char * CCITTFaxDecode(char* stream, struct pdfObject * obj);
54 |
55 | // LZWDdecode functions.
56 | struct LZWdico * initDico(int code, char * entry);
57 | struct LZWdico * initDico_(int code, char * entry, int len);
58 | int addInDico(struct LZWdico * dico, int code, char * entry);
59 | void freeDico(struct LZWdico * dico);
60 | char * getEntryInDico(struct LZWdico * dico, int code);
61 | unsigned short readData(char ** data, unsigned int * partial_code, unsigned int * partial_bits, unsigned int code_len);
62 | void printDico(struct LZWdico * dico);
63 |
64 | // CCITTFaxDecode functions.
65 | int getRunLengthCodeInTable(char ** table, char * bits, int table_size);
66 | int getMakeUpCodeInTable(char ** table, char *bits, int table_size);
67 |
68 |
69 | // ASCII85Decode functions
70 | char * getTuple(char * data, int len);
71 |
72 |
73 | // CCITTFaxDecode
74 |
75 | extern char * WHITE_RUN_LENGTH_TERMINATING_CODES[];
76 | extern char * BLACK_RUN_LENGTH_TERMINATING_CODES[];
77 | extern char * WHITE_MAKE_UP_CODES[];
78 | extern char * BLACK_MAKE_UP_CODES[];
79 | extern int WHITE_BLACK_MAKE_UP_CODES_VALUES[];
80 |
81 |
82 | #endif
--------------------------------------------------------------------------------
/tools/perl_poc/lib/analysis/CVEs.pm:
--------------------------------------------------------------------------------
1 | package CVEs;
2 |
3 | use strict;
4 | use warnings;
5 | use Scalar::Util qw(reftype);
6 |
7 | my $DEBUG = "no";
8 |
9 | # CVE_2010_2883_Detection(\%pdfObjects)
10 | # Looks for traces of CVE-2010-2883 (SING string in an embedded font) in the font
11 | # files referenced by the /FontDescriptor objects of a parsed document.
12 | # Parameter: reference to a hash of object hashes, keyed by "<num> <gen> obj".
13 | #   Keys read in each object: "type", "ref", "fontfile", "stream_d", "length1".
14 | # Returns "DETECTED" when a font stream contains the string "SING",
15 | #   "BAD_FONT_FILE_LENGTH" when a font stream length differs from its "length1",
16 | #   "none" otherwise (also when the argument is not a hash reference).
17 | # "DETECTED" takes precedence, whatever the order in which the objects are visited.
18 | sub CVE_2010_2883_Detection{
19 |
20 |     my $ref    = shift;
21 |     my $status = "none";
22 |
23 |     return $status unless (reftype($ref) || '') eq 'HASH';
24 |
25 |     print "\n\n:::CVE_2010_2883_Detection:::\n" unless $DEBUG eq "no";
26 |
27 |     foreach my $obj (values %{$ref}){
28 |
29 |         # Only font descriptors that point to a font file are of interest.
30 |         next unless (reftype($obj) || '') eq 'HASH';
31 |         next unless exists($obj->{"type"}) && $obj->{"type"} eq "/FontDescriptor";
32 |         print "Found FontDescriptor object :: $obj->{ref}\n" unless $DEBUG eq "no";
33 |         next unless exists($obj->{"fontfile"}) && $obj->{"fontfile"} =~ /(\d+\s\d\sR)/;
34 |
35 |         # Turn the indirect reference "N G R" into the object key "N G obj".
36 |         (my $fontfile = $1) =~ s/R/obj/;
37 |         print "font File found :: $fontfile\n" unless $DEBUG eq "no";
38 |
39 |         # Get the decoded font file stream.
40 |         my $font = $ref->{$fontfile};
41 |         unless((reftype($font) || '') eq 'HASH' && defined($font->{"stream_d"}) && length($font->{"stream_d"}) > 0){
42 |             print "Warning :: CVE_2010_2883_Detection :: Font File Object $fontfile is not defined :\n" unless $DEBUG eq "no";
43 |             next;
44 |         }
45 |         my $fontstream = $font->{"stream_d"};
46 |         my $real_len   = length($fontstream);
47 |
48 |         # Check the declared length (length1) against the decoded stream length.
49 |         print "Lenght1 = ".$font->{"length1"}."\n" unless ($DEBUG eq "no" or ! exists($font->{"length1"}));
50 |         print "Real length = ".$real_len."\n" unless $DEBUG eq "no";
51 |         if(exists($font->{"length1"})){
52 |             no warnings qw(numeric uninitialized); # length1 is parsed text, not a validated number
53 |             if($font->{"length1"} != $real_len){
54 |                 print "Warning :: Font File decoded stream Length is Wrong :: ".$font->{"length1"}." :: ".$real_len."\n" unless $DEBUG eq "no";
55 |                 # Never downgrade a "DETECTED" set by a previously visited object.
56 |                 $status = "BAD_FONT_FILE_LENGTH" if $status eq "none";
57 |             }
58 |         }
59 |
60 |         # Detect the SING (Smart INdependent Glyphlets) string.
61 |         if($fontstream =~ /SING/){
62 |             print "Warning :: Found SING (Smart INdependent Glyphlets) :: Possible CVE_2010_2883\n" unless $DEBUG eq "no";
63 |             $status = "DETECTED";
64 |         }
65 |     }
66 |
67 |     return $status;
68 | }
69 |
70 | 1;
71 |
72 | __END__
73 |
74 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ARMADITO PDF ANALYZER
2 | =====================
3 | [](https://travis-ci.org/armadito/armadito-mod-pdf)
4 |
5 |
7 |
8 |
9 | Armadito module PDF is a heuristic module for PDF document analysis.
10 |
11 | Copyright (C) Teclib', 2015, 2016
12 |
13 | See Online documentation at : http://armadito-av.readthedocs.io/en/latest/
14 |
15 | License : GPLv3
16 |
17 | What is it?
18 | -----------
19 |
20 | Armadito PDF analyzer is a module for PDF documents scanning that includes:
21 |
22 | * a PDF parser
23 |
24 | * a heuristic analyzer that computes the document confidence level
25 |
26 | Licensing
27 | ---------
28 |
29 | Armadito PDF analyzer is licensed under the GPLv3 https://www.gnu.org/licenses/license-list.html#GNUGPLv3
30 |
31 | Dependencies
32 | ------------
33 |
34 | > miniz.c
35 |
36 | FEATURES
37 | --------
38 |
39 | ==> Parsing <==
40 |
41 | * Remove PostScript comments in the content of the document.
42 | * Get PDF version in header (Ex: %PDF-1.7).
43 | * Get trailers and xref table or xref objects.
44 | * Get information about the objects described in the document (reference, dictionary, type, stream, filters, etc).
45 | * Extract objects embedded in stream objects.
46 | * Decode object streams encoded with filters : FlateDecode, ASCIIHexDecode, ASCII85Decode, LZWDecode, CCITTFaxDecode
47 |
48 | ==> Analysis <==
49 |
50 | * Tests based on PDF document structure (according to PDF specifications):
51 | - Check the PDF header version (from version 1.1 to 1.7).
52 | - Check if the content of the document is encrypted.
53 | - Check that the document contains non-empty pages.
54 | - Check object collision in object declaration.
55 | - Check trailers format.
56 | - Check xref table and xref object.
57 | - Check the presence of malicious Postscript comments (which could cause parsing errors).
58 |
59 |
60 | * Tests based on PDF objects content:
61 | - Get potentially malicious active contents (JavaScripts, Embedded files, Forms, URI, etc.)
62 | - JavaScript content analysis (malicious keywords, pattern repetition, unicode strings, etc).
63 | - Info object content analysis (search potentially malicious strings).
64 | - Check if object dictionary is hexa obfuscated.
65 |
66 |
67 | ==> Notation <==
68 |
69 | * A suspicious coefficient is attributed to each test.
70 | * Compute the suspicious coefficient of the PDF document.
71 |
72 |
73 | LIMITATIONS
74 | -----------
75 |
76 | - Supported PDF versions are: %PDF-1.1 to %PDF-1.7.
77 | - PDF documents with encrypted content are not supported.
78 | - Removing comments is skipped for document > 2MB
79 |
--------------------------------------------------------------------------------
/module/modulePDF.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 | #include "modulePDF.h"
23 |
24 | struct modulePDF_data { // module data; in this file only its size is used (.size of `module`)
25 | const char *tmp_dir; // NOTE(review): not read or written anywhere in this file
26 | int late_days; // NOTE(review): not read or written anywhere in this file
27 | int critical_days; // NOTE(review): not read or written anywhere in this file
28 | };
29 |
30 | static enum a6o_mod_status modulePDF_init(struct a6o_module *module)
31 | {
32 |     /* Init hook: there is no state to set up, so only log the event and report success. */
33 |     a6o_log(A6O_LOG_MODULE, A6O_LOG_LEVEL_INFO, "module PDF initialized successfully!");
34 |     return A6O_MOD_OK;
35 | }
36 |
37 | static enum a6o_mod_status modulePDF_close(struct a6o_module *module)
38 | {
39 |     /* Close hook: this module needs no close instruction, success is reported directly. */
40 |     return A6O_MOD_OK;
41 | }
42 |
43 | static enum a6o_update_status modulePDF_info(struct a6o_module *module, struct a6o_module_info *info)
44 | {
45 |     /* Hard-coded update date. NOTE(review): these fields mean 8 June 2016, 08:30 local time
46 |      * (tm_mon is 0-based, tm_year counts from 1900), not the "01/03/2016 9:30" of the old comment. */
47 |     struct tm stamp = { .tm_sec = 0, .tm_min = 30, .tm_hour = 8, .tm_mday = 8, .tm_mon = 5, .tm_year = 116 };
48 |
49 |     info->mod_update_ts = mktime(&stamp);
50 |
51 |     return A6O_UPDATE_OK;
52 | }
53 |
54 |
55 | static enum a6o_file_status modulePDF_scan(struct a6o_module *module, int fd, const char *path, const char *mime_type, char **pmod_report)
56 | {
57 |     /*
58 |      * Run the PDF analysis and map its integer result to a scan status:
59 |      *   -1                 -> internal error
60 |      *   -2                 -> undecided: not supported files (encrypted contents or bad header)
61 |      *   <  MALICIOUS_COEF  -> clean
62 |      *   >= MALICIOUS_COEF  -> suspicious, with a report string stored in *pmod_report
63 |      */
64 |     const int coef = analyzePDF_ex(fd,path);
65 |
66 |     switch (coef) {
67 |     case -1:
68 |         return A6O_FILE_IERROR;
69 |     case -2:
70 |         return A6O_FILE_UNDECIDED;
71 |     default:
72 |         break;
73 |     }
74 |
75 |     if (coef < MALICIOUS_COEF)
76 |         return A6O_FILE_CLEAN;
77 |
78 |     *pmod_report = os_strdup("ModulePDF!SuspiciousPDF");
79 |     return A6O_FILE_SUSPICIOUS;
80 | }
81 |
82 |
83 | static const char *modulePDF_mime_types[] = { // MIME types listed for this module
84 | "application/pdf",
85 | NULL, // NULL entry closes the list
86 | };
87 | // Module descriptor: hooks the static functions defined above into struct a6o_module.
88 | struct a6o_module module = {
89 | .init_fun = modulePDF_init,
90 | .conf_table = NULL,
91 | .post_init_fun = NULL,
92 | .scan_fun = modulePDF_scan,
93 | .close_fun = modulePDF_close,
94 | .info_fun = modulePDF_info,
95 | .supported_mime_types = modulePDF_mime_types,
96 | .name = "modulePDF",
97 | .size = sizeof(struct modulePDF_data),
98 | };
99 |
--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
1 | :: Change Log ::
2 | All notable changes to the project Armadito-pdf will be documented in this file.
3 | This project adheres to [Semantic Versioning](http://semver.org)
4 |
5 |
6 | Mon, 27 May 2016 :: 0.10.1 :: (ufausther)
7 | -----------------------------------------
8 | *** New version 0.10.1 pushed in github (DEV branch).
9 |
10 |
11 | Wed, 1 June 2016 :: 0.11.0 :: (ufausther)
12 | -----------------------------------------
13 |
14 | + add custom log function (cli_log with macro definition.)
15 | + add "filename" field to pdfDocument struct.
16 | + modif function printAnalysisReport(struct pdfDocument * pdf) prototype.
17 | + add errors count in analysis report.
18 | + add functions headers.
19 | + remove duplicated call to checkMagicNumber() function
20 | + rename function getNumber_a(...) => getNumber_s(...)
21 | + improve error handling.
22 | + FlateDecode filter : increase destination buffer size on MZ_BUFF_ERROR error.
23 |
24 | Fri, 03 June 2016 :: 0.11.1 :: (ufausther)
25 | ------------------------------------------
26 |
27 | + FlateDecode filter : increase destination buffer size on MZ_BUFF_ERROR error.
28 |
29 |
30 | Mon, 06 June 2016 :: 0.11.1 :: (ufausther)
31 | ------------------------------------------
32 |
33 | + translate/update README.md
34 |
35 | Tue, 07 June 2016 :: 0.12.0 :: (ufausther)
36 | ------------------------------------------
37 |
38 | + Add header files (armaditopdf.h - pdfStructs.h - pdfParsing.h - pdfAnalysis.h - utils.h ) and corresponding sources files.
39 | + folders reorganisation.
40 | + define version string in header (#define a6o_pdf_ver)
41 | + Add change log file.
42 |
43 | Tue, 07 June 2016 :: 0.12.1 :: (ufausther)
44 | ------------------------------------------
45 |
46 | + bug fix: getDelimitedStringContent() temporary buffer was not updated.
47 |
48 |
49 | Wed, 08 June 2016 :: 0.12.1 :: (ufausther)
50 | ------------------------------------------
51 |
52 | + modif for linux compatibility.
53 |
54 | Thu, 09 June 2016 :: 0.12.2 :: (ufausther)
55 | ------------------------------------------
56 |
57 | + fix some crashes detected with fuzzing
58 |
59 | Tue, 14 June 2016 :: 0.12.3 :: (ufausther)
60 | ------------------------------------------
61 |
62 | + fix crash :: getStreamFilters :: malformed dictionary.
63 | + recursive scan fix :: scan was stopped on error.
64 | + checkXref :: skip white spaces at the end of the line.
65 |
66 | Wed, 15 June 2016 :: 0.12.3 :: (ufausther)
67 | ------------------------------------------
68 |
69 | + fix crashes :: extractObjectFromObjStream() :: invalid offset (/First) and number of objects (/N) in dictionary
70 |
71 |
72 | Fri, 19 Aug 2016 :: 0.12.4 :: (ufausther)
73 | ------------------------------------------
74 |
75 | + fix warning in function getTrailers :: test file = clam.pdf
76 | + getEmbeddedFile :: looking for FileSpec object is not necessary for embedded file detection.
77 |
78 |
79 | Thu, 9 Feb 2017 :: 0.12.5 :: (ufausther)
80 | ------------------------------------------
81 |
82 | + Fix some memory leaks.
83 | + Fix default log level to warn.
84 | + Minor changes
85 | + pdfParsing: improve code.
86 |
87 |
88 | Mon, 20 Feb 2017 :: 0.12.6 :: (ufausther)
89 | ------------------------------------------
90 |
91 | + Minor changes
92 | + Fix resource leaks
93 | + Get library version with getVersion() function.
94 |
--------------------------------------------------------------------------------
/win32/ArmaditoPDF/ArmaditoPDF/ArmaditoPDF.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 | {de5d87ae-3c70-4a71-877f-f5cef05b237e}
18 |
19 |
20 | {2cf89655-f2a4-4131-a986-3db75eda9ff0}
21 |
22 |
23 |
24 |
25 | Fichiers sources
26 |
27 |
28 | Fichiers sources
29 |
30 |
31 | Fichiers sources
32 |
33 |
34 | Fichiers sources
35 |
36 |
37 | Fichiers sources
38 |
39 |
40 | Fichiers sources
41 |
42 |
43 | Fichiers sources
44 |
45 |
46 | Fichiers sources
47 |
48 |
49 | Fichiers sources
50 |
51 |
52 | Fichiers sources\osdeps
53 |
54 |
55 |
56 |
57 | Fichiers d%27en-tête
58 |
59 |
60 | Fichiers d%27en-tête
61 |
62 |
63 | Fichiers d%27en-tête
64 |
65 |
66 | Fichiers d%27en-tête
67 |
68 |
69 | Fichiers d%27en-tête
70 |
71 |
72 | Fichiers d%27en-tête
73 |
74 |
75 | Fichiers d%27en-tête
76 |
77 |
78 | Fichiers d%27en-tête\osdeps
79 |
80 |
81 |
--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
1 |
2 | AC_INIT(armadito-pdf, [0.12.6], ufausther@teclib.com)
3 | AM_INIT_AUTOMAKE([foreign])
4 |
5 | # collect arguments
6 | # debug
7 | AC_ARG_ENABLE([debug],
8 | AS_HELP_STRING([--enable-debug],[enable debugging @<:@default is yes@:>@]),
9 | ,
10 | enable_debug="yes")
11 |
12 | # armadito pdf library (with --disable-lib or --enable-lib=no, $enableval is "no")
13 | AC_ARG_ENABLE([lib],
14 |     AS_HELP_STRING([--enable-lib],[enable armadito-pdf library @<:@default is no@:>@]),
15 |     [AS_IF([test "x$enableval" = "xno"], [enable_lib="no"], [enable_lib="yes"])],
16 |     [enable_lib="no"])
17 |
18 | # module (same handling: --disable-module really disables it)
19 | AC_ARG_ENABLE([module],
20 |     AS_HELP_STRING([--enable-module],[enable armadito-av module @<:@default is no@:>@]),
21 |     [AS_IF([test "x$enableval" = "xno"], [enable_module="no"], [enable_module="yes"])],
22 |     [enable_module="no"])
23 |
24 |
25 | # cli tools (same handling: --disable-tools really disables them)
26 | AC_ARG_ENABLE([tools],
27 |     AS_HELP_STRING([--enable-tools],[enable armadito-pdf cli tools @<:@default is no@:>@]),
28 |     [AS_IF([test "x$enableval" = "xno"], [enable_tools="no"], [enable_tools="yes"])],
29 |     [enable_tools="no"])
30 |
31 | PKG_PROG_PKG_CONFIG
32 |
33 | # check for analysis module sources
34 | AC_MSG_CHECKING(for armadito-pdf library sources)
35 | AC_CONFIG_SRCDIR(lib/src/armaditopdf.c)
36 | AC_MSG_RESULT(yes)
37 |
38 | # check for needed programs
39 | AC_PROG_CC([gcc])
40 | AC_PROG_LIBTOOL
41 |
42 | # check for debug arg
43 | AC_MSG_CHECKING(for debug)
44 | AC_MSG_RESULT($enable_debug)
45 | AM_CONDITIONAL([DEBUG], [test "$enable_debug" = "yes"])
46 | if test "$enable_debug" = "yes"; then
47 | AC_DEFINE(DEBUG,1,[Define DEBUG to enable debug messages])
48 | CFLAGS="${CFLAGS} -g"
49 | fi
50 |
51 |
52 | # check for library arg
53 | AC_MSG_CHECKING(for armadito PDF library)
54 | AC_MSG_RESULT($enable_lib)
55 | AM_CONDITIONAL([COND_LIBRARY], [test "$enable_lib" = "yes"])
56 |
57 | # check for module arg
58 | AC_MSG_CHECKING(for armadito module)
59 | AC_MSG_RESULT($enable_module)
60 | AM_CONDITIONAL([COND_MODULE], [test "$enable_module" = "yes"])
61 |
62 | # check for libarmadito if module is enabled.
63 | if test "$enable_module" = "yes"; then
64 | # check for libarmadito
65 | PKG_CHECK_MODULES(LIBARMADITO, libarmadito, [HAVE_LIBARMADITO=yes], [HAVE_LIBARMADITO=no])
66 | # check for libarmadito/armadito.h in source tree
67 | if test "$HAVE_LIBARMADITO" = "no"; then
68 | OLD_CPPFLAGS="$CPPFLAGS"
69 | CPPFLAGS=-I$srcdir/../../armadito-core/libmodule/include
70 | AC_CHECK_HEADER([libarmadito/armadito.h], [], [AC_MSG_ERROR([cannot find Armadito module library])], [])
71 | CPPFLAGS="$OLD_CPPFLAGS"
72 | LIBARMADITO_CFLAGS='-I$(top_srcdir)/../../armadito-core/libmodule/include'
73 | AC_SUBST([LIBARMADITO_CFLAGS])
74 | fi
75 | fi
76 |
77 | # check for tools arg
78 | AC_MSG_CHECKING(for cli tools)
79 | AC_MSG_RESULT($enable_tools)
80 | AM_CONDITIONAL([COND_TOOLS], [test "$enable_tools" = "yes"])
81 |
82 | # check for libarmadito-pdf if tools are enabled.
83 | if test "$enable_tools" = "yes"; then
84 |
85 | # check for libarmadito-pdf
86 | PKG_CHECK_MODULES(LIBARMADITO_PDF, libarmadito-pdf, [HAVE_LIBARMADITO_PDF=yes], [HAVE_LIBARMADITO_PDF=no])
87 | if test "$HAVE_LIBARMADITO_PDF" = "no"; then
88 | AC_MSG_ERROR("cannot find ARMADITO-PDF library: skipping armadito-pdf tool build.")
89 | fi
90 | fi
91 |
92 |
93 | # Output files
94 | AC_CONFIG_FILES([
95 | Makefile
96 | ])
97 |
98 | # conditional makefile for library.
99 | AM_COND_IF([COND_LIBRARY],
100 | AC_CONFIG_FILES([
101 | lib/Makefile
102 | ]))
103 |
104 | # conditional makefile for tools.
105 | AM_COND_IF([COND_TOOLS],
106 | AC_CONFIG_FILES([
107 | tools/Makefile
108 | tools/armadito-pdf/Makefile
109 | ]))
110 |
111 | # conditional makefile for module.
112 | AM_COND_IF([COND_MODULE],
113 | AC_CONFIG_FILES([
114 | module/Makefile
115 | ]))
116 |
117 | AC_OUTPUT
118 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: C
2 | sudo: required
3 | dist: trusty
4 | addons:
5 | apt:
6 | packages:
7 | - libtool
8 | - libglib2.0-dev
9 | - libmagic-dev
10 | - libxml2-dev
11 | - libmicrohttpd-dev
12 | - libcurl4-openssl-dev
13 | - libjansson-dev
14 | - libjansson4
15 | - autoconf-archive
16 | coverity_scan:
17 | project:
18 | name: armadito/armadito-mod-pdf
19 | description: Build submitted via Travis CI
20 | notification_email: vhamon@teclib.com
21 | build_command_prepend: "./configure --enable-lib PKG_CONFIG_PATH=/home/travis/build/armadito-core/lib/pkgconfig;
22 | make clean"
23 | build_command: make -j 4
24 | branch_pattern: coverity_scan
25 | sonarqube:
26 | branches :
27 | - DEV
28 | deploy:
29 | skip_cleanup: true
30 | provider: launchpad
31 | slug: "~armadito/armadito-mod-pdf/+git/main"
32 | oauth_token:
33 | secure: d8X9y9hEPB0zgibcvVL2Q2qrDoM3o2/Wh28bZcD/A4TH5pgDK9oYbgtU5ywozK/CScpHEHmRTjZhwq9Mh4xVNni7XIKigpHbAKH0NhR6wKryQwR3VwUgRBKxp/UAnWFSv9RiPT7fG5tOQpHaD+8O8N06vLXZ0p4xV29jwnxPEvYEUt0fKmo4SWI18HQLpoFCuQLchgzvPs1X+iixWms5BmBlDIhnwx79geaMLF6sCc9f4TcvJ8yT9s4VbH/qYhsbCHB8lkRiBY1qQqw4sN67gQhJ9oY5J/AbGMJSdb4nimMhhUfkiME8iQVOx07YOaQJ6pMz5VYWpF7dOLlXdvEgc5caVs/gENmpb270aQHUSILn2A+3NI+LnSW3R3dkrtLU+HX9zVtMvnus/8T1f5KSecLvH+mRU4J6RTl9+MEDYA6sD59Ie2sBPqgljymgq8DJ8yoVqBzPzVCk95n5KTu21xobHgqXn4QfB/vGqQbE3U9XBFloALzsd4kMp+W37JtztVgaTlG+YNslCzWUP+f8pO6wHaRzJeVHC9/h24b5fk87TtvwTtVrxinNFXzj4i42A+6zlsW8BZXJ9I8jgaNhKGiWpHUy/iH9EADo2Y2rtYd09adAelEsIIJ4X+/N2+QfRjnDMGUV+8v3xUbLayQT3nb2G05X8YA19NlDoXZATYU=
34 | oauth_token_secret:
35 | secure: MnRk4gTNMwpSUodIwrEvdPg5mrufFjodZElpWomHg7gNBOfp/rsbzk4MEeu8sqxq1s0lUQUPzw4uiMy7tKy0x3Gn4XAhFMW9WDOtanDQV3qfPiFxlxHpfg2p+Tu5Ol6TnSYRI0l/vC3rN/pEJPOJoWJu6md6LEXX9U5Itub+rcmiMyygz11bMY46jY+vSZhLcT1LmgK6sojyCdIBFOjd+ACpvK+Lu/6Kj/QYhz3txOjMbsEgz+L2giSgs3j9zmpbg4zPFCL2Cq04muXVr3mOW1fmAXWop6DEs0rc9ovkqJwKaNcPWc1Inm5QDaAcR5ckeY/Oy4L/mHe2zv3/d7RpubrdudhFJzYogsHY2r7kGtQYNCfXaj5QHVRxCnvHhvMCiBI4lrqFZEMZii9eXSQJZJk32Cz20RiTvHeJwQsuukizfuHIzCNLwbb4aumR6m3uAL2U3QJE7j2ZfgkSF1boOvCQplCRiBL6kexNLHvTlVGHbt8a+Eptp0KE+7lKXk9o7CONWxLaCVGOe+2zTms7b5/4FYLD4gajoBRUmaTe0KZVlvvKaoQanj1+ay9SA7pIlBMHoTmnoHHZVu8l2lOERXd4VM1bE+zWH5MULy76cSIhuerwJg5rPUbizVpFsAVrk346gUKcGzWkibY2uSvUyVEoef2jiX62uJ9FUmVUgFo=
36 | on:
37 | tags: true
38 | env:
39 | global:
40 | - secure: ZqqOmdvcZ/tUhR87wlAhnDeFgkCywwYlAchKMIIGJPjGpsMEZU/m+kKi49YJEZrC6i7QO/WexCi3TVIFWaIGtWXCyri0rJIIDPdJtiJ3SNvDhwjcB+eNOsNVnv3Hg8OllZV3+GvoKNKNrwYtYraill4C3+8PeDLLpOI6wQmNJWUsLMRHlSXM6ZDS9bxILrNHFobvTdtuM9wkfwh0JfoIWMdYnzmqrRrhT+bEZaDHMHp7GTSiFBL2lvqlutMWMn8fvTLn35M8839WyFPcr/lnFZZZcSKJBqM8GySq1yNr9NNAoUUhhmP0Oope8iks59mkDQRyRbVpkOYQGkJChhGt2/LQE5W0WjpmWS+5cAqYFlqJKs2AFPNI9a94HI/XZjO0RwQqkpbNHc06wbPIYf0h1aPi75xYsC07NhdwvS40NYou82IjnpffdRDrqNcVP4yBASqaQlaDCrr35nPGm993vPCVa7i0NxrqWmtf5KFo/Nk6Lxh3nWFLABLZI8DEIogOCnVF2W+Q9LaSmbUcmsovV2XNdLNYoKETVlZp5F0h7mXjyYsRN8dexPZb8DBYNWCI5PbJS4gf98JjDRxKVuIbWqRg8W/KbMdU78d4aetznVeLkh7SnVyFtR4ImYEod1xFVdulh1F09VAfLBk/J6HTs84uqdW65w1TKHAtZyO/77Q=
41 | - secure: eFWWkuzR9vYPgXPz1f0JyZQFFdipAu5b5fOKXeDyzlb27DG0jQYUeRou6xdtXvH9jKgAtRxwa0IdtV+2cEW+deTr/bxvWlGGLHHnQSgtgvq/BgbDptuTsSVWfRhuNJogNGB5BaMd33lyX5nKrH6Jbz8+qp8DnE2l96kThSbByyamVG/7Q9sXAHEfCUI4UdRSuGijB18ezzzxloDuA3pZPaa6laDXxXSCVRyaY+qREL6qwhWK9LwNdqtSuqcFs5ppIYhz3c0KCMMKb86cZrtJTXolSK6Oe6sjXy+Pj+E1D895jZF2k1YDQ7YLFNhYyFClC85l2NvJBitr5l7FUHhQGsuLfFk3z2yMkJE9UbDOog98EjiRH/VRICReFrEN78D8k8JSziyIXB0FGHoIKEUXV3n9Og++AkFF4qdwxPC/VGGComZBAGC7VMEg67AIuG2vKoCgl7GHvPu/lW0cCZUE0wt+RZOXB6mJv6Fr9qmXWibarv/wA7gItDj1PP8pgxSWn69b4U8aNwoYHNYyBldqK7YRevqwAnaFfohMUmYPJnE80pWLDe2i/4T+Wl8XhuOubG9k3uncXZ6uB4mzVooITYiAzTzesqkU4ix62bm3C0g6h0XawQjzHbGpK0Je2AX7DMlCLyjef+R/4Cj6hP2rsK1IrJbh5McwoSTErydzQU8=
42 | before_install:
43 | - echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END
44 | CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca-
45 | - wget https://sonarqube.com/static/cpp/build-wrapper-linux-x86.zip
46 | - unzip build-wrapper-linux-x86.zip
47 | - export PATH=$PWD/build-wrapper-linux-x86:$PATH
48 | compiler:
49 | - gcc
50 | before_script:
51 | - git clone git://github.com/armadito/armadito-av -b DEV
52 | - cd armadito-av/
53 | - "./autogen.sh"
54 | - mkdir -p /home/travis/build/armadito-core
55 | - "./configure --prefix=/home/travis/build/armadito-core"
56 | - make
57 | - make install
58 | - cd /home/travis/build/armadito/armadito-mod-pdf
59 | - "./autogen.sh"
60 | script:
61 | - "./configure --enable-lib PKG_CONFIG_PATH=/home/travis/build/armadito-core/lib/pkgconfig"
62 | - build-wrapper-linux-x86-64 --out-dir build-wrapper-out make clean all
63 | - sonar-scanner -X -Dsonar.host.url=https://sonarqube.com -Dsonar.login=$SONAR_TOKEN
64 |
--------------------------------------------------------------------------------
/lib/src/osdeps.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #include "osdeps.h"
25 | #include
26 | #include "log.h"
27 |
28 |
29 | #ifdef _WIN32
30 |
31 | FILE * os_fopen(const char * filename, const char * mode)
32 | {
33 |     /* Windows wrapper around fopen_s(). The handle starts out as NULL, so NULL is
34 |      * what the caller gets back unless fopen_s() stores a stream in it. */
35 |     FILE * handle = NULL;
36 |
37 |     fopen_s(&handle, filename, mode);
38 |
39 |     return handle;
40 | }
41 |
42 |
43 | /*
44 |  * Walks the directory `path` and calls dirent_cb(fd, entry_path) for every entry,
45 |  * with fd always -1. When recurse >= 1, sub-directories are scanned recursively
46 |  * instead of being handed to the callback. `data` is only passed on to recursive calls.
47 |  *
48 |  * Returns 0 on success (also when the directory cannot be listed), -1 on invalid
49 |  * parameter or allocation failure, -2 when `path` is not an existing directory, or
50 |  * the non-zero code of a failed recursive scan (which stops the current scan).
51 |  *
52 |  * NOTE(review): with recurse < 1, sub-directories are still passed to the callback.
53 |  */
54 | int os_scan_dir(char * path, int recurse, dirent_scan_cb dirent_cb, void * data){
55 |
56 |     char * rpath = NULL, *entryPath = NULL;
57 |     int ret = 0;
58 |     size_t size = 0;
59 |     DWORD attrs = 0;
60 |     HANDLE fh = INVALID_HANDLE_VALUE;
61 |     WIN32_FIND_DATAA fdata;
62 |     int fd = -1;
63 |
64 |     if (path == NULL || dirent_cb == NULL){
65 |         err_log("scan_dir :: invalid parameter\n");
66 |         return -1;
67 |     }
68 |
69 |     dbg_log("scan_dir :: path = %s\n", path);
70 |
71 |     // Check if it is a directory // TODO :: os_scan_dir :: scan a file.
72 |     // INVALID_FILE_ATTRIBUTES has every bit set, so it must be ruled out before testing the flag.
73 |     attrs = GetFileAttributesA(path);
74 |     if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
75 |         err_log("scan_dir :: (%s) is not a directory\n", path);
76 |         return -2;
77 |     }
78 |
79 |     // search pattern = path + "\*" + terminating NUL
80 |     size = strlen(path) + 3;
81 |     rpath = (char*)calloc(size, sizeof(char));
82 |     if (rpath == NULL){
83 |         err_log("scan_dir :: memory allocation failed\n");
84 |         return -1;
85 |     }
86 |     sprintf_s(rpath, size, "%s\\*", path);
87 |     dbg_log("scan_dir :: rpath = %s\n",rpath);
88 |
89 |     // ANSI variant on purpose: it matches WIN32_FIND_DATAA even when UNICODE is defined.
90 |     fh = FindFirstFileA(rpath, &fdata);
91 |     if (fh == INVALID_HANDLE_VALUE) {
92 |         warn_log("scan_dir :: FindFirstFileA call failed :: err= [%lu]\n", (unsigned long)GetLastError());
93 |         goto clean;
94 |     }
95 |
96 |     // do/while so that the entry returned by FindFirstFileA is processed too.
97 |     do {
98 |         // exclude paths "." and ".."
99 |         if (!strcmp(fdata.cFileName, ".") || !strcmp(fdata.cFileName, ".."))
100 |             continue;
101 |
102 |         // build the entry complete path: path + "\" + name + terminating NUL
103 |         size = strlen(path) + strlen(fdata.cFileName) + 2;
104 |         entryPath = (char*)calloc(size, sizeof(char));
105 |         if (entryPath == NULL){
106 |             err_log("scan_dir :: memory allocation failed\n");
107 |             ret = -1;
108 |             break;
109 |         }
110 |         sprintf_s(entryPath, size, "%s\\%s", path, fdata.cFileName);
111 |         dbg_log("scan_dir :: cfilename = %s\n", fdata.cFileName);
112 |         dbg_log("scan_dir :: entryPath = %s\n", entryPath);
113 |
114 |         // If it is a directory and we do recursive scan
115 |         if ((fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) && recurse >= 1)
116 |             ret = os_scan_dir(entryPath, recurse, dirent_cb, data);
117 |         else
118 |             (*dirent_cb)(fd,entryPath);
119 |
120 |         free(entryPath);
121 |         entryPath = NULL;
122 |
123 |     } while (ret == 0 && FindNextFileA(fh, &fdata) != FALSE);
124 |
125 | clean:
126 |     free(rpath);   // free(NULL) is a no-op
127 |     if (fh != INVALID_HANDLE_VALUE)
128 |         FindClose(fh);
129 |     return ret;
130 | }
131 |
132 |
133 |
134 | #else
135 |
136 | // Linux part for compatibility.
137 | // Bounded append: adds at most count chars of strSource to strDest (numberOfElements bytes); 0 if ok, -1 if it cannot fit.
138 | int os_strncat(char *strDest, size_t numberOfElements, const char *strSource, size_t count) {
139 |     const char *end = memchr(strSource, '\0', count);
140 |     size_t extra = end ? (size_t)(end - strSource) : count;
141 |     size_t used = strlen(strDest);
142 |     if (extra >= numberOfElements || used >= numberOfElements - extra)
143 |         return -1;   // the result plus its terminator would not fit: strDest is left untouched
144 |     memcpy(strDest + used, strSource, extra);
145 |     strDest[used + extra] = '\0';
146 |     return 0;
147 | }
148 |
149 | // Bounded copy: at most count chars of strSource, always NUL-terminated; 0 if ok, -1 if strDest is too small.
150 | int os_strncpy(char *strDest, size_t numberOfElements, const char *strSource, size_t count) {
151 |     const char *end = memchr(strSource, '\0', count);
152 |     size_t n = end ? (size_t)(end - strSource) : count;
153 |     if (n >= numberOfElements)
154 |         return -1;   // no room for n chars plus the terminator: strDest is left untouched
155 |     memcpy(strDest, strSource, n);
156 |     strDest[n] = '\0';
157 |     return 0;
158 | }
159 |
160 | int os_scan_dir(char * path, int recurse, dirent_scan_cb dirent_cb, void * data)
161 | {
162 |     /* Non-Windows placeholder: no directory is walked, the callback is never
163 |      * called and success (0) is reported unconditionally. */
164 |     return 0;
165 | }
166 |
167 |
168 | #endif
--------------------------------------------------------------------------------
/tools/cli_analyzer/main.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #include "armaditopdf.h"
25 | #include "osdeps.h"
26 | #include "log.h"
27 | #include "getopt.h"
28 |
29 |
30 | struct scan_options { // options gathered from the command line by parse_options()
31 | char *path_to_scan; // strdup()'ed copy of the first non-option argument; freed in main()
32 | enum log_level log_level; // level chosen with -l/--log-level; main() presets it to -1
33 | };
34 |
35 |
36 | static struct option cli_option_def[] = { // long options handed to getopt_long() in parse_options()
37 | {"help", no_argument, 0, 'h'},
38 | {"version", no_argument, 0, 'v'},
39 | {"log-level", required_argument, 0, 'l'},
40 | {0, 0, 0, 0} // all-zero entry closing the table
41 | };
42 |
43 |
44 | /* Print the command-line help on stderr, then terminate the process with exit(-1). */
45 | void Usage(){
46 |     fprintf(stderr, "usage: armadito-pdf [options] FILE|DIR\n");
47 |     fprintf(stderr, "\n");
48 |     fprintf(stderr, "Armadito PDF scanner\n");
49 |     fprintf(stderr, "\n");
50 |     fprintf(stderr, "Options:\n");
51 |     fprintf(stderr, " --help -h print help and quit\n");
52 |     // the short option registered in cli_option_def is a lower-case 'v'
53 |     fprintf(stderr, " --version -v print program version\n");
54 |     // the level names are the ones compared against optarg in parse_options()
55 |     fprintf(stderr, " --log-level=LEVEL | -l LEVEL set log level [none|error|warn|info|debug]\n");
56 |     fprintf(stderr, "\n");
57 |     exit(-1);
58 | }
59 |
60 | void Version()
61 | {
62 |     /* Print the version banner on stdout, then leave the process with status 1. */
63 |     fprintf(stdout, "armadito-pdf v%s (c) 2015 - 2017 by Teclib\n", a6o_pdf_ver); exit(1);
64 | }
65 |
66 |
67 | /* Fill *opts from the command line. -h and -v (or -V) print and exit; -l LEVEL stores the
68 |  * log level; the first non-option argument is duplicated into opts->path_to_scan.
69 |  * Any invalid option, bad level or missing path ends in Usage(), which exits. Returns 0. */
70 | int parse_options(int argc, char ** argv, struct scan_options * opts){
71 |
72 |     while(1){
73 |
74 |         int c, option_index = 0;
75 |
76 |         // 'V' is accepted because the help text advertised it; the former 'i' had no handler.
77 |         c = getopt_long (argc, argv, "hvVl:", cli_option_def, &option_index);
78 |
79 |         /* Detect the end of the options. */
80 |         if (c == -1){
81 |             break;
82 |         }
83 |
84 |         switch(c){
85 |
86 |         case 'h':
87 |             Usage();
88 |             break;
89 |
90 |         case 'v':
91 |         case 'V':
92 |             Version();
93 |             break;
94 |
95 |         case 'l':
96 |             if(!strcmp("error",optarg))
97 |                 opts->log_level = LOG_LEVEL_ERROR;
98 |             else if(!strcmp("warn",optarg))
99 |                 opts->log_level = LOG_LEVEL_WARNING;
100 |             else if(!strcmp("info",optarg))
101 |                 opts->log_level = LOG_LEVEL_INFO;
102 |             else if(!strcmp("debug",optarg))
103 |                 opts->log_level = LOG_LEVEL_DEBUG;
104 |             else if(!strcmp("none",optarg))
105 |                 opts->log_level = LOG_LEVEL_NONE;
106 |             else{
107 |                 fprintf(stderr, "Option Error: Bad log level value\n");
108 |                 Usage();
109 |             }
110 |             break;
111 |
112 |         default:
113 |             // Unknown option or missing argument: show the usage instead of calling abort().
114 |             Usage();
115 |             break;
116 |         }
117 |     }
118 |
119 |     if (optind < argc){
120 |         opts->path_to_scan = strdup(argv[optind]);
121 |     }else{
122 |         fprintf(stderr, "Argument Error: Missing file or directory path\n");
123 |         Usage();
124 |     }
125 |     return 0;
126 | }
127 |
128 |
129 | // Launch a scan directory
130 | int do_scan(struct scan_options * opts)
131 | {
132 |     /* Open the target in binary mode, run the analysis on its descriptor, close it,
133 |      * and return the analysis result (-1 when the file cannot be opened). */
134 |     FILE * stream = os_fopen(opts->path_to_scan,"rb");
135 |     int result;
136 |
137 |     if(stream == NULL){
138 |         err_log("Can't open file %s\n", opts->path_to_scan);
139 |         return -1;
140 |     }
141 |
142 |     result = analyzePDF_ex(os_fileno(stream), opts->path_to_scan);
143 |
144 |     fclose(stream);
145 |
146 |     return result;
147 | }
148 |
149 |
150 | // launch a task according to options and parameters.
151 | int process_opts(struct scan_options * opts)
152 | {
153 |     /* Nothing can be scanned without options and a target path. */
154 |     if(!opts || !opts->path_to_scan)
155 |         return -1;
156 |
157 |     /* Only strictly positive levels are forwarded to the logger.
158 |      * NOTE(review): whether LOG_LEVEL_NONE is 0 (and would then be ignored here)
159 |      * is not visible in this file - confirm against the enum in log.h. */
160 |     if(opts->log_level > 0)
161 |         set_current_log_level(opts->log_level);
162 |
163 |     return do_scan(opts);
164 | }
165 |
166 |
167 | int main (int argc, char ** argv)
168 | {
169 |     /* Allocate the options, parse the command line, run the scan and release
170 |      * everything; the process status is the value returned by process_opts(). */
171 |     int status;
172 |     struct scan_options * options = (struct scan_options*)calloc(1,sizeof(struct scan_options));
173 |
174 |     if(options == NULL){
175 |         err_log("Memory allocation failed!\n");
176 |         return -1;
177 |     }
178 |
179 |     /* -1 is below the "> 0" threshold tested in process_opts(): no level is forwarded by default. */
180 |     options->log_level = -1;
181 |     options->path_to_scan = NULL;
182 |
183 |     parse_options(argc,argv,options);
184 |     status = process_opts(options);
185 |
186 |     /* free(NULL) is harmless, so the path needs no NULL test. */
187 |     free(options->path_to_scan);
188 |     options->path_to_scan = NULL;
189 |
190 |     free(options);
191 |     options = NULL;
192 |
193 |     return status;
194 | }
--------------------------------------------------------------------------------
/tools/cli_parser/parser.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #include "armaditopdf.h"
25 |
26 |
27 |
/* Banner() :: print the tool banner on the standard output. */
void Banner(){

	fputs("----------------------------\n"
	      "-- ARMADITO PDF ANALYZER --\n"
	      "----------------------------\n\n", stdout);
}
36 |
37 |
/* Helper() :: print the short usage reminder shown when no file is given. */
void Helper(){

	fputs("ARMADITO PDF ANALYZER :: No file in parameter\n"
	      "Command : ./pdfAnalyzer [filename]\n\n", stdout);
}
45 |
/* Commands() :: print the banner followed by the list of interactive commands. */
void Commands(){

	Banner();

	fputs("Commands list:\n"
	      "- avscan :: launch a complete analysis and display report\n"
	      "- decode [obj_ref] :: decode object stream\n"
	      "- dump [obj_ref] :: dump object stream\n"
	      "- exit :: exit the parser.\n"
	      "- object [obj_ref] :: display object infos\n"
	      "- quit :: exit the parser.\n"
	      "\n", stdout);

	fputs("objects actions:\n"
	      "- decode [obj_ref] :: decode object stream\n"
	      "- object [obj_ref] :: display object infos\n"
	      "\n", stdout);
}
65 |
66 |
67 |
68 |
69 | int main (int argc, char ** argv){
70 |
71 | int ret;
72 | FILE * f = NULL;
73 | int fd = -1;
74 | struct pdfDocument * pdf = NULL;
75 | struct pdfObject * obj = NULL;
76 | char cmd[512] = {0};
77 | char params[512] = {0};
78 |
79 |
80 | #ifdef DEBUG
81 | Banner();
82 | #endif
83 |
84 | if(argc < 2){
85 | Helper();
86 | return (-1);
87 | }
88 |
89 | //printf ("Analyzing file : %s\n",argv[1]);
90 | if(!(f = os_fopen(argv[1],"rb"))){
91 | printf("[-] Error :: main :: Error while opening file %s\n",argv[1]);
92 | return -1;
93 | }
94 |
95 |
96 | // Initialize the pdfDocument struct
97 | if(!(pdf = initPDFDocument())){
98 | printf("[-] Error :: analyzePDF :: Error while allocating memory for pdfDocument structure\n");
99 | fclose(f);
100 | return -1;
101 | }
102 | pdf->fh = f;
103 |
104 | if ( parsePDF(pdf)< 0){
105 | printf("[-] Error :: parsing error\n");
106 | return -2;
107 | }
108 |
109 |
110 |
111 | while(1){
112 |
113 | printf("enter a command:\nUHPDF>");
114 | scanf("%s",&cmd);
115 | //scanf("%[^\t\r\n]",&cmd);
116 |
117 | if(strncmp(cmd,"quit",4) == 0 || strncmp(cmd,"exit",4) == 0 ){
118 |
119 | break;
120 |
121 | }else if(strncmp(cmd,"help",4) == 0){
122 |
123 | Commands();
124 |
125 | }else if(strncmp(cmd,"avscan",6) == 0){
126 |
127 | printf("[TODO] :: av scan\n");
128 |
129 | }else if(strncmp(cmd,"info",4) == 0){
130 |
131 | printf("[TODO] :: display infos.\n");
132 |
133 | }else if(strncmp(cmd,"obj",3) == 0){
134 |
135 | //printf("[TODO] :: display object. %s \n",params);
136 | printf("Enter an object reference: UHPDF>");
137 | scanf("%10s",params);
138 | //printf("params = %s\n",params );
139 | sprintf(ref, "%s 0 obj",params );
140 | printf("object = %s\n",ref );
141 |
142 | //printf("Decoding object :: %s\n","83 0 obj");
143 | obj = getPDFObjectByRef(pdf,ref);
144 |
145 | if(obj == NULL){
146 | printf("[-] Error :: Object [%s] not found!\n",ref);
147 | continue;
148 | }
149 |
150 | printf("Display object :: %s\n","83 0 obj");
151 |
152 | printObject(obj);
153 |
154 |
155 |
156 | }else if(strncmp(cmd,"decode",6) == 0){
157 |
158 | printf("[TODO] :: display object.\n");
159 |
160 | printf("Enter object reference: UHPDF>");
161 | scanf("%s",params);
162 | //printf("params = %s\n",params );
163 | sprintf(ref, "%s 0 obj",params );
164 | printf("object = %s\n",ref );
165 |
166 | obj = getPDFObjectByRef(pdf,ref);
167 |
168 | if(obj == NULL){
169 | printf("[-] Error :: Object [%s] not found!\n",ref);
170 | continue;
171 | }
172 | decodeObjectStream(obj);
173 |
174 |
175 | }
176 | else{
177 | printf("Command [%s] not found. See Help (command: help)!\n",cmd);
178 | }
179 |
180 |
181 |
182 | }
183 |
184 | //fclose(f);
185 | freePDFDocumentStruct(pdf);
186 |
187 | //system("pause");
188 |
189 | return ret;
190 | }
--------------------------------------------------------------------------------
/tools/armadito-pdf/main.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015 - 2017 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 | #include "libarmadito-pdf/armaditopdf.h"
24 | #include "libarmadito-pdf/osdeps.h"
25 | #include "libarmadito-pdf/log.h"
26 | #include "getopt.h"
27 |
28 | #define TOOL_VERSION "0.13.2"
29 |
30 |
// Options collected from the command line.
struct scan_options {
	char *path_to_scan;		// path given as positional argument (heap copy made by parse_options(), freed in main())
	enum log_level log_level;	// level selected with --log-level; main() initializes it to -1 before parsing
};
35 |
36 |
// Long options table handed to getopt_long(); the last field is the value
// returned for the option, the all-zero entry terminates the table.
static struct option cli_option_def[] = {
	{"help", no_argument, 0, 'h'},
	{"version", no_argument, 0, 'v'},
	{"log-level", required_argument, 0, 'l'},
	{0, 0, 0, 0}
};
43 |
44 |
/*
 * Usage() :: print the command line help on stderr and terminate the program.
 * This function does not return: it always calls exit(-1).
 */
void Usage(){

	fprintf(stderr, "usage: armadito-pdf [options] FILE\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "Armadito PDF scanner\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "Options:\n");
	fprintf(stderr, " --help -h print help and quit\n");
	// the short option registered for --version is 'v' (see cli_option_def), not 'V'.
	fprintf(stderr, " --version -v print program version\n");
	fprintf(stderr, " --log-level=LEVEL | -l LEVEL set log level [error, warn, info, debug, none]\n");
	fprintf(stderr, "\n");

	exit(-1);

}
60 |
61 | void Version(){
62 |
63 | printf("armadito-pdf v%s (using libarmadito-pdf v%s) \nCopyright (C) 2015 - 2017 by Teclib.\n",TOOL_VERSION,getVersion());
64 | exit(1);
65 | }
66 |
67 |
68 | int parse_options(int argc, char ** argv, struct scan_options * opts){
69 |
70 | while(1){
71 |
72 | int c, option_index = 0;
73 |
74 | c = getopt_long (argc, argv, "hvil:", cli_option_def, &option_index);
75 |
76 | /* Detect the end of the options. */
77 | if (c == -1){
78 | break;
79 | }
80 |
81 | switch(c){
82 |
83 | case 'h':
84 | Usage();
85 | break;
86 |
87 | case 'v':
88 | Version();
89 | break;
90 |
91 | case 'l':
92 |
93 | if(!strcmp("error",optarg))
94 | opts->log_level = LOG_LEVEL_ERROR;
95 | else if(!strcmp("warn",optarg))
96 | opts->log_level = LOG_LEVEL_WARNING;
97 | else if(!strcmp("info",optarg))
98 | opts->log_level = LOG_LEVEL_INFO;
99 | else if(!strcmp("debug",optarg))
100 | opts->log_level = LOG_LEVEL_DEBUG;
101 | else if(!strcmp("none",optarg))
102 | opts->log_level = LOG_LEVEL_NONE;
103 | else{
104 | fprintf(stderr, "Option Error: Bad log level value\n");
105 | Usage();
106 | abort();
107 | }
108 | break;
109 |
110 | default:
111 | abort();
112 | break;
113 | }
114 |
115 | }
116 |
117 | if (optind < argc){
118 |
119 | opts->path_to_scan = strdup(argv[optind]);
120 |
121 | }else{
122 | fprintf(stderr, "Argument Error: Missing file or directory path\n");
123 | Usage();
124 | }
125 |
126 | return 0;
127 | }
128 |
129 |
130 | // Launch a scan directory
131 | int do_scan(struct scan_options * opts){
132 |
133 | int ret;
134 | FILE * f = NULL;
135 | int fd = -1;
136 |
137 | // analysis with opened file descriptor.
138 | if(!(f = os_fopen(opts->path_to_scan,"rb"))){
139 | err_log("Can't open file %s\n", opts->path_to_scan);
140 | return -1;
141 | }
142 |
143 | fd = os_fileno(f);
144 | ret = analyzePDF_ex(fd, opts->path_to_scan);
145 | fclose(f);
146 |
147 | return ret;
148 | }
149 |
150 |
151 | // launch a task according to options and parameters.
152 | int process_opts(struct scan_options * opts){
153 |
154 |
155 | if(opts == NULL || opts->path_to_scan == NULL){
156 | return -1;
157 | }
158 |
159 | // Set log level
160 | if(opts->log_level > 0)
161 | set_current_log_level(opts->log_level);
162 |
163 | return do_scan(opts);
164 |
165 | }
166 |
167 |
168 | int main (int argc, char ** argv){
169 |
170 | int ret = 0;
171 | struct scan_options * opts = NULL;
172 |
173 | if( !(opts = (struct scan_options*)calloc(1,sizeof(struct scan_options)))){
174 | err_log("Memory allocation failed!\n");
175 | return -1;
176 | }
177 |
178 | opts->log_level = -1;
179 | opts->path_to_scan = NULL;
180 |
181 | parse_options(argc,argv,opts);
182 |
183 | ret = process_opts(opts);
184 |
185 | if(opts->path_to_scan != NULL){
186 | free(opts->path_to_scan);
187 | opts->path_to_scan = NULL;
188 | }
189 |
190 | free(opts);
191 | opts = NULL;
192 |
193 | return ret;
194 |
195 | }
--------------------------------------------------------------------------------
/lib/includes/pdfStructs.h:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #ifndef _pdf_Structs_h_
25 | #define _pdf_Structs_h_
26 |
27 |
28 | #include
29 | #include
30 | #include
31 |
32 |
// PDF object structure: one element of the singly linked list of objects of a document.
struct pdfObject{

	char * reference;		// reference of the object Ex : 12 0 obj
	char * content;			// The content of the object obj...endobj
	char * dico;			// The dictionary (if any)
	char * type;			// The type of the object (if any)
	char * stream;			// The content stream. stream...endstream
	char * filters;			// presumably the filters applied to the stream - TODO confirm against the parser
	char * decoded_stream;		// the stream once decoded (its size is decoded_stream_size)
	int offset;			// offset (in byte) in the file
	int stream_size;		// Size in byte of the object's stream
	int tmp_stream_size;		// temp size of the stream (between two decoding process)
	int decoded_stream_size;	// Size in byte of the object's decoded stream
	int content_size;		// size in byte of the object's content
	int errors;			// errors in parsing

	struct pdfObject* next;		// next object in the list.


};
54 |
55 |
// PDF Trailer structure: one element of the singly linked list of trailers of a document.
struct pdfTrailer{

	int offset;			// offset in the document
	char * content;			// content of the trailer
	char * dico;			// presumably the dictionary of the trailer - TODO confirm against the parser
	struct pdfTrailer* next;	// next trailer in the document

};
65 |
66 |
// PDF Cross-reference table structure: one element of the singly linked list of xref tables.
struct pdfXRef{

	int offset;			// offset in the document
	char * content;			// content of the XRef
	struct pdfXRef* next;		// next cross-reference table in the document

};
75 |
76 |
// Suite of tests according to the PDF structure specifications.
// Each field holds the result of one test; callers treat a value > 0 as "test positive".
struct testsPDFStruct{

	int bad_header;			// when the PDF header is incorrect
	int encrypted;			// when the document is encrypted
	int empty_page_content;		// when all pages are empty of content
	int object_collision;		// when two objects have the same reference in the document.
	int bad_trailer;		// when the trailer is in an incorrect form
	int bad_xref_offset;		// when the offset of the xref table is incorrect;
	int bad_obj_offset;		// When at least an object's offset in the reference table is incorrect
	int obfuscated_object;		// when an object dictionary is obfuscated within hexa
	int multiple_headers;		// when several headers are found in the document.
	int large_file;			// presumably set when the file is considered too large - threshold not visible here, TODO confirm
	int comments;			// If PostScript comments are found in pdf.
	int malicious_comments;		// Malicious comments found (potentially defeat pdf parsers).

};
94 |
95 |
// Suite of tests for PDF objects content.
// Each field holds the result of one test; callers treat a value > 0 as "test positive".
struct testsPDFObjAnalysis{

	int active_content;		// presence of js, embedded files, or forms.
	int shellcode;			// presence of shellcode in an object stream content
	int pattern_high_repetition;	// high scale repetition of a pattern in a stream content
	int dangerous_keyword_high;	// potentially dangerous keyword (high level)
	int dangerous_keyword_medium;	// potentially dangerous keyword (medium level)
	int dangerous_keyword_low;	// potentially dangerous keyword (low level)
	int time_exceeded;		// when the analysis of an object stream exceed a given duration.

	int js;				// number of js content
	int xfa;			// number of xfa objects
	int ef;				// number of ef objects


};
113 |
114 |
// PDF Document structure: everything gathered about one analyzed file.
struct pdfDocument{

	FILE * fh;					// File handle of the document
	int fd;						// file descriptor of the document
	char * fname;					// file name of the document
	char * content;					// presumably the raw content of the document - TODO confirm against the parser
	struct pdfObject * objects;			// List of objects
	int coef;					// Suspicious coefficient
	int size;					// size in bytes of the PDF
	char * version;					// PDF specification version
	struct pdfTrailer* trailers;			// list of trailers
	struct pdfXRef* xref;				// list of cross-reference tables
	struct testsPDFStruct * testStruct;		// results of the document structure tests
	struct testsPDFObjAnalysis * testObjAnalysis;	// results of the object content tests
	double scan_time;				// time elapsed in second for parse or scan.
	int errors;					// treatment errors

};
134 |
135 |
136 |
/* pdf structures functions prototypes */

/* Initialization functions; callers test the returned pointer against NULL to detect a failure. */
struct pdfDocument* initPDFDocument();
struct pdfObject* initPDFObject();
struct pdfTrailer* initPDFTrailer();
struct testsPDFStruct * initTestsPDFStruct();
struct testsPDFObjAnalysis * initTestsPDFObjAnalysisStruct();

/* Release functions for the structures above. */
void freePDFDocumentStruct(struct pdfDocument * pdf);
void freePDFObjectStruct(struct pdfObject * obj);
void freePDFTrailerStruct(struct pdfTrailer * trailer);

/* List insertion functions. NOTE(review): the argument order differs between the two (object first vs document first). */
int addObjectInList(struct pdfObject* obj, struct pdfDocument* pdf);
int addTrailerInList(struct pdfDocument * pdf, struct pdfTrailer * trailer);
152 |
153 | #endif
154 |
--------------------------------------------------------------------------------
/win32/ArmaditoPDF/ArmaditoPDF/ArmaditoPDF.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 |
14 | {667A295C-61CD-47A7-AAFC-5B7F6088CDB5}
15 | Win32Proj
16 | ArmaditoPDF
17 |
18 |
19 |
20 | Application
21 | true
22 | v120
23 | MultiByte
24 |
25 |
26 | Application
27 | false
28 | v120
29 | true
30 | MultiByte
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 | true
44 |
45 |
46 | false
47 |
48 |
49 |
50 |
51 |
52 | Level3
53 | Disabled
54 | WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
55 | true
56 | $(SolutionDir)..\..\lib\includes;%(AdditionalIncludeDirectories)
57 |
58 |
59 | Console
60 | true
61 |
62 |
63 |
64 |
65 | Level3
66 |
67 |
68 | MaxSpeed
69 | true
70 | true
71 | WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
72 | true
73 | $(SolutionDir)..\..\includes;%(AdditionalIncludeDirectories)
74 |
75 |
76 | Console
77 | true
78 | true
79 | true
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/lib/Spec.txt:
--------------------------------------------------------------------------------
1 | ::::::::::::::::::: PDF ANALYZER SPECIFICATIONS :::::::::::::::::::::::
2 |
3 | Author: Ulrich FAUSTHER
4 | Modification date: 21/01/2015
5 | -----------------------------------------------------------------------
6 |
7 | Plan:
8 |
9 | I- Description
10 |
11 | II- Parsing du PDF
12 | II-a PDF Header
13 | II-b Objets
14 | II-c Trailer
15 | II-d Cross-reference table
16 |
17 |
18 | III- Analyse Anti-malware
19 |
20 | III-a Format PDF
21 | III-b Analyse des objets
22 | III-c CVEs detection
23 | III-d Coefficient de suspicion
24 | III-e Cas non pris en compte
25 |
26 | IV- Réécriture de PDF
27 |
28 | IV-a Principes de réécriture de PDF
29 | IV-b Cas non pris en compte
30 |
31 | -----------------------------------------------------------------------
32 |
33 | I- Description
34 |
35 |
36 | II- Parsing du PDF
37 |
38 | Cette étape a pour but d'extraire toutes les informations nécessaires à l'analyse antivirale du PDF.
39 |
40 | ------- II-a PDF Header
41 |
42 |
43 | Vérification de l'entête du document à analyser.
44 | L'entête du document doit être de la forme: %PDF-x.y (où x.y représente la version du PDF). Exemple : %PDF-1.7 (version 1.7 des spécifications).
45 |
46 | /!\NB: L'analyseur prend également en compte le format XDP (XML Data Package), qui représente la forme XML d'un PDF. Dans ce format, le contenu du PDF est encodé dans le XML en base64.
47 | L'analyseur va alors extraire le PDF packagé dans le XML, puis vérifier le header. A noter que la suite de l'analyse se fera sur le PDF extrait.
48 |
49 | Dans le cas où l'entête serait incorrecte, l'analyse s'arrête en produisant un rapport d'erreur.
50 |
51 | ------- II-b Objets
52 |
53 |
54 | Récupération de tous les objets qui constituent le document PDF et parsing des attributs.
55 | Les attributs de l'objet récupérés sont rangés dans une structure (table de hachage).
56 | Les principaux attributs d'un objet qui sont récupérés sont:
57 | * La réference (Ex: 16 0 obj);
58 | * L'offset en octet du début de l'objet dans le document;
59 | * Le contenu integrale de l'objet (11 0 obj......endobj);
60 |
61 | Si présent:
62 | * Le dictionnaire de l'objet (<<...>>);
63 | * Le type de l'objet.
64 | * Puis d'autres attributs specifiques au type d'objet. etc...
65 |
66 | /!\NB: A partir de la version 1.5 de PDF : Récupération des objets intégrés dans des objets de type /ObjStm (object stream).
67 | Dans ce cas, des attributs sont rajoutés à la structure de l'objet, à savoir ( objstm = l'objet stream dans lequel il est intégré; objStmOff = l'offset dans l'object stream );
68 |
69 |
70 | Décodage des streams présents dans les objets; les filtres implémentés sont:
71 | * FlateDecode
72 | * ASCIIHexDecode
73 | * ASCII85Decode
74 | * LZWDecode
75 | * CCITTFaxDecode
76 | * DCTDecode
77 |
78 |
79 |
80 |
81 | ------- II-c Trailer
82 |
83 |
84 | Récupération des trailers du PDF. (TODO:: Récupérer seulement le "dernier" Trailer mis à jour)
85 | Conformément aux specifications PDF (jusqu'a la version 1.4 incluse), le trailer du document est défini sous la forme:
86 | trailer
87 | << dictionnaire du trailer >>
88 | startxref
89 | xref_table_offset
90 | %%EOF
91 | A partir de la version 1.5 de PDF, le trailer peut etre de la forme:
92 | startxref
93 | xref_stream_offset
94 | %%EOF
95 |
96 | Dans le cas où la première forme du trailer n'aurait pas été retrouvée, essayer de l'obtenir sous la deuxième forme.
97 |
98 | /!\NB: Pour les documents PDF ayant été mis à jour, on peut retrouver plusieurs trailers.
99 |
100 |
101 | ------- II-d Cross-reference table
102 |
103 |
104 | Récupération de la table de référence des objects.
105 | La récupération de la table de référence des objets se fera lors de la vérification de sa conformité (voir partie III-a).
106 |
107 | Conformément aux spécifications PDF (jusqu'à la version 1.4 incluse); la table de reference des objets est de la forme:
108 | xref
109 | 0 3
110 | 0000000000 65535 f
111 | 0000000009 00000 n
112 | 0000000098 00000 n
113 |
114 | A partir de la version 1.5, la table de référence peut etre représentée via des objets de type (/XRef).
115 | /!\NB: Pour les documents PDF ayant été mis à jour, on peut retrouver plusieurs trailers.
116 |
117 |
118 |
119 | III- Analyse Anti-malware
120 |
121 | Objectif: Déterminer le niveau de suspicion (ou de dangerosité) du document analysé.
122 | Pour ce faire, plusieurs tests divisés en 3 catégories seront effectués sur le document.
123 | * Tests portant sur la structure du document.
124 | * Tests analysant le contenu des objets.
125 | * Tests d'identification d'exploitation de vulnérabilités (CVE).
126 |
127 |
128 | ------- III-a Format PDF
129 |
130 |
131 | L'objectif de cette suite de tests est de déterminer si le document respecte bien les spécifications sur la structure du PDF.
132 |
133 | * Vérification du trailer
134 | Vérifier qu'un trailer a bien été trouvé dans le document.
135 | Si oui, vérifier les paramètres du dictionnaire du trailer (Ex: /root 1 0 R :: vérifier que l'objet '1 0 obj' référencé est bien de type /Catalog).
136 |
137 | * Vérification de la table de reference des objets (Xref table).
138 | Vérifier que l'offset de la table de référence (ou de l'objet de type XRef) défini dans le trailer est correct.
139 | Si ok, vérifier les entrées de la table de réference (par rapport aux offsets des objets).
140 |
141 | * Vérification du contenu du document.
142 | Vérifier que le document ne contient pas uniquement des pages vides.
143 | /!\NB: Un document PDF dont toutes les pages sont vides est considéré comme suspect.
144 |
145 | * Collisions d'objets
146 | Vérifier qu'un object ne soit pas défini plusieurs fois dans le PDF (avec une table de reference incorrecte).
147 |
148 |
149 | * Detection de PDF dont le contenu est chiffré.
150 | /!\NB: Dans ce cas, ne pas continuer le traitement du document.
151 |
152 |
153 |
154 | ------- III-b Analyse des objets
155 |
156 | L'objectif de cette suite de tests est de détecter des éléments potentiellement dangereux dans le contenu des objets du PDF.
157 |
158 | * Recherche d'objets potentiellement dangereux.
159 | -> javascript,
160 | -> embedded file,
161 | -> formulaire XFA,
162 | -> action de type /Launch,
163 | -> URI
164 | -> etc.
165 |
166 | * Recherche de pattern potentiellement dangereux dans les objects.
167 | -> shellcode,
168 | -> pattern à forte répetition,
169 | -> unicode strings,
170 | -> mots clés potentiellement dangereux (heapSpray, payload, shellcode, etc.),
171 | -> javascript fonctions (StringfromChar, byteToChar, eval, unescape, split, etc.),
172 | -> path traversal URI.
173 |
174 |
175 | ------- III-c CVEs exploit detection
176 |
177 |
178 | L'objectif de ces tests est de déterminer une possible exploitation d'une vulnérabilité CVE.
179 |
180 | Les vulnérabilités implémentées sont:
181 | * CVE-2010-2883
182 |
183 |
184 | ------- III-d Coefficient de suspicion
185 |
186 | Afin de définir un coefficient de suspicion du document analysé, un coefficient va être attribué au résultat de chaque test:
187 | * $EMPTY_PAGES_WITH_ACTIVE_CONTENT = 99;
188 | * $EMPTY_PAGES_CONTENT = 70;
189 | * $OBJECT_COLLISION_PLUS_BAD_XREF = 90;
190 | * $OBJECT_COLLISION = 10;
191 | * $BAD_XREF_OFFSET = 30;
192 | * $TRAILER_NOT_FOUND = 30;
193 | * $BAD_TRAILER = 40;
194 | * $OBFUSCATED_OBJECTS = 40;
195 | * $ACTIVE_CONTENT = 40;
196 | * $SHELLCODE = 40;
197 | * $PATTERN_REPETITION = 40;
198 | * $DANGEROUS_PATTERN_HIGH = 90;
199 | * $DANGEROUS_PATTERN_MEDIUM = 40;
200 | * $DANGEROUS_PATTERN_LOW = 20;
201 | * $TIME_EXCEEDED = 20;
202 | * $MALICIOUS_URI = 50;
203 | * $MULTIPLE_HEADERS = 50;
204 |
205 |
206 | NB: Ces coefficients doivent être calibrés afin d'obtenir de meilleurs résultats.
207 |
208 | Le coefficient de suspicion est la somme de tous les résultats des tests positifs. Plus le coefficient est élevé, plus le document est suspect.
209 |
210 |
211 | III-e Cas non pris en compte
212 |
213 | IV- Réécriture de PDF
214 |
215 | IV-a Principes de réécriture de PDF
216 | IV-b Cas non pris en compte
217 |
--------------------------------------------------------------------------------
/lib/src/armaditopdf.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | ***/
21 |
22 |
23 |
24 | #include "armaditopdf.h"
25 | #include "pdfParsing.h"
26 | #include "pdfAnalysis.h"
27 | #include "osdeps.h"
28 | #include "log.h"
29 | #include
30 |
31 |
32 |
33 | char * getVersion(){
34 | return a6o_pdf_ver;
35 | }
36 |
37 |
38 | /*
39 | printAnalysisReport() :: print a report of the analysis (debug only).
40 | parameters:
41 | - struct pdfDocument * pdf
42 | returns:
43 | - none.
44 | */
45 | // TODO :: printAnalysisReport :: filter report informations by log level.
46 | void printAnalysisReport(struct pdfDocument * pdf){
47 |
48 |
49 | if (!print_report || pdf == NULL){
50 | return;
51 | }
52 |
53 | printf("\n\n");
54 | printf("----------------------------------\n");
55 | printf("-- ARMADITO PDF ANALYZER REPORT --\n");
56 | printf("----------------------------------\n\n");
57 |
58 | printf("Filename = %s\n",pdf->fname);
59 | if (pdf->version)
60 | printf("PDF version = %s\n",pdf->version);
61 |
62 | printf("size = %d bytes\n", pdf->size);
63 |
64 | printf("\n\n");
65 | printf("::: PDF Document Structure Tests :::\n\n");
66 |
67 |
68 | printf("bad_header = %d\n", pdf->testStruct->bad_header);
69 | printf("encrypted = %d\n", pdf->testStruct->encrypted);
70 | printf("empty_page_content = %d\n", pdf->testStruct->empty_page_content);
71 | printf("object_collision = %d\n", pdf->testStruct->object_collision);
72 | printf("bad_trailer = %d\n", pdf->testStruct->bad_trailer);
73 | printf("bad_xref_offset = %d\n", pdf->testStruct->bad_xref_offset);
74 | printf("bad_obj_offset = %d\n", pdf->testStruct->bad_obj_offset);
75 | printf("obfuscated_object = %d\n", pdf->testStruct->obfuscated_object);
76 | printf("multiple_headers = %d\n", pdf->testStruct->multiple_headers);
77 | printf("postscript_comments = %d\n", pdf->testStruct->comments);
78 | printf("malicious_comments = %d\n", pdf->testStruct->malicious_comments);
79 |
80 | printf("\n\n");
81 | printf("::: PDF Object Analysis Tests :::\n\n");
82 |
83 | printf("active_content = %d\n", pdf->testObjAnalysis->active_content);
84 | printf(" - js content = %d\n", pdf->testObjAnalysis->js);
85 | printf(" - xfa content = %d\n", pdf->testObjAnalysis->xfa);
86 | printf(" - ef content = %d\n", pdf->testObjAnalysis->ef);
87 | printf("shellcode = %d\n", pdf->testObjAnalysis->shellcode);
88 | printf("pattern_high_repetition = %d\n", pdf->testObjAnalysis->pattern_high_repetition);
89 | printf("dangerous_keyword_high = %d\n", pdf->testObjAnalysis->dangerous_keyword_high);
90 | printf("dangerous_keyword_medium = %d\n", pdf->testObjAnalysis->dangerous_keyword_medium);
91 | printf("dangerous_keyword_low = %d\n", pdf->testObjAnalysis->dangerous_keyword_low);
92 | printf("time_exceeded = %d\n", pdf->testObjAnalysis->time_exceeded);
93 |
94 |
95 | printf("\n\n");
96 | printf("::: Suspicious Coefficient :::\n\n");
97 | printf("errors = %d\n", pdf->errors);
98 |
99 | if(pdf->testStruct->bad_header > 0)
100 | printf("Coef = BAD_HEADER\n");
101 | else
102 | if(pdf->testStruct->large_file > 0)
103 | printf("Coef = %d (LARGE_FILE)\n",pdf->coef);
104 | else
105 | if(pdf->testStruct->encrypted > 0)
106 | printf("Coef = Encrypted_PDF\n");
107 | else
108 | printf("Coef = %d\n",pdf->coef);
109 |
110 |
111 | printf("-------------------------------------------------------\n");
112 | //printf("-------------------------------------------------------\n");
113 | printf("Execution time : %.2lf sec \n",pdf->scan_time);
114 | printf("-------------------------------------------------------\n");
115 | printf("-------------------------------------------------------\n\n");
116 |
117 | return;
118 |
119 | }
120 |
121 |
122 | // This function calc the suspicious coefficient according to the tests results
123 | // TODO Improve this fucntion by calc the coef with the operation coef += test_result * test_coef
124 | int calcSuspiciousCoefficient(struct pdfDocument * pdf){
125 |
126 | // check parameters
127 | if(pdf == NULL){
128 | return -1;
129 | }
130 |
131 | // PDF Document Structure tests
132 | /*
133 | EMPTY_PAGE_CONTENT 99
134 | OBJECT_COLLISION 10
135 | BAD_TRAILER 40
136 | BAD_XREF_OFFSET 30
137 | BAD_OBJ_OFFSET 20
138 | OBFUSCATED_OBJECT 50
139 | MULTIPLE_HEADERS 50
140 | */
141 |
142 | pdf->coef = 0;
143 |
144 | if(pdf->testStruct->encrypted > 0 ){
145 | pdf->coef = -2;
146 | return -2;
147 | }
148 |
149 | if(pdf->testStruct->empty_page_content > 0){
150 | pdf->coef = EMPTY_PAGE_CONTENT;
151 | return 0;
152 | }
153 |
154 | if(pdf->testStruct->object_collision > 0 && ( pdf->testStruct->bad_obj_offset > 0 || pdf->testStruct->bad_xref_offset > 0 )){
155 | pdf->coef += OBJECT_COLLISION_AND_BAD_XREF;
156 | }else{
157 |
158 | if(pdf->testStruct->object_collision > 0){
159 | pdf->coef += OBJECT_COLLISION;
160 | }
161 |
162 | if(pdf->testStruct->bad_obj_offset > 0){
163 | pdf->coef += BAD_OBJ_OFFSET;
164 | }
165 |
166 | if( pdf->testStruct->bad_xref_offset > 0){
167 | pdf->coef += BAD_XREF_OFFSET;
168 | }
169 | }
170 |
171 | if(pdf->testStruct->bad_trailer > 0){
172 | pdf->coef += BAD_TRAILER;
173 | }
174 |
175 | if(pdf->testStruct->multiple_headers > 0){
176 | pdf->coef += MULTIPLE_HEADERS;
177 | }
178 |
179 | if(pdf->testStruct->obfuscated_object > 0){
180 | pdf->coef += OBFUSCATED_OBJECT;
181 | }
182 |
183 | if(pdf->testStruct->malicious_comments > 0){
184 | pdf->coef += MALICIOUS_COMMENTS;
185 | }
186 |
187 |
188 | // PDF Objects Analysis tests
189 | /*
190 | ACTIVE_CONTENT 40
191 | SHELLCODE 40
192 | PATTERN_HIGH_REPETITION 40
193 | DANGEROUS_KEYWORD_HIGH 90
194 | DANGEROUS_KEYWORD_MEDIUM 40
195 | DANGEROUS_KEYWORD_LOW 20
196 | TIME_EXCEEDED 20
197 | */
198 |
199 |
200 | if(pdf->testObjAnalysis->active_content > 0){
201 | pdf->coef += ACTIVE_CONTENT;
202 | }
203 |
204 | if(pdf->testObjAnalysis->shellcode > 0){
205 | pdf->coef += SHELLCODE;
206 | }
207 |
208 | if(pdf->testObjAnalysis->pattern_high_repetition > 0){
209 | pdf->coef += PATTERN_HIGH_REPETITION;
210 | }
211 |
212 | if(pdf->testObjAnalysis->dangerous_keyword_high > 0){
213 | pdf->coef += DANGEROUS_KEYWORD_HIGH;
214 | }
215 |
216 | if(pdf->testObjAnalysis->dangerous_keyword_medium > 0){
217 | pdf->coef += DANGEROUS_KEYWORD_MEDIUM;
218 | }
219 |
220 | if(pdf->testObjAnalysis->dangerous_keyword_low > 0){
221 | pdf->coef += DANGEROUS_KEYWORD_LOW;
222 | }
223 |
224 | if(pdf->testObjAnalysis->time_exceeded > 0){
225 | pdf->coef += TIME_EXCEEDED;
226 | }
227 |
228 |
229 | return 0;
230 |
231 | }
232 |
233 |
234 | /*
235 | analyzePDF_ex() :: Analyze pdf extension function
236 | parameters:
237 | - int fd (file descriptor of the file to analyze)
238 | - char * filename (file name of the file).
239 | returns:
240 | - the suspicious coefficient (>=0) on success.
241 | - an error code (<0) on error.
242 | */
243 | int analyzePDF_ex(int fd, char * filename){
244 |
245 | int ret = 0;
246 | struct pdfDocument * pdf = NULL;
247 | time_t start_time =0, end_time = 0;
248 | double time_elapsed = 0;
249 | int res = 0;
250 | FILE * fh = NULL;
251 |
252 |
253 | if (fd < 0 && filename == NULL){
254 | err_log("analyzePDF_ex :: invalid parameters!",0);
255 | return -1;
256 | }
257 |
258 | dbg_log("analyzePDF_ex :: Analyzing file :: [%s]\n", filename);
259 |
260 | // open the file if fd is invalid
261 | if (fd < 0 && !(fh = os_fopen(filename, "rb"))){
262 | err_log("analyzePDF_ex :: Can't open file %s\n", filename);
263 | return -1;
264 | }
265 |
266 |
267 | // Initialize pdfDocument struct
268 | if (!(pdf = initPDFDocument())){
269 | err_log("analyzePDF_ex :: pdfDocument initialization failed!\n");
270 |
271 | if(fh != NULL)
272 | fclose(fh);
273 |
274 | return -1;
275 | }
276 |
277 | pdf->fh = fh;
278 | pdf->fd = fd;
279 | pdf->fname = os_strdup(filename);
280 |
281 | // start time initialization.
282 | time(&start_time);
283 |
284 | // Parse pdf document content.
285 | if ((ret = parsePDF(pdf)) < 0){
286 | err_log("analyzePDF_ex :: parsing PDF document failed\n");
287 | goto clean;
288 | }
289 |
290 | /* this is for debug purpose only */
291 | // printPDFObjects(pdf);
292 | // printObjectReferences(pdf);
293 |
294 |
295 | // PDF objects analysis.
296 | if ((ret = getDangerousContent(pdf)) < 0){
297 | err_log("analyzePDF_ex :: get dangerous content failed\n");
298 | goto clean;
299 | }
300 |
301 |
302 | // Document structure analysis
303 | if((ret = documentStructureAnalysis(pdf))< 0){
304 | err_log("analyzePDF_ex :: document structure Analysis failed\n");
305 | goto clean;
306 | }
307 |
308 |
309 | clean:
310 |
311 | time(&end_time);
312 | time_elapsed = difftime(end_time, start_time);
313 |
314 | pdf->scan_time = time_elapsed;
315 |
316 | // calc supicious coefficient of the document.
317 | calcSuspiciousCoefficient(pdf);
318 |
319 | // print report. (debug only)
320 | printAnalysisReport(pdf);
321 |
322 | if (ret >= 0){
323 | ret = pdf->coef;
324 | dbg_log("[armaditoPDF] Coef = %d\n", ret);
325 | }
326 |
327 | if (pdf != NULL){
328 | freePDFDocumentStruct(pdf);
329 | }
330 |
331 |
332 | return ret;
333 |
334 |
335 | }
--------------------------------------------------------------------------------
/lib/src/pdfStructs.c:
--------------------------------------------------------------------------------
1 | /***
2 |
3 | Copyright (C) 2015, 2016 Teclib'
4 |
5 | This file is part of Armadito module PDF.
6 |
7 | Armadito module PDF is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | Armadito module PDF is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with Armadito module PDF. If not, see .
19 |
20 | ***/
21 |
22 |
23 | #include "pdfStructs.h"
24 | #include "log.h"
25 |
26 |
27 | /*
28 | freePDFObjectStruct() :: free the allocated memory PDF object structure.
29 | parameters:
30 | - struct pdfObject * pdf (the pdf object list pointer).
31 | returns:
32 | - none
33 | */
34 | void freePDFObjectStruct(struct pdfObject * obj){
35 |
36 | struct pdfObject * tmp = NULL;
37 |
38 | if(obj == NULL){
39 | return ;
40 | }
41 |
42 | while(obj != NULL){
43 |
44 | tmp = obj;
45 | obj = obj->next;
46 |
47 | // free all elements
48 | free(tmp->reference);
49 | free(tmp->content);
50 | free(tmp->dico);
51 | free(tmp->type);
52 | free(tmp->stream);
53 | free(tmp->filters);
54 | free(tmp->decoded_stream);
55 |
56 | free(tmp);
57 | tmp = NULL;
58 |
59 | }
60 |
61 | return;
62 |
63 | }
64 |
65 |
66 | /*
67 | freePDFTrailerStruct() :: free the allocated memory PDF trailer structure.
68 | parameters:
69 | - struct pdfTrailer * pdf (the pdf trailer list pointer).
70 | returns:
71 | - none
72 | */
73 | void freePDFTrailerStruct(struct pdfTrailer * trailer){
74 |
75 | struct pdfTrailer * tmp = NULL;
76 |
77 | if(trailer == NULL){
78 | return ;
79 | }
80 |
81 | while(trailer!= NULL){
82 |
83 | tmp = trailer;
84 | trailer = trailer->next;
85 |
86 | free(tmp->dico);
87 | free(tmp->content);
88 |
89 | free(tmp);
90 | tmp = NULL;
91 |
92 | }
93 |
94 | return;
95 |
96 | }
97 |
98 |
99 | /*
100 | freePDFDocumentStruct() :: free the allocated memory PDF document structure.
101 | parameters:
102 | - struct pdfDocument * pdf (the pdf document pointer).
103 | returns:
104 | - none
105 | */
106 | void freePDFDocumentStruct(struct pdfDocument * pdf){
107 |
108 |
109 | if(pdf == NULL){
110 | return ;
111 | }
112 |
113 | if (pdf->fname != NULL){
114 | free(pdf->fname);
115 | pdf->fname = NULL;
116 | }
117 |
118 | // Free objects
119 | if (pdf->objects != NULL){
120 | freePDFObjectStruct(pdf->objects);
121 | }
122 |
123 | // Free trailer
124 | if (pdf->trailers != NULL){
125 | freePDFTrailerStruct(pdf->trailers);
126 | }
127 |
128 | if (pdf->fh != NULL){
129 | fclose(pdf->fh);
130 | pdf->fh = NULL;
131 | }
132 |
133 | if (pdf->version != NULL){
134 | free(pdf->version);
135 | pdf->version = NULL;
136 | }
137 |
138 | if (pdf->content != NULL){
139 | free(pdf->content);
140 | pdf->content = NULL;
141 | }
142 |
143 | if (pdf->testStruct != NULL){
144 | free(pdf->testStruct);
145 | pdf->testStruct = NULL;
146 | }
147 |
148 | if (pdf->testObjAnalysis != NULL){
149 | free(pdf->testObjAnalysis);
150 | pdf->testObjAnalysis = NULL;
151 | }
152 |
153 |
154 | free(pdf);
155 | pdf = NULL;
156 |
157 |
158 | return ;
159 |
160 | }
161 |
162 |
163 | /*
164 | addObjectInList() :: add an object in the pdf document object list
165 | parameters:
166 | - struct pdfObject * obj (pdf object pointer)
167 | - struct pdfDocument * pdf (pdf document pointer)
168 | returns: (int)
169 | - 0 on success.
170 | - an error code (<0) on error.
171 | */
172 | int addObjectInList(struct pdfObject* obj, struct pdfDocument* pdf){
173 |
174 | struct pdfObject* tmp = NULL;
175 |
176 | if(obj == NULL || pdf == NULL){
177 | err_log("addObjectInList :: invalid parameter\n");
178 | return -1;
179 | }
180 |
181 | if(pdf->objects == NULL){
182 | pdf->objects = obj;
183 | }else{
184 |
185 | tmp = pdf->objects;
186 |
187 | // Object collision detection
188 | if(strncmp(tmp->reference,obj->reference,strlen(tmp->reference)) == 0 && strncmp(tmp->reference,obj->reference,strlen(obj->reference)) == 0){
189 |
190 | warn_log("addObjectInList :: Object collision :: %s\n", obj->reference);
191 | pdf->testStruct->object_collision ++;
192 | }
193 |
194 |
195 | while(tmp->next != NULL){
196 |
197 | // Object collision detection
198 | tmp = tmp->next;
199 |
200 | if(strncmp(tmp->reference,obj->reference,strlen(tmp->reference)) == 0 && strncmp(tmp->reference,obj->reference,strlen(obj->reference)) == 0){
201 | warn_log("addObjectInList :: Object collision :: %s\n", obj->reference);
202 | pdf->testStruct->object_collision ++;
203 | }
204 | }
205 | tmp->next = obj;
206 |
207 | }
208 |
209 | return 0;
210 | }
211 |
212 |
213 | /*
214 | initTestsPDFStruct() :: Initialize PDF Tests structure.
215 | parameters:
216 | - none
217 | returns: (struct testsPDFStruct *)
218 | - the testsPDFStruct pointer on success.
219 | - NULL on error.
220 | */
221 | struct testsPDFStruct * initTestsPDFStruct(){
222 |
223 | struct testsPDFStruct * testStruct = NULL;
224 |
225 | if( !(testStruct = (struct testsPDFStruct *)calloc(1,sizeof(struct testsPDFStruct)) ) ){
226 | err_log("initTestsPDFStruct :: memory allocation failed\n");
227 | return NULL;
228 | }
229 |
230 | testStruct->bad_header = 0;
231 | testStruct->encrypted = 0;
232 | testStruct->empty_page_content = 0;
233 | testStruct->object_collision = 0;
234 | testStruct->bad_trailer = 0;
235 | testStruct->bad_xref_offset = 0;
236 | testStruct->bad_obj_offset = 0;
237 | testStruct->obfuscated_object = 0;
238 | testStruct->multiple_headers = 0;
239 | testStruct->large_file = 0;
240 | testStruct->comments = 0;
241 | testStruct->malicious_comments = 0;
242 |
243 | return testStruct;
244 | }
245 |
246 |
247 | /*
248 | initTestsPDFObjAnalysisStruct() :: Initialize PDF Tests structure.
249 | parameters:
250 | - none
251 | returns: (struct testsPDFObjAnalysis *)
252 | - the testsPDFObjAnalysis pointer on success.
253 | - NULL on error.
254 | */
255 | struct testsPDFObjAnalysis * initTestsPDFObjAnalysisStruct(){
256 |
257 | struct testsPDFObjAnalysis * testObjAnalysis = NULL;
258 |
259 | if( !(testObjAnalysis = (struct testsPDFObjAnalysis *)calloc(1,sizeof(struct testsPDFObjAnalysis)) ) ){
260 | err_log("initTestsPDFObjAnalysisStruct :: memory allocation failed\n");
261 | return NULL;
262 | }
263 |
264 | testObjAnalysis->active_content = 0;
265 | testObjAnalysis->shellcode = 0;
266 | testObjAnalysis->pattern_high_repetition = 0;
267 | testObjAnalysis->dangerous_keyword_high = 0;
268 | testObjAnalysis->dangerous_keyword_medium = 0;
269 | testObjAnalysis->dangerous_keyword_low = 0;
270 | testObjAnalysis->time_exceeded = 0;
271 |
272 | testObjAnalysis->js = 0;
273 | testObjAnalysis->xfa = 0;
274 | testObjAnalysis->ef = 0;
275 |
276 | return testObjAnalysis;
277 | }
278 |
279 |
280 | /*
281 | initPDFDocument() :: Initialize pdfDocument structure.
282 | parameters:
283 | - none
284 | returns: (struct pdfDocument *)
285 | - the pdfDocument pointer on success.
286 | - NULL on error.
287 | */
288 | struct pdfDocument* initPDFDocument(){
289 |
290 | struct pdfDocument* pdf = NULL;
291 | int err = 0;
292 |
293 | if( (pdf = (struct pdfDocument *)calloc(1,sizeof(struct pdfDocument))) == NULL ){
294 | err_log("initPDFDocument :: memory allocation failed\n");
295 | err++;
296 | goto clean;
297 | }
298 |
299 | if( (pdf->testStruct = initTestsPDFStruct()) == NULL ){
300 | err_log("initPDFDocument :: testsPDFstruct initialization failed!\n");
301 | err++;
302 | goto clean;
303 | }
304 |
305 | if( (pdf->testObjAnalysis = initTestsPDFObjAnalysisStruct()) == NULL ){
306 | err_log("initPDFDocument :: testsPDFObjAnalysisStruct initialization failed!\n");
307 | err++;
308 | goto clean;
309 | }
310 |
311 | // Initialize entries
312 | pdf->fh = NULL;
313 | pdf->fd = -1;
314 | pdf->fname = NULL;
315 | pdf->content = NULL;
316 | pdf->objects =NULL;
317 | pdf->coef = 0;
318 | pdf->size = 0;
319 | pdf->version = NULL;
320 | pdf->trailers = NULL;
321 | pdf->xref = NULL;
322 | pdf->errors = 0;
323 | pdf->scan_time=0;
324 |
325 | clean:
326 | if (err != 0){
327 | if (pdf != NULL){
328 | freePDFDocumentStruct(pdf);
329 | pdf = NULL;
330 | }
331 | }
332 |
333 | return pdf;
334 |
335 | }
336 |
337 |
338 | /*
339 | initPDFObject() :: Initialize pdfObject object structure.
340 | parameters:
341 | - none
342 | returns: (struct pdfObject *)
343 | - the pdfObject pointer on success.
344 | - NULL on error.
345 | */
346 | struct pdfObject* initPDFObject(){
347 |
348 | struct pdfObject* obj = NULL;
349 |
350 |
351 | if( !(obj = (struct pdfObject*)calloc(1,sizeof(struct pdfObject)) ) ){
352 | err_log("initPDFObject :: memory allocation failed\n");
353 | return NULL;
354 | }
355 |
356 | // Initialize entries
357 | obj->reference = NULL;
358 | obj->content = NULL;
359 | obj->dico = NULL;
360 | obj->type = NULL;
361 | obj->stream = NULL;
362 | obj->filters = NULL;
363 | obj->decoded_stream = NULL;
364 | obj->offset = 0;
365 | obj->next = NULL;
366 | obj->stream_size = 0;
367 | obj->tmp_stream_size = 0;
368 | obj->content_size = 0;
369 | obj->decoded_stream_size = 0;
370 | obj->errors = 0;
371 |
372 | return obj;
373 |
374 | }
375 |
376 |
377 | /*
378 | initPDFTrailer() :: Initialize pdf trailer structure
379 | parameters:
380 | - none
381 | returns: (struct pdfTrailer *)
382 | - the pdfTrailer pointer on success.
383 | - NULL on error.
384 | */
385 | struct pdfTrailer* initPDFTrailer(){
386 |
387 | struct pdfTrailer* trailer = NULL;
388 |
389 | if( !(trailer = (struct pdfTrailer *)calloc(1,sizeof(struct pdfTrailer)) ) ){
390 | err_log("initPDFTrailer :: memory allocation failed\n");
391 | return NULL;
392 | }
393 |
394 | // Initialize entries
395 | trailer->offset = 0;
396 | trailer->content = NULL;
397 | trailer->dico = NULL;
398 | trailer->next = NULL;
399 |
400 | return trailer;
401 |
402 | }
403 |
404 |
405 | /*
406 | addTrailerInList() :: add a trailer in the list of trailers
407 | parameters:
408 | - struct pdfDocument * pdf
409 | - struct pdfTrailer * trailer
410 | returns: (int)
411 | - 0 on success
412 | - -1 on error.
413 | */
414 | int addTrailerInList(struct pdfDocument * pdf, struct pdfTrailer * trailer){
415 |
416 | struct pdfTrailer * tmp = NULL;
417 |
418 | if(pdf == NULL || trailer == NULL){
419 | err_log("addTrailerInList :: invalid parameters\n");
420 | return -1;
421 | }
422 |
423 |
424 | if(pdf->trailers == NULL){
425 | pdf->trailers = trailer;
426 | }else{
427 |
428 | tmp = pdf->trailers;
429 | while(tmp->next != NULL){
430 | tmp = tmp->next;
431 | }
432 | tmp->next = trailer;
433 |
434 | }
435 |
436 | return 0;
437 | }
--------------------------------------------------------------------------------
/tools/perl_poc/lib/analysis/ObjectAnalysis.pm:
--------------------------------------------------------------------------------
1 | package ObjectAnalysis;
2 |
3 | use strict;
4 |
5 | use lib::conf::Config;
6 |
7 | my $DEBUG = "no";
8 |
9 |
# URI_analysis() :: look for suspicious patterns in the URI of an object.
# Two checks are done on $obj_ref->{uri}:
#   - path traversal: one or more "../" (or the Windows form "..\")
#   - the "system32" string (any case, Windows paths are case-insensitive)
# Each check that fires prints a warning and adds 1 to $main::TESTS_CAT_2{"Malicious URI"}.
# parameters:
#   - $obj_ref : hash reference of the object; the {uri} and {ref} entries are read.
# returns:
#   - nothing
# TODO to improve
sub URI_analysis{

    my $obj_ref = shift;

    # Nothing to analyze when the object has no URI.
    return unless defined $obj_ref && defined $obj_ref->{uri};

    my $uri = $obj_ref->{uri};

    # Path traversal detection
    if($uri =~ /(?:\.\.[\/\\])+/){
        print "Warning :: URI_analysis :: Found path traversal in $obj_ref->{ref} URI :: $obj_ref->{uri}\n";

        # ++ on a missing entry yields 1, no need to test for existence first.
        $main::TESTS_CAT_2{"Malicious URI"} ++;
    }

    # potential dangerous pattern :: system32
    if($uri =~ /system32/i){
        print "Warning :: URI_analysis :: Found potentially dangerous pattern in $obj_ref->{ref} URI :: $obj_ref->{uri}\n";

        $main::TESTS_CAT_2{"Malicious URI"} ++;
    }

    return;
}
55 |
56 |
57 |
# DangerousKeywordsResearch() :: scan the content of an object and report the
# level of the first potentially dangerous pattern found.
# The checks are done in this order, the first one that matches decides:
#   1. "High" keywords (heap spray / shellcode vocabulary)
#   2. more than 10 "%uXXXX" unicode escapes                  => "High"
#   3. JavaScript keywords often used to build or hide code   => "Medium"
# parameters:
#   - $obj_ref : hash reference of the object (only {ref} is read, for the messages)
#   - $content : the text to scan
# returns:
#   - "High", "Medium" or "none" ("none" also for an empty content).
#     No check of this function reports a "Low" level.
sub DangerousKeywordsResearch{

    my ($obj_ref,$content) = @_;

    # Nothing to scan
    return "none" if !$content;

    # keywords (HIGH) :: HeapSpray - heap - spray - hack - shellcode - shell - Execute - pointers - byteToChar - system32 - payload - console
    if( $content =~ /(HeapSpray|heap|spray|hack|shellcode|shell|Execute|pointers|byteToChar|system32|payload|console)/si ){
        print "Dangerous Pattern \(High\) found :: $1 :: in $obj_ref->{ref} \n";
        return "High";
    }

    # Unicode detection :: %u... like %u4141%u4141%u63a5%u4a80%u0000
    my @rep_unicode = ($content =~ /(\%u[a-f0-9]{4})/gi);
    my $count = @rep_unicode;
    print "unicode string = $count :: @rep_unicode\n" if ($count > 0);

    if($count > 10){
        print "Warning :: DangerousKeywordsResearch :: Found unicode strings :: @rep_unicode\n";
        return "High";
    }

    # TODO combination between unicode and medium

    # Javascript keywords (MEDIUM) :: substring - toString - split - eval - addToolButton - String.replace - unescape - exportDataObject - String.fromCharCode - util.print
    # "fromCharCode" is the real name of the JavaScript function (String.fromCharCode);
    # the historical "StringfromChar" spelling is kept so that nothing matched before is lost.
    if( $content =~ /(toString|substring|split|eval|addToolButton|String\.replace|unescape|exportDataObject|StringfromChar|fromCharCode|util\.print)/si ){
        print "Dangerous Pattern \(Medium\) found :: $1 :: in $obj_ref->{ref} \n";
        return "Medium";
    }

    # TODO NOP detection "90"
    # TODO Look for JavaScript in XFA block

    return "none";
}
117 |
118 |
# Unknown_Pattern_Repetition_Detection() :: detect the wide repetition of an unknown pattern.
# White characters are removed from the content, then every sequence of 5 consecutive
# characters is taken as a pattern and its (non overlapping) occurrences are counted.
#   - as soon as a pattern occurs more than $Config::MAX_REP_DETECTION times, the
#     function stops and returns 1;
#   - otherwise, once all patterns are counted, each pattern occurring more than 200
#     times and more than twice the standard deviation of the counts adds 1 to the result.
# parameters:
#   - the content to analyze
# returns:
#   - the number of highly repeated patterns found (0 for an empty content)
#   - -1 when the analysis lasted more than $Config::ANALYSIS_TIMEOUT seconds
#
# Test1 files :: unknown pattern repetition
# 618b5fcf762bc7397a22e568753858c9
# 6254e7e17d9796028bdc56ba81022617
# 6bffa8f1f0155a554fcdca6a1839576e
# 8e88d64028093d2ef6a633c83ee28e44
# b400e8d3635f91176e1d56a38e6aa590
# c8c39082dfca15d5ded02ca050a96112
# de8bcc90ecd0049a1ab4e5a5087359b4
# fa2ddb10d9184dba0f90c88b7786f6ec
sub Unknown_Pattern_Repetition_Detection{

    my $objcontent = shift;

    my $result = 0;
    my %h;                  # pattern => number of occurrences in the content
    my $cpt = 5;            # number of characters of the patterns to detect
    my $nb_rep_max = 200;   # minimal number of repetitions for the statistical check

    return 0 if !$objcontent;

    my $start_time = time;

    # Remove white characters for a better processing
    $objcontent =~ s/\s//g;

    # Offset of the last complete window of $cpt characters (negative when the
    # content is shorter than a pattern: the loop is then skipped).
    my $last_start = length($objcontent) - $cpt;

    for (my $i = 0 ; $i <= $last_start ; $i++){

        my $pat = substr($objcontent, $i, $cpt);

        # Count each distinct pattern only once
        if(!exists($h{$pat})){

            # number of (non overlapping) repetitions in the whole content
            my $count = () = $objcontent =~ /\Q$pat/g;
            $h{$pat} = $count;

            if($count > $Config::MAX_REP_DETECTION){
                print "FOUND = $pat => $count\n\n" unless $DEBUG eq "yes";
                $result ++;
                return $result;
            }
        }

        # The scan is quadratic in the worst case: give up on huge contents.
        if(time - $start_time > $Config::ANALYSIS_TIMEOUT){
            print "TIME_EXCEEDED\n";
            return -1;
        }
    }

    my $nb = scalar(keys %h);
    return 0 if $nb == 0;

    my $sum = 0;
    $sum += $_ for values %h;

    # Standard deviation of the number of occurrences
    my $moyenne = $sum/$nb;     # mean

    print "100% => $sum :: cpt => $cpt :: m => $moyenne \n" unless $DEBUG eq "no";

    my $var = 0;                # variance
    $var += ($_-$moyenne)*($_-$moyenne) for values %h;
    $var = $var/$nb;

    my $et = sqrt($var);        # standard deviation

    print "moyenne = $moyenne :: nb = $nb :: variance = $var :: ecartype = $et\n" unless $DEBUG eq "no";

    while (my ($key, $value) = each %h) {

        if($value > 2*$et && $value > $nb_rep_max){
            print "FOUND = $key => $value :: \n\n" unless $DEBUG eq "yes";
            $result ++ ;
        }
    }

    return $result;

}
237 |
238 |
# Unknown_Pattern_Repetition_Detection__() :: variant of the unknown pattern repetition
# detection which records the offsets of the repetitions.
# White characters are removed from the content, then for every sequence of 5 characters
# (pattern) the offsets where the same sequence is found again, at least 5 characters
# further, are collected. A pattern is reported when its number of recorded offsets is
# greater than 30 and greater than twice the standard deviation of these numbers.
# parameters:
#   - the content to analyze
# returns:
#   - the number of patterns reported (0 for an empty content)
sub Unknown_Pattern_Repetition_Detection__{

    my $objcontent = shift;

    my $result = 0;
    my %h;          # pattern => reference to the array of offsets where it was found
    my $cpt = 5;    # number of characters of the patterns to detect

    return 0 if !$objcontent;

    # Remove white characters for a better processing
    $objcontent =~ s/\s//g;

    # Offset of the last complete window of $cpt characters
    my $last_start = length($objcontent) - $cpt;

    for (my $i = 0 ; $i <= $last_start ; $i++){

        my $pat = substr($objcontent, $i, $cpt);

        # Pattern already processed from a previous offset
        next if exists($h{$pat});

        # Look for the same pattern further in the content
        for (my $j = $i+$cpt ; $j <= $last_start ; $j++){

            next if substr($objcontent, $j, $cpt) ne $pat;

            if(exists($h{$pat})){
                # $j only grows, so it cannot be in the array yet.
                push @{ $h{$pat} }, $j;
            }else{
                # First repetition: record the original offset and this one.
                $h{$pat} = [ $i, $j ];
            }
        }
    }

    my $nb  = scalar(keys %h);
    my $sum = 0;
    $sum += scalar(@{$_}) for values %h;

    # Standard deviation of the number of offsets
    my $moyenne = 0;    # mean
    my $var = 0;        # variance
    my $et = 0;         # standard deviation

    $moyenne = $sum/$nb if $nb > 0;

    print "100% => $sum :: cpt => $cpt :: m => $moyenne \n" unless $DEBUG eq "no";

    for my $offsets (values %h) {
        my $rep = scalar(@{$offsets});
        $var += ($rep-$moyenne)*($rep-$moyenne);
    }

    if($nb > 0){
        $var = $var/$nb;
        $et = sqrt($var);
    }

    print "moyenne = $moyenne :: nb = $nb :: variance = $var :: ecartype = $et\n" unless $DEBUG eq "no";

    while (my ($key, $value) = each %h) {

        my $rep = scalar(@{$value});

        if($rep > 2*$et && $rep > 30){
            print "FOUND = $key => $rep\n\n" unless $DEBUG eq "yes";
            $result ++ ;
        }
    }

    return $result;

}
370 |
371 |
372 |
373 |
# Shellcode_Detection() :: detect a shellcode or a long suite of numbers / hexa insertion.
# White characters are removed from the content, then three independent patterns are
# searched, each one being 100 consecutive repetitions of:
#   1.   one or two digits followed by one or two separators among / , % $ @ ^ _
#   1.1  one or more digits followed by one or two separators among / , % $ @ ^ _ -
#   2.   two or more hexadecimal digits followed by "-"
# parameters:
#   - the content to analyze
# returns:
#   - the number of patterns found (0 to 3; 0 for an empty content)
#
# Test2 files :: shellcode or hexa insertion
# 5c08ea688165940008949a86805ff1d0
# 5f27adfa55628ea4674348351e241be8
# 73b0e8c5a7e5814c723295313ce0262d
# 75c1ae242d07bb738a5d9a9766c2a7de
# 7bcb4c9c35e01bd985f74aec66c19876
# 84d860a4c9e8d2baec983ef35789449a
# ab3f72df228715e6265cb222c586254e
# b823473c7206d64fa3ce20c4669b707d
# d785f43c523bf36d1678da84fa84617f
# edab6ed2809f739b67667e8fed689992
sub Shellcode_Detection{

    my $objcontent = shift;
    my $res = 0;

    return 0 if !$objcontent;

    # Remove white space for a better processing
    $objcontent =~ s/\s//g;

    # NOTE: the matches below deliberately do not use the /g modifier. In scalar
    # context /g resumes the search at pos() of the previous successful match, so
    # a pattern would only be searched in the text located after the previous hit.

    # Shellcode detection // or repetition of numbers, separated by an element (,_\-...)
    # 73b0e8c5a7e5814c723295313ce0262d
    # 5f27adfa55628ea4674348351e241be8
    # 5c08ea688165940008949a86805ff1d0
    # 7bcb4c9c35e01bd985f74aec66c19876
    # d785f43c523bf36d1678da84fa84617f
    # 75c1ae242d07bb738a5d9a9766c2a7de
    # ab3f72df228715e6265cb222c586254e
    # b823473c7206d64fa3ce20c4669b707d
    if( $objcontent =~ /(([\d]{1,2}[\/,%\$@^_]{1,2}){100})/i){
        print "\n\n:::TEST 2:::\n" unless $DEBUG eq "no";
        print "DANGEROUS PATTERN 1 FOUND !!\n" unless $DEBUG eq "no";
        $res ++;

        # TODO look for "split" pattern (or medium dangerous pattern)
    }

    if( $objcontent =~ /(([\d]{1,}[\/,%\$@^_-]{1,2}){100})/i){
        print "\n\n:::TEST 2:::\n" unless $DEBUG eq "no";
        print "DANGEROUS PATTERN 1.1 FOUND !!\n" unless $DEBUG eq "no";
        $res ++;
        print "$1\n" unless $DEBUG eq "no";

        # TODO look for "split" pattern (or medium dangerous pattern)
    }

    # pat = 9804c-9686c7351c-7254c27757c-27643c18532c-18500c32447c-32352c28309c-28201c10773c-10724c12582c-12521c
    # 84d860a4c9e8d2baec983ef35789449a
    if( $objcontent =~ /(([\dABCDEF]{2,}[-]){100})/i){
        print "\n\n:::TEST 2:::\n" unless $DEBUG eq "no";
        print "DANGEROUS PATTERN 2 FOUND !!\n" unless $DEBUG eq "no";
        $res ++;
        print "$1\n" unless $DEBUG eq "no";
    }

    # TODO edab6ed2809f739b67667e8fed689992

    return $res;

}
457 |
458 |
459 |
460 | 1;
461 |
--------------------------------------------------------------------------------
/tools/perl_poc/lib/analysis/DocumentStruct.pm:
--------------------------------------------------------------------------------
1 | package DocumentStruct;
2 |
3 | use strict;
4 | use MIME::Base64 ();
5 |
6 | my $DEBUG = "no";
7 |
8 |
9 | # Check the magic number of a PDF file
10 | sub CheckMagicNumber{
11 |
12 | my $file_ref= shift;
13 | my $file = $file_ref;
14 |
15 | my $len=8;
16 | my $offset=0;
17 | my $ver="undef";
18 |
19 |
20 |
21 | seek ($file, 0, 0);
22 | read $file, $ver, $len, $offset or print "read failed :: $!\n";
23 |
24 |
25 | if( $ver =~ /\%PDF-\d\.\d/){
26 | print "PDF header : OK\n" unless $DEBUG eq "no";
27 |
28 |
29 | # Check if there is several headers in file
30 | seek ($file, 0, 0);
31 | my $content = do { local $/; <$file>};
32 |
33 | my @pdf_headers = $content =~ /\%PDF-\d\.\d/sg;
34 | my $num = @pdf_headers;
35 |
36 | if($num > 1){
37 | print "Warning :: CheckMagicNumber :: There are $num pdf headers in this file\n";
38 | $main::TESTS_CAT_1{"Multiple Headers"} = $num;
39 | }
40 |
41 | return ($ver,"OK");
42 | }
43 |
44 | # Check string
45 | seek ($file, 0, 0); # rewind file
46 | my $content = do { local $/; <$file>};
47 | #print "$content\n";
48 | if($content =~ /(.*)<\/chunk><\/document><\/pdf>/si){
49 | #if($content =~ /(.*)<\/chunk>/si){
50 | print "This document is an XML Data Package (XDP)\n" unless $DEBUG eq "no";
51 | my $chunkContent = $1;
52 | #print "chunkContent = $chunkContent\n";
53 |
54 | #decode base64 content
55 | my $decodedContent = MIME::Base64::decode($chunkContent) or print "Error while decoding base64 :: $!\n";
56 | #print "decoded content = $decodedContent\n";
57 |
58 | # write content in a new file
59 | close($file);
60 | open $file, ">tmp.pdf" or die "open failed in tmp.pdf : $! ";
61 | binmode $file;
62 | print $file $decodedContent;
63 | #print "file handle = $file\n";
64 | close($file);
65 | open $file, "{"type"}) && $_->{"type"} eq "/Pages" ){
110 |
111 | print "FOUND Pages object :: $_->{ref} :: \n" unless $DEBUG eq "yes";
112 |
113 | # Get kid node pages
114 | my @pages = $_->{"kids"} =~ /(\d+\s\d\sR)/sg;
115 | #print @pages;
116 |
117 | foreach(@pages){
118 | my $page_ref = $_;
119 | $page_ref =~ s/R/obj/;
120 | print "page ref = $page_ref\n";
121 |
122 | # if the page exists and the /Content parameter is set
123 | if(exists($pdfObjects->{$page_ref}) && exists($pdfObjects->{$page_ref}->{"pagecontent"}) ){
124 |
125 | # Check if it's not an empty content
126 | #my $p_content = $pdfObjects{$page_ref}->{"pagecontent"};
127 |
128 |
129 | # If the Contents fiels is an array
130 | my @pcontents = $pdfObjects->{$page_ref}->{"pagecontent"} =~ /(\d+\s\d\sR)/sg;
131 |
132 | foreach (@pcontents){
133 |
134 | my $content_page_obj = $_;
135 | $content_page_obj =~ s/R/obj/;
136 |
137 | print ":: page content = $content_page_obj :: \n";#" $pdfObjects{$contentp}->{content}\n";
138 |
139 | if(exists($pdfObjects->{$content_page_obj}) && exists($pdfObjects->{$content_page_obj}->{"stream"}) && length($pdfObjects->{$content_page_obj}->{"stream"}) > 0 ){
140 | $ret ++;
141 | print "Page $page_ref is not empty => OK\n"unless $DEBUG eq "no";
142 |
143 | }elsif(! exists($pdfObjects->{$content_page_obj})){
144 | print "Warning : Content Object ($content_page_obj) of page $page_ref doesn\'t exist\n" unless $DEBUG eq "no";
145 |
146 | }elsif( exists($pdfObjects->{$content_page_obj}->{content}) ){
147 |
148 | # Trigger the case when the object represents an array of objects Ex: [422 0 R 423 0 R 424 0 R 425 0 R 426 0 R 427 0 R 428 0 R 429 0 R]
149 |
150 | my @content_page_array = $pdfObjects->{$content_page_obj}->{"content"} =~ /(\d+\s\d\sR)/sg;
151 |
152 | foreach(@content_page_array){
153 |
154 | my $content_page_obj_2 = $_;
155 | $content_page_obj_2 =~ s/R/obj/;
156 | #print " Found obj :: $content_page_obj_2\n";
157 |
158 | if(exists($pdfObjects->{$content_page_obj_2})){
159 |
160 | # TODEBUG print "";
161 | my $test = $pdfObjects->{$content_page_obj_2} ;
162 |
163 | print "DEBUG :: ".$pdfObjects->{$content_page_obj_2}->{stream}."\n";
164 |
165 | if( exists($pdfObjects->{$content_page_obj_2}->{"stream"}) && length($pdfObjects->{$content_page_obj_2}->{"stream"}) > 0 ){
166 | $ret ++;
167 | print "Found content of the page $page_ref in obj $content_page_obj_2 => OK\n"unless $DEBUG eq "no";
168 | }else{
169 | print "Warning :: Page content Object ($content_page_obj_2) is empty !!!!\n" unless $DEBUG eq "yes";
170 | }
171 |
172 | }else{
173 | print "Warning :: Empty_Pages_Document_detection :: Page content Object ($content_page_obj_2) is not defined\n" unless $DEBUG eq "yes";
174 | }
175 | }
176 |
177 |
178 | }else{
179 | print "Warning :: Empty_Pages_Document_detection :: The Stream of the Content Object is empty\n" unless $DEBUG eq "yes";
180 |
181 | }
182 |
183 | }
184 |
185 |
186 | }elsif(! exists($pdfObjects->{$page_ref})){
187 | print "Warning :: Empty_Pages_Document_detection :: Page $page_ref does\'nt exist.\n" unless $DEBUG eq "o";
188 | }else{
189 | print "Warning :: Empty_Pages_Document_detection :: Page $page_ref is empty\n" unless $DEBUG eq "o";
190 | }
191 |
192 |
193 | }
194 |
195 | }
196 |
197 | # TODO Verify that the number of treated pages is the number of pages in the document.
198 |
199 | }
200 |
201 | return $ret;
202 |
203 | }
204 |
205 |
# Count the page content objects that carry a non-empty stream.
#
# Every object typed "/Pages" is visited; each reference found in its "kids"
# entry is resolved, and for every kid typed "/Page" the references listed in
# its "pagecontent" entry are inspected. A content object counts when it has a
# non-empty "stream"; when it has no stream but has a "content" entry, the
# references found inside that entry are inspected the same way (one level of
# indirection, e.g. "[422 0 R 423 0 R]").
#
# Parameters:
#   $pdfObjects - hash ref mapping object references ("12 0 obj") to hash refs
#                 describing the object (keys read here: type, ref, kids,
#                 pagecontent, stream, content).
#
# Returns:
#   The number of non-empty content streams found (0 when there is none).
#
# The object table is only read: missing objects are reported, never created.
sub Empty_Pages_Document_detection {

    my $pdfObjects = shift;

    my $ret = 0;

    print "\n\n ::: Empty Pages With Active Content detection ::: \n" unless $DEBUG eq "no";

    # True when the referenced object exists and holds a non-empty stream.
    my $has_stream = sub {
        my ($obj_ref) = @_;
        return 0 unless exists($pdfObjects->{$obj_ref});
        my $stream = $pdfObjects->{$obj_ref}->{"stream"};
        return (defined($stream) && length($stream) > 0) ? 1 : 0;
    };

    my @objs = values(%{$pdfObjects});
    foreach my $node (@objs) {

        next unless ref($node) eq 'HASH';
        next unless defined($node->{"type"}) && $node->{"type"} eq "/Pages";

        print "FOUND Pages object :: $node->{ref} :: \n" unless $DEBUG eq "no";

        # Get kid node pages
        my $kids  = defined($node->{"kids"}) ? $node->{"kids"} : "";
        my @pages = $kids =~ /(\d+\s\d\sR)/sg;

        foreach my $kid (@pages) {

            (my $page_ref = $kid) =~ s/R/obj/;

            # Existence is tested BEFORE any dereference: reading
            # ->{$page_ref}->{"type"} first would autovivify an empty entry
            # and hide the missing page.
            # NOTE(review): "o" below looks like a typo for "no" or "yes"; as
            # written the two page-level warnings are always printed.
            unless (exists($pdfObjects->{$page_ref})) {
                print "Warning :: Empty_Pages_Document_detection :: Page $page_ref does\'nt exist.\n" unless $DEBUG eq "o";
                next;
            }

            my $page = $pdfObjects->{$page_ref};

            # Kids that are not leaf pages (e.g. nested /Pages nodes) are ignored here.
            next unless ref($page) eq 'HASH';
            next unless defined($page->{"type"}) && $page->{"type"} eq "/Page";

            unless (exists($page->{"pagecontent"})) {
                print "Warning :: Empty_Pages_Document_detection :: Page $page_ref is empty\n" unless $DEBUG eq "o";
                next;
            }

            # The Contents field may be a single reference or an array of references
            my $page_content = defined($page->{"pagecontent"}) ? $page->{"pagecontent"} : "";
            my @pcontents    = $page_content =~ /(\d+\s\d\sR)/sg;

            foreach my $content_ref (@pcontents) {

                (my $content_page_obj = $content_ref) =~ s/R/obj/;

                if (!exists($pdfObjects->{$content_page_obj})) {
                    print "Warning : Content Object ($content_page_obj) of page $page_ref doesn\'t exist\n" unless $DEBUG eq "yes";

                }
                elsif ($has_stream->($content_page_obj)) {
                    $ret++;
                    print "Page $page_ref is not empty => OK\n"unless $DEBUG eq "no";

                }
                elsif (exists($pdfObjects->{$content_page_obj}->{content})) {

                    # The object is itself an array of objects.
                    # Ex: [422 0 R 423 0 R 424 0 R 425 0 R 426 0 R 427 0 R 428 0 R 429 0 R]
                    my $inner = $pdfObjects->{$content_page_obj}->{"content"};
                    $inner = "" unless defined($inner);
                    my @content_page_array = $inner =~ /(\d+\s\d\sR)/sg;

                    foreach my $inner_ref (@content_page_array) {

                        (my $content_page_obj_2 = $inner_ref) =~ s/R/obj/;

                        if (!exists($pdfObjects->{$content_page_obj_2})) {
                            print "Warning :: Empty_Pages_Document_detection :: Page content Object ($content_page_obj_2) is not defined\n" unless $DEBUG eq "yes";
                        }
                        elsif ($has_stream->($content_page_obj_2)) {
                            $ret++;
                            print "Found content of the page $page_ref in obj $content_page_obj_2 => OK\n"unless $DEBUG eq "no";
                        }
                        else {
                            print "Warning :: Page content Object ($content_page_obj_2) is empty \n" unless $DEBUG eq "yes";
                        }
                    }

                }
                else {
                    print "Warning :: Empty_Pages_Document_detection :: The Stream of the Content Object is empty\n" unless $DEBUG eq "yes";
                }
            }
        }

        # TODO Verify that the number of treated pages is the number of pages in the document.
    }

    return $ret;
}
321 |
322 |
323 |
324 |
325 |
326 |
# This function checks that the cross-reference information is consistent.
#
# The "startxref" offset is taken from the trailer text. At that offset the
# file must contain either:
#   - the "xref" keyword, followed by a classic cross-reference table ending
#     with the "trailer" keyword; every in-use entry must then point at the
#     matching "<id> <gen> obj" header, or
#   - an "<id> <gen> obj" header whose object is typed "/XRef" in the object
#     table (cross-reference stream; its content is not decoded here).
#
# Parameters:
#   $trailer        - trailer text, expected to contain "startxref <n> %%EOF"
#   $fh             - seekable, readable file handle on the PDF document
#   $pdfObjects_ref - hash ref mapping object references ("12 0 obj") to
#                     object hashes; the "offset" key of every object found
#                     through the table is set to the offset string of its entry
#
# Returns:
#   1 when the cross-reference data is consistent, 0 otherwise.
#
# TODO return the error status along with the result.
sub Check_xref {

    my ($trailer, $fh, $pdfObjects_ref) = @_;
    my $res;

    # Get the startxref offset in the trailer
    return 0 unless defined($trailer);
    my ($xref_offset) = $trailer =~ /startxref\s*(\d+)\s*%%EOF/;
    return 0 unless defined($xref_offset);

    print "\nxref_offset = $xref_offset\n" unless $DEBUG eq "no";

    # Test XRef keyword
    my $len = 4;    # "xref" string length.
    seek($fh, $xref_offset, 0) or return 0;
    read($fh, $res, $len) or print "Check_xref :: read failed :: $!\n";
    $res = "" unless defined($res);
    print "res = $res\n" unless $DEBUG eq "no";

    if ($res ne "xref") {    # Test for object stream reference
        $len = 10;
        seek($fh, $xref_offset, 0) or return 0;
        read($fh, $res, $len) or print "Check_xref :: read failed :: $!\n";
        $res = "" unless defined($res);
        print "res2 = $res\n" unless $DEBUG eq "no";

        # TODO decode xref stream.
        if ($res =~ /^(\d+\s\d\sobj)/) {
            my $obj_ref = $1;

            # Check that the object really is an XRef type object
            my $is_xref = exists($pdfObjects_ref->{$obj_ref})
                && defined($pdfObjects_ref->{$obj_ref}->{"type"})
                && $pdfObjects_ref->{$obj_ref}->{"type"} eq "/XRef";
            return $is_xref ? 1 : 0;
        }

        return 0;
    }

    # Get xref entries: read up to (and including) the "trailer" keyword.
    # Hitting EOF first means the table is truncated; give up instead of
    # looping forever.
    my $xref_content = $res;
    until ($xref_content =~ /trailer$/) {
        my $got = read($fh, $xref_content, 1, length($xref_content));
        unless ($got) {
            print "Check_xref :: read failed :: $!\n";
            return 0;
        }
    }

    print "$xref_content\n" unless $DEBUG eq "no";

    # The table is made of subsections. Each one starts with a header line
    #   <first object number> <number of entries>
    # followed by one entry per object:
    #   nnnnnnnnnn ggggg n eol
    #   nnnnnnnnnn is a 10-digit byte offset
    #   ggggg is a 5-digit generation number
    #   n (in-use) or f (free) is the entry type
    my $id = 0;    # object number of the next entry
    foreach my $line (split(/\r\n|\r|\n/, $xref_content)) {

        # Subsection header: restart the numbering at its first object.
        if ($line =~ /^\s*(\d+)\s+(\d+)\s*$/) {
            my ($first_obj, $number_of_entries) = ($1, $2);
            print "$first_obj :: $number_of_entries\n\n" unless $DEBUG eq "no";
            $id = $first_obj + 0;
            next;
        }

        next unless $line =~ /^\s*(\d{10})\s(\d{5})\s([fn])/;
        my ($off, $gen, $free) = ($1, $2, $3);

        # Free entries do not point at an object: nothing to verify.
        if ($free eq "n") {

            my $gen_num = $gen + 0;

            # "<id> <gen> obj": two separators plus the 3-letter keyword
            $len = length($id) + length($gen_num) + 5;

            seek($fh, $off, 0) or return 0;
            read($fh, $res, $len) or print "Check_xref :: read failed :: off=$off :: len=$len\n";
            $res = "" unless defined($res);

            if ($res =~ /^($id\s$gen_num\sobj)/) {

                my $obj_ref = $1;

                # save the object's offset
                if (exists($pdfObjects_ref->{$obj_ref})) {
                    print "object $obj_ref is at offset $off\n" unless $DEBUG eq "no";
                    $pdfObjects_ref->{$obj_ref}->{"offset"} = $off;
                }
            }
            else {
                print "WRONG Object offset :: $id $gen obj :: offset $off\n"unless $DEBUG eq "yes";
                return 0;
            }
        }

        $id++;
    }

    return 1;
}
457 |
458 |
459 |
460 |
461 |
462 | 1;
463 | __END__
464 |
--------------------------------------------------------------------------------
/tools/perl_poc/lib/utils/CleanRewriting.pm:
--------------------------------------------------------------------------------
1 | package CleanRewriting;
2 |
3 | use strict;
4 |
5 | use lib::utils::Filters;
6 | use File::Basename;
7 |
8 | #use bytes;
9 |
10 | my $DEBUG = "no";
11 |
12 |
# This function removes the JavaScript content of an object (not an object stream).
#
# The text stored under the object's "js" key is located inside its "content"
# and overwritten, in place, with an empty PDF literal string "(   )" padded
# with spaces to the same length, so the size of the content is preserved
# (for JavaScript text of at least two characters).
#
# Parameters:
#   $obj        - reference of the object to clean ("12 0 obj")
#   $pdfObjects - hash ref mapping object references to object hashes
#                 (keys read here: objStmOff, js, content; "content" is rewritten)
#
# Returns:
#   0 in every case. The content is left untouched when there is no
#   JavaScript text or when that text cannot be found in the content.
sub RemoveJSContentFromObj {

    my ($obj, $pdfObjects) = @_;

    print "The object is at offset $pdfObjects->{$obj}->{objStmOff} in object stream\n";

    my $js  = $pdfObjects->{$obj}->{js};
    my $len = defined($js) ? length($js) : 0;

    print "js len = $len\n";

    # Nothing to remove.
    return 0 if $len == 0;
    return 0 unless defined($pdfObjects->{$obj}->{content});

    # Get the offset of the js content
    my $off = index($pdfObjects->{$obj}->{content}, $js);

    # A negative offset passed to substr would overwrite the END of the
    # content; refuse to touch the object when the script is not found.
    if ($off < 0) {
        print "Warning :: RemoveJSContentFromObj :: js content not found in object $obj\n";
        return 0;
    }

    # Same-length replacement: the two parentheses plus ($len - 2) spaces.
    my $comment = "(" . (" " x ($len - 2)) . ")";

    print "verif1 :: $pdfObjects->{$obj}->{content}\n";

    # Replace js content by empty string
    substr($pdfObjects->{$obj}->{content}, $off, $len, $comment);

    print "verif2 :: $pdfObjects->{$obj}->{content}\n";

    return 0;
}
47 |
48 | sub RemoveJSContentFromXFA{
49 |
50 | my ($obj,$pdfObjects) = @_;
51 |
52 | #print " Content = $pdfObjects->{$obj}->{stream_d}\n\n" if $obj eq "26 0 obj";
53 |
54 | #
55 | #
56 | #my @js_content = $pdfObjects->{$obj}->{stream_d} =~ /(javascript)/gi ;
57 | #my @js_content = $pdfObjects->{$obj}->{stream_d} =~ /(