├── tests
    ├── travis
    │   ├── do_run.sh
    │   ├── basic.sh
    │   └── https.sh
    ├── test.sh
    ├── core_https.sh
    ├── internal.sh
    ├── internal_https.sh
    └── core.sh
├── .gitignore
├── cmake
    └── modules
    │   ├── FindApr.cmake
    │   └── FindApache.cmake
├── deps
    └── libinjection
    │   ├── libinjection_xss.h
    │   ├── libinjection_html5.h
    │   ├── libinjection.h
    │   ├── libinjection_sqli.h
    │   ├── libinjection_xss.c
    │   └── libinjection_html5.c
├── .travis.yml
├── JsonValidator.hpp
├── CMakeLists.txt
├── Util.h
├── mod_defender.hpp
├── RuntimeScanner.hpp
├── README.md
├── JsonValidator.cpp
├── RuleParser.h
├── Util.cpp
├── mod_defender_body.cpp
├── mod_defender.cpp
└── RuleParser.cpp


/tests/travis/do_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs the scenario script tests/travis/$RUN (e.g. basic.sh) with the current
3 | # directory exported as DEFENDER_HOME.
4 | 
5 | : "${RUN:?RUN must name a script in tests/travis}"
6 | export DEFENDER_HOME="$(pwd)"
7 | echo "running tests/travis/$RUN, home: $DEFENDER_HOME"
8 | bash "$DEFENDER_HOME/tests/travis/$RUN"


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *.so
 3 | *.a
 4 | .idea/
 5 | conf/
 6 | cmake-build-debug/
 7 | lib/
 8 | build/
 9 | CMakeFiles/
10 | CMakeCache.txt
11 | cmake_install.cmake
12 | Makefile


--------------------------------------------------------------------------------
/cmake/modules/FindApr.cmake:
--------------------------------------------------------------------------------
 1 | # Locates the APR development headers.
 2 | # Result: APR_INC - directory containing apr.h (APR_INC-NOTFOUND when missing).
 3 | find_path(APR_INC
 4 |         NAMES apr.h
 5 |         HINTS
 6 |         /usr/include/apr-1
 7 |         /usr/include/apr-1.0
 8 |         /usr/local/include/apr-1
 9 |         /usr/local/include/apr-1.0)
10 | include(FindPackageHandleStandardArgs)
11 | # The first argument must match the name used in find_package(Apr) so that
12 | # REQUIRED/QUIET are honoured; the upper-case APR_FOUND is still set as well.
13 | find_package_handle_standard_args(Apr DEFAULT_MSG APR_INC)
14 | 


--------------------------------------------------------------------------------
/deps/libinjection/libinjection_xss.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIBINJECTION_XSS
 2 | #define LIBINJECTION_XSS
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | /**
 9 |  * HEY THIS ISN'T DONE
10 |  */
11 | 
12 | /* pull in size_t */
13 | 
14 | #include <string.h>
15 | 
16 | /*
17 |  * Takes a buffer s of len bytes plus an int flags argument.
18 |  * NOTE(review): the name suggests an XSS check; the return convention and the
19 |  * accepted flags values are not visible in this header - confirm against
20 |  * libinjection_xss.c.
21 |  */
22 |   int libinjection_is_xss(const char* s, size_t len, int flags);
23 | 
24 | #ifdef __cplusplus
25 | }
26 | #endif
27 | #endif
28 | 


--------------------------------------------------------------------------------
/cmake/modules/FindApache.cmake:
--------------------------------------------------------------------------------
 1 | # Locates the Apache httpd development headers.
 2 | # Result: APACHE_INC - directory containing httpd.h (APACHE_INC-NOTFOUND when missing).
 3 | find_path(APACHE_INC
 4 |         NAMES httpd.h
 5 |         HINTS
 6 |         /usr/include/apache2
 7 |         /usr/include
 8 |         /usr/local/include/apache2
 9 |         /usr/local/include/apache22
10 |         /usr/local/include/apache24
11 |         /usr/home/vlt-sys/Engine/include)
12 | include(FindPackageHandleStandardArgs)
13 | # The first argument must match the name used in find_package(Apache) so that
14 | # REQUIRED/QUIET are honoured; the upper-case APACHE_FOUND is still set as well.
15 | find_package_handle_standard_args(Apache DEFAULT_MSG APACHE_INC)
16 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | 
 3 | os: linux
 4 | dist: trusty
 5 | 
 6 | language: cpp
 7 | compiler: gcc
 8 | 
 9 | addons:
10 |   apt:
11 |     packages:
12 |       - apache2
13 |       - apache2-dev
14 |       - g++-6
15 |       - gcc-6
16 |     sources:
17 |       - ubuntu-toolchain-r-test
18 | 
19 | before_install:
20 | 
21 | install:
22 |   - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-6 90
23 |   - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 90
24 | 
25 | matrix:
26 |   allow_failures:
27 |   exclude:
28 |     - compiler: "gcc"
29 | 
30 |   include:
31 |     - os: linux
32 |       compiler: "gcc"
33 |       env: RUN="basic.sh"
34 | 
35 |     - os: linux
36 |       compiler: "gcc"
37 |       env: RUN="https.sh"
38 | 
39 | script:
40 |   - /bin/bash ./tests/travis/do_run.sh
41 | 
42 | after_script:
43 |   - sudo cat /var/log/apache2/error.log
44 | #  - sudo cat /var/log/apache2/defender_match.log
45 |   - sudo cat /var/log/apache2/defender_json_match.log
46 | 


--------------------------------------------------------------------------------
/tests/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Shared helpers for the test scripts; meant to be sourced by a script that received the target host as its only argument.
 3 | if [ "$#" -ne 1 ]; then
 4 | 	echo "Usage: $0 <host>" >&2
 5 | 	exit 1	# a usage error must not look like a successful test run
 6 | fi
 7 | HOST=$1
 8 | curl_ret="-s -o /dev/null -w %{http_code}"	# silent, discard the body, print only the HTTP status code
 9 | # Colored verdict tags; the \033 escapes are expanded by printf when a check_* helper prints them.
10 | PASS_MESSAGE="[ \033[0;32mPASS\033[0m ]"
11 | FAIL_MESSAGE="[ \033[0;31mFAIL\033[0m ]"
12 | # check_block <status> <expected>: PASS when expected=0 and status is 200/404, or expected=1 and status is 403.
13 | check_block() {
14 | 	# Operands are quoted so an empty status (e.g. curl failed) is a plain FAIL instead of a `[` syntax error.
15 | 	if ([ "$2" == 0 ] && ([ "$1" == 200 ] || [ "$1" == 404 ])) ||
16 | 		([ "$2" == 1 ] && [ "$1" == 403 ])
17 | 	then
18 | 		printf '%b' "$PASS_MESSAGE"
19 | 		return 1	# inverted on purpose: callers add $? to their pass counter
20 | 	else
21 | 		printf '%b' "$FAIL_MESSAGE"
22 | 		return 0
23 | 	fi
24 | }
25 | # check_status_code <status> <expected>: PASS only when the two codes are identical strings.
26 | check_status_code() {
27 | 	if [ "$1" == "$2" ]; then	# quoted: an empty status must compare as a mismatch, not break `[`
28 | 		printf '%b' "$PASS_MESSAGE"
29 | 		return 1	# 1 = pass, so callers can add $? to their pass counter
30 | 	else
31 | 		printf '%b' "$FAIL_MESSAGE"
32 | 		return 0
33 | 	fi
34 | }
35 | 
36 | url_encode() {
37 | 	local string="$1"
38 | 	local strlen=${#string}
39 | 	local encoded=""
40 | 	local pos c o
41 | 
42 | 	for ((pos=0; pos<strlen; pos++)); do
43 | 		c=${string:$pos:1}
44 | 		case "$c" in
45 | 			[-_.~a-zA-Z0-9] )
46 | 				o="$c";;
47 | 			* )
48 | 				printf -v o '%%%02x' "'$c"
49 | 		esac
50 | 		encoded+="$o"
51 | 	done
52 | 	echo "$encoded"
53 | }


--------------------------------------------------------------------------------
/deps/libinjection/libinjection_html5.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIBINJECTION_HTML5
 2 | #define LIBINJECTION_HTML5
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | /* pull in size_t */
 9 | 
10 | #include <stddef.h>
11 | 
12 | /* Token kinds; h5_state.token_type holds one of these values. */
13 | enum html5_type {
14 |     DATA_TEXT
15 |     , TAG_NAME_OPEN
16 |     , TAG_NAME_CLOSE
17 |     , TAG_NAME_SELFCLOSE
18 |     , TAG_DATA
19 |     , TAG_CLOSE
20 |     , ATTR_NAME
21 |     , ATTR_VALUE
22 |     , TAG_COMMENT
23 |     , DOCTYPE
24 | };
25 | 
26 | /*
27 |  * Values accepted as the last argument of libinjection_h5_init().
28 |  * NOTE(review): presumably the parsing context the input starts in - confirm
29 |  * against libinjection_html5.c.
30 |  */
31 | enum html5_flags {
32 |   DATA_STATE
33 |   , VALUE_NO_QUOTE
34 |   , VALUE_SINGLE_QUOTE
35 |   , VALUE_DOUBLE_QUOTE
36 |   , VALUE_BACK_QUOTE
37 | };
38 | 
39 | struct h5_state;
40 | /* Function pointer type stored in h5_state.state; it receives the state object itself. */
41 | typedef int (*ptr_html5_state)(struct h5_state*);
42 | 
43 | /*
44 |  * Tokenizer state passed to both functions below.
45 |  * NOTE(review): field meanings are inferred from their names (s/len = input
46 |  * buffer, pos = cursor, token_* = most recent token) - confirm in the .c file.
47 |  */
48 | typedef struct h5_state {
49 |     const char* s;
50 |     size_t len;
51 |     size_t pos;
52 |     int is_close;
53 |     ptr_html5_state state;
54 |     const char* token_start;
55 |     size_t token_len;
56 |     enum html5_type token_type;
57 | } h5_state_t;
58 | 
59 | 
60 | /* Initialises hs for the buffer s of len bytes with the given html5_flags value. */
61 | void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags);
62 | /* NOTE(review): presumably advances to the next token; the meaning of the int result is not visible here. */
63 | int libinjection_h5_next(h5_state_t* hs);
64 | 
65 | #ifdef __cplusplus
66 | }
67 | #endif
68 | #endif
69 | 


--------------------------------------------------------------------------------
/JsonValidator.hpp:
--------------------------------------------------------------------------------
 1 | /*                       _        _       __                _
 2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
 3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
 4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
 5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
 6 |  *                       |_____|
 7 |  *  Copyright (c) 2017 Annihil
 8 |  *  Released under the GPLv3
 9 |  */
10 | 
11 | #ifndef MOD_DEFENDER_JSONVALIDATOR_H
12 | #define MOD_DEFENDER_JSONVALIDATOR_H
13 | 
14 | #include "Util.h"
15 | 
16 | class RuntimeScanner;
17 | 
18 | /*
19 | ** To avoid getting DoS'ed, define max depth
20 | ** for JSON parser, as it is recursive
21 | */
22 | #define JSON_MAX_DEPTH 10
23 | 
24 | /*
25 | ** Parsing state, used only for json parsing. Declared as a named struct because default
26 | ** member initializers are not permitted in an unnamed typedef'd struct (C++20, P1766). */
27 | struct json_t {
28 |     str_t json;
29 |     u_char *src = nullptr;           // input buffer; null until the parser assigns it
30 |     unsigned long off = 0, len = 0;  // NOTE(review): presumably cursor offset into src and its total length
31 |     u_char c = 0;                    // NOTE(review): presumably the character currently being examined
32 |     int depth = 0;                   // NOTE(review): presumably the nesting level checked against JSON_MAX_DEPTH
33 |     str_t ckey;
34 | };
35 | // JSON validator bound to a RuntimeScanner; jsonParse() is the only public operation, the json* members are private helpers.
36 | class JsonValidator {
37 |     friend class RuntimeScanner; // RuntimeScanner may call the private helpers directly
38 | private:
39 |     RuntimeScanner& scanner; // non-owning reference: the scanner must outlive this object
40 |     bool jsonObj(json_t &js);
41 |     bool jsonVal(json_t &js);
42 |     bool jsonArray(json_t &js);
43 |     bool jsonQuoted(json_t &js, str_t *ve); // NOTE(review): ve looks like an output parameter - confirm in JsonValidator.cpp
44 |     bool jsonForward(json_t &js);
45 |     bool jsonSeek(json_t &js, unsigned char seek);
46 | public:
47 |     JsonValidator(RuntimeScanner& scanner) : scanner(scanner) {} // only stores the reference
48 |     void jsonParse(u_char *src, unsigned long len); // takes a buffer and its length in bytes
49 | };
50 | 
51 | #endif //MOD_DEFENDER_JSONVALIDATOR_H
52 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.2)
 2 | project(mod_defender)
 3 | 
 4 | # Default to a Release build, but only when no build type was chosen, so that
 5 | # -DCMAKE_BUILD_TYPE=Debug (and multi-config generators) keep working.
 6 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 7 |     set(CMAKE_BUILD_TYPE Release)
 8 | endif ()
 9 | 
10 | # Produce mod_defender.so rather than libmod_defender.so.
11 | set(CMAKE_SHARED_LIBRARY_PREFIX "")
12 | 
13 | # Make cmake/modules/FindApache.cmake and FindApr.cmake visible to find_package().
14 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules")
15 | 
16 | set(CMAKE_CXX_STANDARD 11)
17 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
18 | 
19 | # Append the warning flags instead of replacing CMAKE_CXX_FLAGS, so flags given
20 | # on the command line or through CXXFLAGS are preserved.
21 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Wextra")
22 | 
23 | message("FLAGS = ${CMAKE_CXX_FLAGS}")
24 | 
25 | find_package(Apache)
26 | find_package(Apr)
27 | 
28 | # Fail early with a readable message when the headers are missing.
29 | if (NOT APACHE_INC)
30 |     message(FATAL_ERROR "Apache headers (httpd.h) not found; install the Apache development package")
31 | endif ()
32 | if (NOT APR_INC)
33 |     message(FATAL_ERROR "APR headers (apr.h) not found; install the APR development package")
34 | endif ()
35 | 
36 | file(GLOB SOURCE_FILES *.cpp deps/libinjection/*.c)
37 | add_library(mod_defender SHARED ${SOURCE_FILES})
38 | target_include_directories(mod_defender PRIVATE
39 |         "${CMAKE_CURRENT_SOURCE_DIR}/deps"
40 |         "${APACHE_INC}"
41 |         "${APR_INC}")
42 | 
43 | # -DAUTO=ON: after each build, copy the module into Apache's module directory
44 | # and run the stop/start commands selected below.
45 | option(AUTO "Install the built module into Apache and restart it after each build" OFF)
46 | if (AUTO)
47 |     set(STOP_APACHE_CMD sudo systemctl stop apache2)
48 |     set(START_APACHE_CMD sudo systemctl start apache2)
49 |     set(AP_MODS_DIR /usr/lib/apache2/modules)
50 | 
51 |     if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
52 |         # NOTE(review): both commands are "restart" here, unlike the stop/start pair above - confirm this is intended.
53 |         set(STOP_APACHE_CMD service apache24 restart)
54 |         set(START_APACHE_CMD service apache24 restart)
55 |         set(AP_MODS_DIR /usr/local/libexec/apache24/)
56 | 
57 |         if (EXISTS "/usr/local/etc/rc.d/vulture")
58 |             # Empty commands expand to nothing, so only the copy step runs.
59 |             set(STOP_APACHE_CMD "")
60 |             set(START_APACHE_CMD "")
61 |             set(AP_MODS_DIR /usr/home/vlt-sys/Engine/modules/)
62 |         endif ()
63 |     endif ()
64 | 
65 |     add_custom_command(
66 |             TARGET mod_defender
67 |             POST_BUILD
68 |             COMMAND ${STOP_APACHE_CMD}
69 |             COMMAND cp $<TARGET_FILE:mod_defender> ${AP_MODS_DIR}
70 |             COMMAND ${START_APACHE_CMD}
71 |             COMMENT "Copying module then restarting Apache"
72 |             VERBATIM)
73 | endif ()
74 | 


--------------------------------------------------------------------------------
/tests/travis/basic.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | # Install the NAXSI core rules; -p keeps a re-run from aborting under `set -e` when /etc/defender already exists.
 5 | sudo mkdir -p /etc/defender/
 6 | sudo wget -O /etc/defender/core.rules https://raw.githubusercontent.com/nbs-system/naxsi/master/naxsi_config/naxsi_core.rules
 7 | sudo sed -i "s/select|union|update|delete|insert|table|from|ascii|hex|unhex|drop/\\\b(select|union|update|delete|insert|table|from|ascii|hex|unhex|drop)\\\b/" /etc/defender/core.rules
 8 | 
 9 | printf \
10 |     "LoadModule defender_module /usr/lib/apache2/modules/mod_defender.so
11 |       <IfModule defender_module>
12 |       Include /etc/defender/core.rules
13 |       </IfModule>" | sudo tee /etc/apache2/mods-available/defender.load
14 | 
15 | sudo apachectl -v
16 | sudo apachectl -M
17 | sudo a2enmod defender
18 | sudo service apache2 stop
19 | 
20 | printf \
21 |     "<VirtualHost *:80>
22 |       LogLevel notice
23 |       ErrorLog \${APACHE_LOG_DIR}/error.log
24 |       AllowEncodedSlashes On
25 |       <Location />
26 |         <IfModule defender_module>
27 |         Defender On
28 |         MatchLog \${APACHE_LOG_DIR}/defender_match.log
29 |         JSONMatchLog \${APACHE_LOG_DIR}/defender_json_match.log
30 |         RequestBodyLimit 8388608
31 |         LearningMode Off
32 |         ExtensiveLog Off
33 |         LibinjectionSQL Off
34 |         LibinjectionXSS Off
35 |         CheckRule \"\$SQL >= 8\" BLOCK
36 |         CheckRule \"\$RFI >= 8\" BLOCK
37 |         CheckRule \"\$TRAVERSAL >= 4\" BLOCK
38 |         CheckRule \"\$EVADE >= 4\" BLOCK
39 |         CheckRule \"\$XSS >= 8\" BLOCK
40 |         CheckRule \"\$UPLOAD >= 8\" BLOCK
41 |         </IfModule>
42 |       </Location>
43 |     </VirtualHost>" | sudo tee /etc/apache2/sites-available/000-default.conf
44 | 
45 | cmake -H. -Bbuild
46 | cmake --build build
47 | sudo cp build/mod_defender.so /usr/lib/apache2/modules/
48 | sudo service apache2 start
49 | cd tests/
50 | bash core.sh localhost
51 | bash internal.sh localhost


--------------------------------------------------------------------------------
/deps/libinjection/libinjection.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2012-2016 Nick Galbreath
 3 |  * nickg@client9.com
 4 |  * BSD License -- see COPYING.txt for details
 5 |  *
 6 |  * https://libinjection.client9.com/
 7 |  *
 8 |  */
 9 | 
10 | #ifndef LIBINJECTION_H
11 | #define LIBINJECTION_H
12 | 
13 | #ifdef __cplusplus
14 | # define LIBINJECTION_BEGIN_DECLS    extern "C" {
15 | # define LIBINJECTION_END_DECLS      }
16 | #else
17 | # define LIBINJECTION_BEGIN_DECLS
18 | # define LIBINJECTION_END_DECLS
19 | #endif
20 | 
21 | LIBINJECTION_BEGIN_DECLS
22 | 
23 | /*
24 |  * Pull in size_t
25 |  */
26 | #include <string.h>
27 | 
28 | /*
29 |  * Version info.
30 |  *
31 |  * This is moved into a function so that SWIG and other auto-generated
32 |  * bindings need not be modified during minor release changes.  We
33 |  * change the version number in the C source file and do not regenerate
34 |  * the bindings.
35 |  *
36 |  * See python's normalized version
37 |  * http://www.python.org/dev/peps/pep-0386/#normalizedversion
38 |  */
39 | const char* libinjection_version(void);
40 | 
41 | /**
42 |  * Simple API for SQLi detection - reports whether the input is SQLi and
43 |  * writes the matching fingerprint (an empty string for benign input)
44 |  *
45 |  * \param[in] s  input string, may contain nulls, does not need to be null-terminated
46 |  * \param[in] slen input string length
47 |  * \param[out] fingerprint buffer of 8+ characters.  c-string,
48 |  * \return 1 if SQLi, 0 if benign.  fingerprint will be set or set to empty string.
49 |  */
50 | int libinjection_sqli(const char* s, size_t slen, char fingerprint[]);
51 | 
52 | /** ALPHA version of xss detector.
53 |  *
54 |  * NOT DONE.
55 |  *
56 |  * \param[in] s  input string, may contain nulls, does not need to be null-terminated
57 |  * \param[in] slen input string length
58 |  * \return 1 if XSS found, 0 if benign
59 |  *
60 |  */
61 | int libinjection_xss(const char* s, size_t slen);
62 | 
63 | LIBINJECTION_END_DECLS
64 | 
65 | #endif /* LIBINJECTION_H */
66 | 


--------------------------------------------------------------------------------
/tests/core_https.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | source ./test.sh
 4 | 
 5 | declare -a tests=(
 6 | 	# " -d a=blah" 0
 7 | 	)
 8 | 
 9 | # BODY BODY_NAME URL ARGS ARGS_NAME $HEADERS_VAR:Cookie
10 | declare -a core_rules_tests=(
11 | 	# SQL Injections IDs:1000-1099
12 | 	"blah"					0 0 0 0 0 0
13 | 	"select+from"			1 1 1 1 1 1
14 | 	"selected+fromage"		0 0 0 0 0 0
15 | 	"\""					1 1 1 1 1 1
16 | 	"0x0x0x0x"				1 1 1 1 1 1
17 | 	"/*"					1 1 1 1 1 1
18 | 	"*/"					1 1 1 1 1 1
19 | 	"|"						1 1 1 1 1 1
20 | 	"&&"					1 1 1 1 1 1
21 | 	"----"					1 1 1 1 1 1
22 | 	";"						1 1 1 1 1 0
23 | 	"===="					1 1 0 1 1 0
24 | 	"("						1 1 1 1 1 1
25 | 	")"						1 1 1 1 1 1
26 | 	"'"						1 1 1 1 1 1
27 | 	",,"					1 1 1 1 1 1
28 | 	"##"					1 1 1 1 1 1
29 | 	"@@@@"					1 1 1 1 1 1
30 | 
31 | 	# OBVIOUS RFI IDs:1100-1199
32 | 	"http://"				1 1 0 1 1 1
33 | 	"https://"				1 1 0 1 1 1
34 | 	"ftp://"				1 1 0 1 1 1
35 | 	"sftp://"				1 1 0 1 1 1
36 | 	"zlib://"				1 1 0 1 1 1
37 | 	"data://"				1 1 0 1 1 1
38 | 	"glob://"				1 1 0 1 1 1
39 | 	"phar://"				1 1 0 1 1 1
40 | 	"file://"				1 1 0 1 1 1
41 | 	"gopher://"				1 1 0 1 1 1
42 | 
43 | 	# Directory traversal IDs:1200-1299
44 | 	"...."					1 1 1 1 1 1
45 | 	"/etc/passwd"			1 1 1 1 1 1
46 | 	"c:\\"					1 1 1 1 1 1
47 | 	"cmd.exe"				1 1 1 1 1 1
48 | 	"\\"					1 1 1 1 1 1
49 | 
50 | 	# Cross Site Scripting IDs:1300-1399
51 | 	"<"						1 1 1 1 1 1
52 | 	">"						1 1 1 1 1 1
53 | 	"[["					1 1 1 1 1 1
54 | 	"]]"					1 1 1 1 1 1
55 | 	"~~"					1 1 1 1 1 1
56 | 	"\`"					1 1 1 1 1 1
57 | 	"%20"					1 1 1 1 1 1
58 | 
59 | 	# Evading tricks IDs: 1400-1500
60 | 	"&#"					1 1 1 1 1 1
61 | 	"%U"					1 1 1 1 1 1
62 | 	)
63 | # Expand every pattern into six curl invocations (body value, body name, URL path,
64 | # query value, query name, cookie), each paired with its expected verdict.
65 | for ((i=0; i<${#core_rules_tests[@]}; i+=7)); do
66 | 	pattern=${core_rules_tests[$i]}
67 | 	encoded=$(url_encode "$pattern")	# encode once instead of once per request form
68 | 	tests+=(" --data-urlencode x=$pattern" ${core_rules_tests[$i+1]})
69 | 	tests+=(" -d $encoded=x" ${core_rules_tests[$i+2]})
70 | 	tests+=("$encoded" ${core_rules_tests[$i+3]})
71 | 	tests+=("?x=$encoded" ${core_rules_tests[$i+4]})
72 | 	tests+=("?$encoded=x" ${core_rules_tests[$i+5]})
73 | 	tests+=(" -b x=$pattern" ${core_rules_tests[$i+6]})
74 | done
75 | 
76 | # curl --cacert needs a value: with ca_path unset, the URL itself would be consumed as the CA file.
77 | # The default is the CA certificate written by tests/travis/https.sh.
78 | ca_path=${ca_path:-/etc/apache2/ssl/ca.crt}
79 | 
80 | tests_size=${#tests[@]}
81 | test_count=$((tests_size / 2))
82 | test_passed=0
83 | 
84 | for ((i=0; i<tests_size; i+=2)); do
85 | 	req="curl --cacert $ca_path https://$HOST/${tests[$i]}"
86 | 	expected_action=${tests[$i+1]}
87 | 	# $req and $curl_ret are deliberately unquoted: they hold whole argument lists
88 | 	status_code=$($req $curl_ret)
89 | 	test_msg=$(check_block "$status_code" "$expected_action")
90 | 	test_passed=$((test_passed + $?))	# check_block returns 1 on PASS
91 | 	printf "%-95s %s\n" "$req" "$status_code  $test_msg"
92 | done
93 | 
94 | echo $test_passed/$test_count "tests passed" \($(((test_passed * 100) / test_count))%\)
95 | exit $(($test_passed != $test_count))


--------------------------------------------------------------------------------
/tests/internal.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | source ./test.sh
 4 | 
 5 | test_passed=0
 6 | test_count=0
 7 | 
 8 | status_code=$(printf %1000000s | tr " " "a" | curl $HOST --data-binary @- $curl_ret)
 9 | test_msg=`check_block $status_code 0`
10 | test_passed=$((test_passed + $?))
11 | test_count=$((test_count + 1))
12 | echo -e "sent 1MB                                      " "$req" "$status_code  $test_msg"
13 | 
14 | status_code=$(printf "%2000000s" | tr " " "a" | curl $HOST --data-binary @- --limit-rate 350k $curl_ret)
15 | test_msg=`check_block $status_code 0`
16 | test_passed=$((test_passed + $?))
17 | test_count=$((test_count + 1))
18 | echo -e "sent 2MB @ 350kb/s                            " "$req" "$status_code  $test_msg"
19 | 
20 | status_code=$(printf %1000s | tr " " "a" | curl $HOST --data-binary @- -H "Transfer-Encoding: chunked" $curl_ret)
21 | test_msg=`check_status_code $status_code 501`
22 | test_passed=$((test_passed + $?))
23 | test_count=$((test_count + 1))
24 | echo -e "sent 1kB with transfer-encoding: chunked      " "$req" "$status_code  $test_msg"
25 | 
26 | status_code=$(curl $HOST -X POST -H 'Content-Length:' $curl_ret)
27 | test_msg=`check_block $status_code 1`
28 | test_passed=$((test_passed + $?))
29 | test_count=$((test_count + 1))
30 | echo -e "sent POST request without content-length      " "$req" "$status_code  $test_msg"
31 | 
32 | # Not working on Travis
33 | # status_code=$(printf "%2000000s" | tr " " "a" | curl $HOST --data-binary @- --limit-rate 100k $curl_ret)
34 | # test_msg=`check_status_code $status_code 500`
35 | # test_passed=$((test_passed + $?))
36 | # test_count=$((test_count + 1))
37 | # echo -e "sent 2MB @ 100kb/s (timeout by mod_reqtimeout)" "$req" "$status_code  $test_msg"
38 | 
39 | status_code=$(printf %10000000s | tr " " "a" | curl $HOST --data-binary @- $curl_ret)
40 | test_msg=`check_block $status_code 1`
41 | test_passed=$((test_passed + $?))
42 | test_count=$((test_count + 1))
43 | echo -e "sent 10MB (too big)                           " "$req" "$status_code  $test_msg"
44 | 
45 | status_code=$(printf "x=%2000000s+select+from" | tr " " "a" | curl $HOST --data-binary @- $curl_ret)
46 | test_msg=`check_block $status_code 1`
47 | test_passed=$((test_passed + $?))
48 | test_count=$((test_count + 1))
49 | echo -e "x=<200*a>+select+from                         " "$req" "$status_code  $test_msg"
50 | 
51 | status_code=$(printf "%2000000s+select+from=x" | tr " " "a" | curl $HOST --data-binary @- $curl_ret)
52 | test_msg=`check_block $status_code 1`
53 | test_passed=$((test_passed + $?))
54 | test_count=$((test_count + 1))
55 | echo -e "<200*a>+select+from=x                         " "$req" "$status_code  $test_msg"
56 | 
57 | echo $test_passed/$test_count "tests passed" \($(((test_passed * 100) / test_count))%\)
58 | exit $(($test_passed != $test_count))


--------------------------------------------------------------------------------
/tests/internal_https.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | source ./test.sh
 4 | 
 5 | test_passed=0
 6 | test_count=0
 7 | ca_path=${ca_path:-/etc/apache2/ssl/ca.crt}	# curl --cacert needs a value; default is the CA written by tests/travis/https.sh
 8 | curl_options="--cacert $ca_path https://$HOST"
 9 | 
10 | status_code=$(printf %1000000s | tr " " "a" | curl $curl_options --data-binary @- $curl_ret)
11 | test_msg=`check_block $status_code 0`
12 | test_passed=$((test_passed + $?))
13 | test_count=$((test_count + 1))
14 | echo -e "sent 1MB                                      " "$req" "$status_code  $test_msg"
15 | 
16 | status_code=$(printf "%2000000s" | tr " " "a" | curl $curl_options --data-binary @- --limit-rate 350k $curl_ret)
17 | test_msg=`check_block $status_code 0`
18 | test_passed=$((test_passed + $?))
19 | test_count=$((test_count + 1))
20 | echo -e "sent 2MB @ 350kb/s                            " "$req" "$status_code  $test_msg"
21 | 
22 | status_code=$(printf %1000s | tr " " "a" | curl $curl_options --data-binary @- -H "Transfer-Encoding: chunked" $curl_ret)
23 | test_msg=`check_block $status_code 0`
24 | test_passed=$((test_passed + $?))
25 | test_count=$((test_count + 1))
26 | echo -e "sent 1kB with transfer-encoding: chunked      " "$req" "$status_code  $test_msg"
27 | 
28 | status_code=$(curl $curl_options -X POST -H 'Content-Length:' $curl_ret)
29 | test_msg=`check_block $status_code 1`
30 | test_passed=$((test_passed + $?))
31 | test_count=$((test_count + 1))
32 | echo -e "sent POST request without content-length      " "$req" "$status_code  $test_msg"
33 | 
34 | # Not working on Travis
35 | # status_code=$(printf "%2000000s" | tr " " "a" | curl $curl_options --data-binary @- --limit-rate 100k $curl_ret)
36 | # test_msg=`check_status_code $status_code 500`
37 | # test_passed=$((test_passed + $?))
38 | # test_count=$((test_count + 1))
39 | # echo -e "sent 2MB @ 100kb/s (timeout by mod_reqtimeout)" "$req" "$status_code  $test_msg"
40 | 
41 | status_code=$(printf %10000000s | tr " " "a" | curl $curl_options --data-binary @- $curl_ret)
42 | test_msg=`check_block $status_code 1`
43 | test_passed=$((test_passed + $?))
44 | test_count=$((test_count + 1))
45 | echo -e "sent 10MB (too big)                           " "$req" "$status_code  $test_msg"
46 | 
47 | status_code=$(printf "x=%2000000s+select+from" | tr " " "a" | curl $curl_options --data-binary @- $curl_ret)
48 | test_msg=`check_block $status_code 1`
49 | test_passed=$((test_passed + $?))
50 | test_count=$((test_count + 1))
51 | echo -e "x=<200*a>+select+from                         " "$req" "$status_code  $test_msg"
52 | 
53 | status_code=$(printf "%2000000s+select+from=x" | tr " " "a" | curl $curl_options --data-binary @- $curl_ret)
54 | test_msg=`check_block $status_code 1`
55 | test_passed=$((test_passed + $?))
56 | test_count=$((test_count + 1))
57 | echo -e "<200*a>+select+from=x                         " "$req" "$status_code  $test_msg"
58 | 
59 | echo $test_passed/$test_count "tests passed" \($(((test_passed * 100) / test_count))%\)
60 | exit $(($test_passed != $test_count))


--------------------------------------------------------------------------------
/tests/travis/https.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | # -p: do not abort (set -e) when the directory is left over from a previous run
 5 | sudo mkdir -p /etc/apache2/ssl
 6 | cd /etc/apache2/ssl
 7 | # Create the PKI used by Apache (https tests)
 8 | sudo openssl genrsa -out ca.key 4096
 9 | echo -e "FR\nNord\nLille\nVultureProject\nTravis tests\nAC_racine\nsupport@vultureproject.org\n\n" | sudo openssl req -sha256 -new -x509 -key ./ca.key -out ./ca.crt
10 | sudo openssl genrsa -out localhost.key 4096
11 | echo -e "FR\nNord\nLille\nVultureProject\nTravis tests\nlocalhost\nsupport@vultureproject.org\n\n" | sudo openssl req -sha256 -new -key ./localhost.key -out ./localhost.csr
12 | sudo openssl x509 -req -sha256 -days 1 -in ./localhost.csr -CA ./ca.crt -CAkey ./ca.key -CAcreateserial -out ./localhost.crt
13 | 
14 | # Install the NAXSI core rules; -p keeps a re-run from aborting under `set -e` when /etc/defender already exists.
15 | sudo mkdir -p /etc/defender/
16 | sudo wget -O /etc/defender/core.rules https://raw.githubusercontent.com/nbs-system/naxsi/master/naxsi_config/naxsi_core.rules
17 | sudo sed -i "s/select|union|update|delete|insert|table|from|ascii|hex|unhex|drop/\\\b(select|union|update|delete|insert|table|from|ascii|hex|unhex|drop)\\\b/" /etc/defender/core.rules
18 | 
19 | printf \
20 |     "LoadModule defender_module /usr/lib/apache2/modules/mod_defender.so
21 |       <IfModule defender_module>
22 |       Include /etc/defender/core.rules
23 |       </IfModule>" | sudo tee /etc/apache2/mods-available/defender.load
24 | 
25 | sudo apachectl -v
26 | sudo apachectl -M
27 | sudo a2enmod ssl
28 | sudo a2enmod defender
29 | sudo service apache2 stop
30 | 
31 | printf \
32 |     "<IfModule mod_ssl.c>
33 |     <VirtualHost localhost:443>
34 |         ServerName localhost
35 |         LogLevel notice
36 |         AllowEncodedSlashes On
37 |         ErrorLog \${APACHE_LOG_DIR}/error.log
38 |         SSLEngine on
39 |         SSLCertificateFile /etc/apache2/ssl/localhost.crt
40 |         SSLCertificateKeyFile /etc/apache2/ssl/localhost.key
41 |         SSLCACertificateFile /etc/apache2/ssl/ca.crt
42 |         <Location />
43 |             <IfModule defender_module>
44 |                 Defender On
45 |                 MatchLog \${APACHE_LOG_DIR}/defender_match.log
46 |                 JSONMatchLog \${APACHE_LOG_DIR}/defender_json_match.log
47 |                 RequestBodyLimit 8388608
48 |                 LearningMode Off
49 |                 ExtensiveLog Off
50 |                 LibinjectionSQL Off
51 |                 LibinjectionXSS Off
52 |                 CheckRule \"\$SQL >= 8\" BLOCK
53 |                 CheckRule \"\$RFI >= 8\" BLOCK
54 |                 CheckRule \"\$TRAVERSAL >= 4\" BLOCK
55 |                 CheckRule \"\$EVADE >= 4\" BLOCK
56 |                 CheckRule \"\$XSS >= 8\" BLOCK
57 |                 CheckRule \"\$UPLOAD >= 8\" BLOCK
58 |             </IfModule>
59 |         </Location>
60 |     </VirtualHost>
61 | </IfModule>" | sudo tee /etc/apache2/sites-available/ssl-default.conf
62 | 
63 | sudo a2ensite ssl-default
64 | cd "$DEFENDER_HOME"
65 | cmake -H. -Bbuild
66 | cmake --build build
67 | sudo cp build/mod_defender.so /usr/lib/apache2/modules/
68 | sudo service apache2 start
69 | cd tests/
70 | # core_https.sh and internal_https.sh pass $ca_path to curl --cacert; point it at
71 | # the CA certificate generated at the top of this script unless the caller set it.
72 | export ca_path="${ca_path:-/etc/apache2/ssl/ca.crt}"
73 | bash core_https.sh localhost
74 | bash internal_https.sh localhost


--------------------------------------------------------------------------------
/tests/core.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | source ./test.sh
  4 | 
  5 | declare -a tests=(
  6 |     # " -d a=blah" 0
  7 |     )
  8 | 
  9 | # BODY BODY_NAME URL ARGS ARGS_NAME $HEADERS_VAR:Cookie
 10 | declare -a core_rules_tests=(
 11 |     # SQL Injections IDs:1000-1099
 12 |     "blah"                  0 0 0 0 0 0  0 0 0 0 0
 13 |     "select+from"           1 1 1 1 1 1  1 1 1 1 1
 14 |     "selected+fromage"      0 0 0 0 0 0  0 0 0 0 0
 15 |     "\\\""                  1 1 1 1 1 1  1 1 1 1 1
 16 |     "0x0x0x0x"              1 1 1 1 1 1  1 1 1 1 1
 17 |     "/*"                    1 1 1 1 1 1  1 1 1 1 1
 18 |     "*/"                    1 1 1 1 1 1  1 1 1 1 1
 19 |     "|"                     1 1 1 1 1 1  1 1 1 1 1
 20 |     "&&"                    1 1 1 1 1 1  0 0 1 0 0
 21 |     "----"                  1 1 1 1 1 1  1 1 1 1 1
 22 |     ";"                     1 1 1 1 1 0  1 1 1 1 1
 23 |     "===="                  1 1 0 1 1 0  1 1 0 1 1
 24 |     "("                     1 1 1 1 1 1  1 1 1 1 1
 25 |     ")"                     1 1 1 1 1 1  1 1 1 1 1
 26 |     "'"                     1 1 1 1 1 1  1 1 1 1 1
 27 |     ",,"                    1 1 1 1 1 1  1 1 1 1 1
 28 |     "##"                    1 1 1 1 1 1  1 1 0 0 0
 29 |     "@@@@"                  1 1 1 1 1 1  1 1 1 1 1
 30 | 
 31 |     # OBVIOUS RFI IDs:1100-1199
 32 |     "http://"               1 1 0 1 1 1  1 1 0 1 1
 33 |     "https://"              1 1 0 1 1 1  1 1 0 1 1
 34 |     "ftp://"                1 1 0 1 1 1  1 1 0 1 1
 35 |     "sftp://"               1 1 0 1 1 1  1 1 0 1 1
 36 |     "zlib://"               1 1 0 1 1 1  1 1 0 1 1
 37 |     "data://"               1 1 0 1 1 1  1 1 0 1 1
 38 |     "glob://"               1 1 0 1 1 1  1 1 0 1 1
 39 |     "phar://"               1 1 0 1 1 1  1 1 0 1 1
 40 |     "file://"               1 1 0 1 1 1  1 1 0 1 1
 41 |     "gopher://"             1 1 0 1 1 1  1 1 0 1 1
 42 | 
 43 |     # Directory traversal IDs:1200-1299
 44 |     "...."                  1 1 1 1 1 1  1 1 1 1 1
 45 |     "/etc/passwd"           1 1 1 1 1 1  1 1 1 1 1
 46 |     "c:\\\\"                1 1 1 1 1 1  1 1 1 1 1
 47 |     "cmd.exe"               1 1 1 1 1 1  1 1 1 1 1
 48 |     "\\\\"                  1 1 1 1 1 1  1 1 1 1 1
 49 | 
 50 |     # Cross Site Scripting IDs:1300-1399
 51 |     "<"                     1 1 1 1 1 1  1 1 1 1 1
 52 |     ">"                     1 1 1 1 1 1  1 1 1 1 1
 53 |     "[["                    1 1 1 1 1 1  1 1 1 1 1
 54 |     "]]"                    1 1 1 1 1 1  1 1 1 1 1
 55 |     "~~"                    1 1 1 1 1 1  1 1 1 1 1
 56 |     "\\\`"                  1 1 1 1 1 1  1 1 1 1 1
 57 |     "%20"                   1 1 1 1 1 1  0 0 0 0 0
 58 |     "%00<script>alert('abcd');</script>"                   1 1 1 1 1 1  1 1 0 1 1
 59 | 
 60 |     # Evading tricks IDs: 1400-1500
 61 |     "&#"                    1 1 1 1 1 1  0 0 0 0 0
 62 |     "%U"                    1 1 1 1 1 1  1 1 400 1 1
 63 |     )
 64 | 
 65 | 
 66 | test_count=0
 67 | test_passed=0
 68 | # check_url: run one curl request against $HOST, print its verdict, and update test_passed/test_count.
 69 | check_url() {
 70 |     # $1 = URL part appended after "$HOST/"
 71 |     # $2 = extra curl options, already shell-quoted (the command line is re-parsed by bash below)
 72 |     # $3 = expected action: 0 or 1 goes to check_block, any other number is compared as an exact HTTP status
 73 |     req="curl \"$HOST/$1\" $2"
 74 |     expected_action="$3"
 75 |     status_code=$(echo "$req $curl_ret" | bash)
 76 |     # If expected code is not 0 or 1 -> it is an http code
 77 |     if ([ $expected_action -ne 0 ] && [ $expected_action -ne 1 ])
 78 |     then
 79 |         test_msg=$(check_status_code $status_code $expected_action)
 80 |     else
 81 |         test_msg=$(check_block $status_code $expected_action)
 82 |     fi
 83 |     test_passed=$((test_passed + $?))  # $? is the status of the check_* call in the branch above (1 = pass)
 84 |     test_count=$((test_count + 1))
 85 |     printf "%-60s %s\n" "$req" "$status_code  $test_msg"
 86 | }
 87 | # Each row of core_rules_tests is a pattern followed by 11 expected results, consumed in order below.
 88 | for ((i=0; i<${#core_rules_tests[@]}; i+=12)); do
 89 |     pattern=${core_rules_tests[$i]}
 90 |     # Results 1-6: pattern URL-encoded (by curl or by url_encode), then sent as a cookie
 91 |     check_url "" " --data-urlencode \"x=$pattern\"" ${core_rules_tests[$i+1]}
 92 |     no_escaped="$(echo "$pattern" | sed 's/\\\(.\{1\}\)/\1/g')"  # drop one backslash in front of each escaped character
 93 |     check_url "" " --data-raw \"$(url_encode "$no_escaped")=x\"" ${core_rules_tests[$i+2]}
 94 |     check_url "$(url_encode "$no_escaped")" "" ${core_rules_tests[$i+3]}
 95 |     check_url "?x=$(url_encode "$no_escaped")" "" ${core_rules_tests[$i+4]}
 96 |     check_url "?$(url_encode "$no_escaped")=x" "" ${core_rules_tests[$i+5]}
 97 |     check_url "" " -b \"x=$pattern\"" ${core_rules_tests[$i+6]}
 98 |     # Results 7-11: pattern sent raw, without URL encoding (-g turns off curl's URL globbing)
 99 |     check_url "" " --data-raw \"x=$pattern\"" ${core_rules_tests[$i+7]}
100 |     check_url "" " --data-raw \"$pattern=x\"" ${core_rules_tests[$i+8]}
101 |     check_url "$pattern" " -g " ${core_rules_tests[$i+9]}
102 |     check_url "?x=$pattern" " -g " ${core_rules_tests[$i+10]}
103 |     check_url "?$pattern=x" " -g " ${core_rules_tests[$i+11]}
104 | done
105 | 
106 | # Print results
107 | echo $test_passed/$test_count "tests passed" \($(((test_passed * 100) / test_count))%\)
108 | exit $(($test_passed != $test_count))
109 | 
110 | 


--------------------------------------------------------------------------------
/Util.h:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | 
 11 | #ifndef MOD_DEFENDER_UTIL_H
 12 | #define MOD_DEFENDER_UTIL_H
 13 | 
 14 | #define UNESCAPE_URI       1
 15 | #define UNESCAPE_REDIRECT  2
 16 | 
 17 | #include <vector>
 18 | #include <algorithm>
 19 | #include <functional>
 20 | #include <cctype>
 21 | #include <locale>
 22 | #include <iostream>
 23 | #include <sstream>
 24 | #include <iomanip>
 25 | #include <sys/types.h>
 26 | #include <unistd.h>
 27 | #include <string.h>
 28 | #include <sys/time.h>
 29 | 
 30 | 
 31 | using std::vector;
 32 | using std::string;
 33 | using std::stringstream;
 34 | using std::ostringstream;
 35 | using std::endl;
 36 | using std::istringstream;
 37 | using std::pair;
 38 | 
 39 | 
 40 | // Shell colors
 41 | #define KNRM  "\x1B[0m"
 42 | #define KRED  "\x1B[31m"
 43 | #define KGRN  "\x1B[32m"
 44 | #define KYEL  "\x1B[33m"
 45 | #define KBLU  "\x1B[34m"
 46 | #define KMAG  "\x1B[35m"
 47 | #define KCYN  "\x1B[36m"
 48 | #define KWHT  "\x1B[37m"
 49 | 
 50 | struct str_t { // byte buffer given by pointer + length; named struct so default member initializers stay valid in C++20
 51 |     size_t len = 0;
 52 |     u_char *data = nullptr; // no buffer attached until the owner assigns one
 53 | };
 54 | 
 55 | namespace Util {
 56 |     inline string &ltrim(string &s) { // strip leading whitespace in place; returns s
 57 |         s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); }));
 58 |         return s;
 59 |     }
 60 | 
 61 |     inline string &rtrim(string &s) { // strip trailing whitespace in place; returns s
 62 |         s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
 63 |         return s;
 64 |     }
 65 | 
 66 |     inline string &trim(string &s) { // strip whitespace on both sides, in place
 67 |         rtrim(s); return ltrim(s);
 68 |     }
 69 | 
 70 |     inline unsigned long countSubstring(const string &str, const string &sub) {
 71 |         // Number of non-overlapping occurrences of sub in str; an empty sub counts as 0
 72 |         unsigned long hits = 0;
 73 |         if (!sub.empty()) {
 74 |             for (size_t pos = str.find(sub); pos != string::npos; pos = str.find(sub, pos + sub.length()))
 75 |                 ++hits;
 76 |         }
 77 |         return hits;
 78 |     }
 79 | 
 80 |     inline unsigned long countSubstring(const char *str, size_t len, const char *pattern, size_t patternLen) { // non-overlapping matches in a len-byte buffer
 81 |         if (patternLen == 0) return 0; // memmem() finds an empty needle at every offset, so idx would never advance
 82 |         unsigned long count = 0, idx = 0;
 83 |         const char *p;
 84 |         while ((p = (const char *) memmem(str + idx, len - idx, pattern, patternLen)) != NULL) {
 85 |             count++;
 86 |             idx = (p - str) + patternLen; // resume right after the match
 87 |         }
 88 |         return count;
 89 |     }
 90 | 
 91 |     inline unsigned long countSubstring(const char *str, const char *pattern, size_t patternLen) {
 92 |         // Counts non-overlapping matches of pattern in the NUL-terminated string str.
 93 |         // patternLen is the step taken past each match; presumably strlen(pattern) - a larger value could skip the terminator.
 94 |         if (patternLen == 0) return 0; // a zero step would make strstr() return the same position forever
 95 |         unsigned long count = 0;
 96 |         for (const char *p = strstr(str, pattern); p != NULL; p = strstr(p + patternLen, pattern))
 97 |             count++;
 98 |         return count;
 99 |     }
100 | 
101 |     inline bool caseEqual(const string &str1, const string &str2) {
102 |         // Case-insensitive (tolower-based) equality; strings of different length never match
103 |         if (str1.size() != str2.size())
104 |             return false;
105 |         for (size_t i = 0; i < str1.size(); ++i) {
106 |             // cast first: tolower() on a negative plain char is undefined behaviour
107 |             if (tolower((unsigned char) str1[i]) != tolower((unsigned char) str2[i]))
108 |                 return false;
109 |         }
110 |         return true;
111 |     }
112 | 
113 |     int naxsi_unescape_uri(u_char **dst, u_char **src, size_t size, unsigned int type);
114 | 
115 |     /* Unescapes str in place; returns the number of NUL bytes found plus the value reported by naxsi_unescape_uri */
116 |     inline int naxsi_unescape(str_t *str) {
117 |         u_char *dst, *src;
118 |         u_int nullbytes = 0, bad = 0, i;
119 | 
120 |         dst = str->data;
121 |         src = str->data;
122 | 
123 |         bad = (u_int) naxsi_unescape_uri(&src, &dst, str->len, 0); // NOTE(review): locals are passed in the opposite order to the declared (dst, src) parameter names - confirm this is intended
124 |         str->len = src - str->data; // new length = distance the first cursor advanced
125 |         // Temporary workaround: NUL bytes (e.g. from %00) are counted and overwritten with the character '0'
126 |         for (i = 0; i < str->len; i++)
127 |             if (str->data[i] == 0x0) {
128 |                 nullbytes++;
129 |                 str->data[i] = '0';
130 |             }
131 |         return (nullbytes + bad);
132 |     }
133 | 
134 |     inline char *strnchr(const char *s, int c, unsigned long len) {
135 |         // Finds c within the first len characters of s, stopping early at a NUL; NULL when absent
136 |         for (unsigned long i = 0; i < len && s[i] != '\0'; ++i)
137 |             if (s[i] == c)
138 |                 return const_cast<char *>(s) + i;
139 |         return NULL;
140 |     }
141 | 
142 |     vector<string> split(const string &s, char delim);
143 |     pair<string, string> splitAtFirst(const string &s, string delim);
144 |     std::vector<string>
145 |     parseRawDirective(std::string raw_directive);
146 |     vector<int> splitToInt(string &s, char delimiter);
147 |     string apacheTimeFmt();
148 |     string naxsiTimeFmt();
149 |     string formatLog(int loglevel, const string &clientIp);
150 |     string escapeQuotes(const string &before);
151 |     string unescape(const string &s);
152 | }
153 | 
154 | #endif //MOD_DEFENDER_UTIL_H
155 | 


--------------------------------------------------------------------------------
/mod_defender.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * \file     mod_defender.hpp
  3 |  * \author   Kevin Guillemot
  4 |  * \version  1.0
  5 |  * \date     30/03/2018
  6 |  * \license  GPLv3
  7 |  * \brief    Header file of the mod_defender module
  8 |  */
  9 | 
 10 | #ifndef MOD_DEFENDER_HPP
 11 | #define MOD_DEFENDER_HPP
 12 | 
 13 | 
 14 | /*************************/
 15 | /* Inclusion of .H files */
 16 | /*************************/
 17 | 
 18 | #include <http_request.h>
 19 | #include <http_protocol.h>
 20 | #include <http_config.h>
 21 | #include <http_log.h>
 22 | #include <apr_strings.h>
 23 | #include <apr_lib.h>
 24 | #include <util_script.h>
 25 | #include "RuleParser.h"
 26 | #include "RuntimeScanner.hpp"
 27 | 
 28 | 
 29 | /*************/
 30 | /* Constants */
 31 | /*************/
 32 | 
 33 |     /*---------------------------*/
 34 |     /* MODULE-part needed macros */
 35 |     /*---------------------------*/
 36 | 
 37 | /**
 38 |  *  Extra Apache 2.4+ C++ module declaration.
 39 |  *  Needed cause of C++ use.
 40 |  */
 41 | #ifdef APLOG_USE_MODULE
 42 | APLOG_USE_MODULE(defender);
 43 | #endif
 44 | 
 45 | extern module AP_MODULE_DECLARE_DATA defender_module;
 46 | 
 47 | /**
 48 |  * \def MAX_BB_SIZE
 49 |  *      The maximum length of post body processed
 50 |  */
 51 | #define MAX_BB_SIZE 0x7FFFFFFF
 52 | 
 53 | /**
 54 |  * \def CHUNK_CAPACITY
 55 |  *      The maximum length of a chunk
 56 |  */
 57 | #define CHUNK_CAPACITY 8192
 58 | 
 59 | /**
 60 |  * \def IF_STATUS_NONE
 61 |  *      The status of the body to be processed
 62 |  */
 63 | #define IF_STATUS_NONE 0
 64 | 
 65 | /**
 66 |  * \def IF_STATUS_WANTS_TO_RUN
 67 |  *      The status of the body to be processed
 68 |  */
 69 | #define IF_STATUS_WANTS_TO_RUN 1
 70 | 
 71 | /**
 72 |  * \def IF_STATUS_COMPLETE
 73 |  *      The status of the body to be processed
 74 |  */
 75 | #define IF_STATUS_COMPLETE 2
 76 | 
 77 | /**
 78 |  * \def SLASHES
 79 |  *      The slash as string, used to urlencode/decode
 80 |  */
 81 | #define SLASHES "/"
 82 | 
 83 | 
 84 | /**************/
 85 | /* Structures */
 86 | /**************/
 87 | 
 88 | /**
 89 |  * \struct  dir_config_t mod_defender.hpp
 90 |  *          Regroup all server directives in a structure
 91 |  */
 92 | typedef struct {
 93 |     RuleParser *parser;
 94 |     vector<pair<string, string>> tmpCheckRules; // NOTE(review): presumably raw CheckRule directives awaiting parsing - confirm
 95 |     vector<string> tmpBasicRules;               // NOTE(review): presumably raw BasicRule directives awaiting parsing - confirm
 96 |     char *loc_path;
 97 |     apr_file_t *matchlog_file;                  // NOTE(review): presumably opened from the MatchLog directive - confirm
 98 |     apr_file_t *jsonmatchlog_file;              // NOTE(review): presumably opened from the JSONMatchLog directive - confirm
 99 |     unsigned long requestBodyLimit;             // value of the requestBodyLimit directive (see read_request_body's body_limit)
100 |     bool libinjection_sql;                      // NOTE(review): the bools below look like per-directive On/Off toggles - confirm
101 |     bool libinjection_xss;
102 |     bool defender;
103 |     bool learning;
104 |     bool extensive;
105 |     bool useenv;
106 | } dir_config_t;
107 | 
108 | /**
109 |  * \struct  chunk_t mod_defender.h
110 |  *          Chunk structure used to save/restore brigades
111 |  */
112 | typedef struct {
113 |     char                    *data;
114 |     apr_size_t               length;
115 |     unsigned int             is_permanent;
116 | } chunk_t;
117 | 
118 | /**
119 |  * \struct  defender_t mod_defender.h
120 |  *          Defender structure used to save/restore brigades
121 |  */
122 | typedef struct {
123 |     int fixups_done;
124 |     int body_error;
125 |     const char *body_error_msg;
126 |     unsigned int status;
127 |     unsigned int started_forwarding;
128 |     unsigned int stream_changed;
129 |     apr_size_t           stream_input_length;
130 |     char                *stream_input_data;
131 |     unsigned int         if_seen_eos;
132 |     int                  body_chunk_position;
133 |     unsigned int         body_chunk_offset;
134 |     apr_pool_t *body_pool;
135 |     apr_array_header_t *body_chunks;
136 |     chunk_t *body_chunk;
137 |     apr_size_t         body_length;
138 |     chunk_t *body_chunk_current;
139 |     char *body_buffer;
140 |     unsigned int         body_should_exist;
141 |     unsigned int         body_read;
142 | } defender_t;
143 | 
144 | /**
145 |  * \struct  defender_config_t mod_defender.h
146 |  *          Custom definition to hold any configuration data we may need.
147 |  */
148 | typedef struct {
149 |     RuntimeScanner *vpRuntimeScanner;
150 |     defender_t *def;
151 | } defender_config_t;
152 | 
153 | 
154 | /************************/
155 | /* Functions signatures */
156 | /************************/
157 | 
158 | /**
159 |  * \brief   Initialize all variables used to forward request body.
160 |  * \param   def             Defender structure.
161 |  * \param   error_msg       Error message pointer.
162 |  * \param   r               Apache request structure to work on.
163 |  * \return  apr_status_t    Return status code of function.
164 |  */
165 | apr_status_t body_retrieve_start(defender_t *def, char **error_msg, request_rec *r);
166 | 
167 | /**
168 |  * \brief   Retrieve stocked chunk of request body and return it.
169 |  * \param   def             Defender structure.
170 |  * \param   chunk           List of chunks to add the chunk onto.
171 |  * \param   nbytes          Chunk max bytes length.
172 |  * \param   error_msg       Error message pointer.
173 |  * \param   r               Apache request structure to work on.
174 |  * \return  apr_status_t    Return status code of function.
175 |  */
176 | apr_status_t body_retrieve(defender_t *def, chunk_t **chunk, long int nbytes, char **error_msg, request_rec *r);
177 | 
178 | /**
179 |  * \brief   Read the request body without exceeding body_limit.
180 |  * \param   def             Defender structure.
181 |  * \param   error_msg       Error message pointer.
182 |  * \param   r               Apache request structure to work on.
183 |  * \param   body_limit      Value of requestBodyLimit directive, to not exceed.
184 |  * \return  apr_status_t    Return status code of function.
185 |  */
186 | apr_status_t read_request_body(defender_t *def, char **error_msg, request_rec *r, unsigned long body_limit);
187 | 
188 | /**
189 |  * \brief   Initialize all variables used to forward request body.
190 |  * \param   data            Defender structure, as void*, called by apache hook.
191 |  * \return  apr_status_t    Return status code of function.
192 |  */
193 | apr_status_t body_clear(void *data);
194 | 
195 | 
196 | #endif //MOD_DEFENDER_HPP
197 | 


--------------------------------------------------------------------------------
/RuntimeScanner.hpp:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | 
 11 | #ifndef RUNTIMESCANNER_HPP
 12 | #define RUNTIMESCANNER_HPP
 13 | 
 14 | #include <map>
 15 | #include <vector>
 16 | #include <set>
 17 | #include <algorithm>
 18 | #include <iostream>
 19 | #include <sstream>
 20 | #include <unordered_map>
 21 | #include <fstream>
 22 | #include <functional>
 23 | #include <cstdarg>
 24 | #include "RuleParser.h"
 25 | #include "JsonValidator.hpp"
 26 | 
 27 | //#define DEBUG_RUNTIME_PROCESSRULE
 28 | #ifdef DEBUG_RUNTIME_PROCESSRULE
 29 | #define DEBUG_RUNTIME_PR(x) do { std::cerr << x; } while (0)
 30 | #else
 31 | #define DEBUG_RUNTIME_PR(x)
 32 | #endif
 33 | 
 34 | //#define DEBUG_RUNTIME_BASESTR_RULE_SET
 35 | #ifdef DEBUG_RUNTIME_BASESTR_RULE_SET
 36 | #define DEBUG_RUNTIME_BRS(x) do { std::cerr << x; } while (0)
 37 | #else
 38 | #define DEBUG_RUNTIME_BRS(x)
 39 | #endif
 40 | 
 41 | #define PASS -1
 42 | #define STOP 403
 43 | /* used for reading input blocks */
 44 | #define READ_BLOCKSIZE 2048
 45 | 
 46 | using namespace Util;
 47 | using std::pair;
 48 | using std::make_pair;
 49 | using std::vector;
 50 | using std::set;
 51 | using std::string;
 52 | using std::cerr;
 53 | using std::stringstream;
 54 | using std::endl;
 55 | using std::regex;
 56 | using std::sregex_iterator;
 57 | using std::regex_match;
 58 | using std::distance;
 59 | using std::unordered_map;
 60 | using std::transform;
 61 | using std::function;
 62 | 
 63 | const std::string empty = string();
 64 | 
 65 | enum METHOD {
 66 |     METHOD_GET = 0,
 67 |     METHOD_POST,
 68 |     METHOD_PUT,
 69 |     UNSUPPORTED_METHOD,
 70 | };
 71 | 
 72 | enum CONTENT_TYPE {
 73 |     CONTENT_TYPE_UNSUPPORTED = 0,
 74 |     CONTENT_TYPE_URL_ENC, // application/x-www-form-urlencoded
 75 |     CONTENT_TYPE_MULTIPART, // multipart/form-data
 76 |     CONTENT_TYPE_APP_JSON, // application/json
 77 | };
 78 | 
 79 | enum TRANSFER_ENCODING {
 80 |     TRANSFER_ENCODING_UNSUPPORTED = 0,
 81 |     TRANSFER_ENCODING_CHUNKED
 82 | };
 83 | 
 84 | enum LOG_LVL {
 85 |     LOG_LVL_EMERG = 0,
 86 |     LOG_LVL_ALERT,
 87 |     LOG_LVL_CRIT,
 88 |     LOG_LVL_ERR,
 89 |     LOG_LVL_WARNING,
 90 |     LOG_LVL_NOTICE,
 91 |     LOG_LVL_INFO,
 92 |     LOG_LVL_DEBUG
 93 | };
 94 | 
 95 | typedef struct {
 96 |     string zone;
 97 |     set<unsigned long> ruleId;
 98 |     string varname;
 99 |     string content;
100 | } match_info_t; 
101 | 
102 | class RuntimeScanner { // holds one request's data (URI, headers, GET parameters, body), its match scores and the resulting action flags
103 |     friend class JsonValidator;
104 | private:
105 |     RuleParser& parser;
106 |     stringstream matchVars;
107 |     unsigned int rulesMatchedCount = 0;
108 |     string uri;
109 |     vector<pair<string, string>> headers;
110 |     vector<pair<string, string>> get;
111 |     string rawContentType;
112 | 
113 | public:
114 |     METHOD method = UNSUPPORTED_METHOD;
115 |     CONTENT_TYPE contentType = CONTENT_TYPE_UNSUPPORTED;
116 |     TRANSFER_ENCODING transferEncoding = TRANSFER_ENCODING_UNSUPPORTED;
117 |     bool transferEncodingProvided = false;
118 |     unsigned long contentLength = 0;
119 |     bool contentLengthProvided = false;
120 |     string body;
121 |     unsigned long bodyLimit = 0;
122 |     bool bodyLimitExceeded = false;
123 | 
124 |     int pid = 0;
125 |     long connectionId = 0;
126 |     string threadId;
127 |     string clientIp;
128 |     string requestedHost;
129 |     string serverHostname;
130 |     string fullUri;
131 |     string protocol;
132 |     string softwareVersion;
133 | 
134 |     LOG_LVL logLevel = LOG_LVL_EMERG;
135 |     void *errorLogFile = nullptr;        // opaque log handles: null until the owner assigns them
136 |     void *learningLogFile = nullptr;
137 |     void *learningJSONLogFile = nullptr;
138 | 
139 |     bool learning = false;               // toggles default to off, like the other flags of this class
140 |     bool extensiveLearning = false;
141 |     bool libinjSQL = false;
142 |     bool libinjXSS = false;
143 | 
144 |     unordered_map<string, unsigned long> matchScores;
145 |     unordered_map<string, match_info_t> matchInfos;
146 | 
147 |     bool block = false;
148 |     bool drop = false;
149 |     bool allow = false;
150 |     bool log = false;
151 | 
152 |     function<int(void *file, const void *buf, size_t *len)> writeLogFn;
153 | 
154 |     RuntimeScanner(RuleParser &parser) : parser(parser) {}
155 |     void setUri(char *uri);
156 |     void addHeader(char* key, char* val);
157 |     void addGETParameter(char* key, char* val);
158 |     void streamToFile(const stringstream &ss, void *file);
159 |     int processHeaders();
160 |     int processBody();
161 |     void logg(int priority, void *file, const char *fmt, ...);
162 |     void applyRuleAction(const rule_action_t &rule_action);
163 |     void checkLibInjection(MATCH_ZONE zone, const string &name, const string &value);
164 |     void basestrRuleset(MATCH_ZONE zone, const string &name, const string &value,
165 |                         const vector<http_rule_t> &rules);
166 |     bool processRuleBuffer(const string &str, const http_rule_t &rl, unsigned long &nbMatch);
167 |     void applyCheckRule(const http_rule_t &rule, unsigned long nbMatch, const string &name, const string &value,
168 |                         MATCH_ZONE zone, bool targetName);
169 |     void applyRuleMatch(const http_rule_t &rule, unsigned long nbMatch, MATCH_ZONE zone, const string &name,
170 |                         const string &value, bool targetName);
171 |     void writeLearningLog();
172 |     void writeExtensiveLog(const http_rule_t &rule, MATCH_ZONE zone, const string &name,
173 |                            const string &value, bool targetName);
174 |     void writeJSONLearningLog();
175 |     bool parseFormDataBoundary(unsigned char **boundary, unsigned long *boundary_len);
176 |     void multipartParse(u_char *src, unsigned long len);
177 |     bool contentDispositionParser(unsigned char *str, unsigned char *line_end,
178 |                                   unsigned char **fvarn_start, unsigned char **fvarn_end,
179 |                                   unsigned char **ffilen_start, unsigned char **ffilen_end);
180 |     int processAction();
181 |     bool splitUrlEncodedRuleset(char *str, const vector<http_rule_t> &rules, MATCH_ZONE zone);
182 | };
183 | 
184 | #endif /* RUNTIMESCANNER_HPP */
185 | 
186 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ![ModDefender logo](https://i.imgur.com/EIHE0dS.png)  
  2 | [![travis-ci](https://travis-ci.org/Annihil/mod_defender.svg?branch=master)](https://travis-ci.org/Annihil/mod_defender)  
  3 | Mod Defender is an Apache2 module aiming to block attacks thanks to a whitelist policy  
  4 | It is an almost complete replication of [NAXSI](https://github.com/nbs-system/naxsi), which is for Nginx  
  5 | It uses the same configs format and is thus fully compatible with [NXAPI/NXTOOL](https://github.com/nbs-system/naxsi/tree/master/nxapi)  
  6 | 
  7 |  - Input
  8 |     - [MainRule](https://github.com/nbs-system/naxsi/blob/master/naxsi_config/naxsi_core.rules)
  9 |     - [BasicRule](https://github.com/nbs-system/naxsi/wiki/whitelists-bnf)
 10 |     - [CheckRule](https://github.com/nbs-system/naxsi/wiki/checkrules-bnf)
 11 |  - Output
 12 |     - [Learning log](https://github.com/nbs-system/naxsi/wiki/naxsilogs#naxsi_fmt)
 13 |     - [Extensive learning log](https://github.com/nbs-system/naxsi/wiki/naxsilogs#naxsi_exlog)
 14 | 
 15 | ## Advantages
 16 |   - Human readable log: colored output to watch Mainrules and Basicrules processing
 17 |   - JSON match log: easier parsing and more compact logs
 18 |   - Combined log: regular and extensive match logs are merged so that the content and name of the variable in question are present on the same line
 19 | 
 20 | ## Required packages
 21 | * apache2 dev package to provide Apache2 headers
 22 | * apr package to provide the Apache Portable Runtime library and headers
 23 | * gcc & g++ >= 4.9 (for std::regex)
 24 | * GNU make
 25 | * cmake >= 3.2
 26 | 
 27 | ## Installation
 28 | ### Debian
 29 | 1. Install required packages
 30 | 	```sh
 31 | 	sudo apt-get install apache2-dev make gcc g++ cmake
 32 | 	```
 33 | 
 34 | 1. Compile the source
 35 | 	```sh
 36 | 	cmake -H. -Bbuild
 37 | 	cmake --build build -- -j4
 38 | 	```
 39 | 
 40 | 1. Install the module
 41 |     ```sh
 42 |     sudo cp build/mod_defender.so /usr/lib/apache2/modules/
 43 |     ```
 44 | 
 45 | 1. Create its module load file
 46 |     ```sh
 47 |     cat << EOF | sudo tee /etc/apache2/mods-available/defender.load > /dev/null
 48 |     LoadModule defender_module /usr/lib/apache2/modules/mod_defender.so
 49 |     <IfModule defender_module>
 50 |     Include /etc/defender/core.rules
 51 |     </IfModule>
 52 |     EOF
 53 |     ```
 54 | 
 55 | 1. Add mod_defender settings in the desired location / directory / proxy blocks
 56 |     ```
 57 |     <VirtualHost *:80>
 58 |         ServerName ...
 59 |         DocumentRoot ...
 60 | 
 61 |         <Location ...>
 62 |             <IfModule defender_module>
 63 |             # Defender toggle
 64 |             Defender On
 65 |             # Match log path
 66 |             MatchLog ${APACHE_LOG_DIR}/defender_match.log
 67 |             # JSON Match log path
 68 |             JSONMatchLog ${APACHE_LOG_DIR}/defender_json_match.log
 69 |             # Request body limit
 70 |             RequestBodyLimit 8388608
 71 |             # Learning mode toggle
 72 |             LearningMode On
 73 |             # Extensive Learning log toggle
 74 |             ExtensiveLog Off
 75 |             # Libinjection SQL toggle
 76 |             LibinjectionSQL Off
 77 |             # Libinjection XSS toggle
 78 |             LibinjectionXSS Off
 79 |             ## Score action
 80 |             CheckRule "$SQL >= 8" BLOCK
 81 |             CheckRule "$RFI >= 8" BLOCK
 82 |             CheckRule "$TRAVERSAL >= 4" BLOCK
 83 |             CheckRule "$EVADE >= 4" BLOCK
 84 |             CheckRule "$XSS >= 8" BLOCK
 85 |             CheckRule "$UPLOAD >= 8" BLOCK
 86 | 
 87 |             # Whitelists (BasicRule)
 88 |             Include /etc/defender/my_whitelist.rules
 89 |             </IfModule>
 90 |         </Location>
 91 |     </VirtualHost>
 92 |     ```
 93 | 
 94 | 1. Create Mod Defender conf directory
 95 |     ```sh
 96 |     sudo mkdir /etc/defender/
 97 |     ```
 98 | 
 99 | 1. Populate it with the core rules
100 | 	```sh
101 | 	sudo wget -O /etc/defender/core.rules \
102 | 	https://raw.githubusercontent.com/nbs-system/naxsi/master/naxsi_config/naxsi_core.rules
103 | 	```
104 | 
105 | 1. Enable the module
106 | 	```sh
107 | 	sudo a2enmod defender
108 | 	```
109 | 
110 | 1. Restart Apache2 to take effect
111 | 	```sh
112 | 	sudo service apache2 restart
113 | 	```
114 | 
115 | ### FreeBSD
116 | 1. Install required packages
117 | 	```sh
118 | 	pkg install apr make gcc cmake
119 | 	```
120 | 
121 | 1. Compile the source
122 | 	```sh
123 | 	cmake -H. -Bbuild
124 |     cmake --build build -- -j4
125 | 	```
126 | 
127 | 1. Install the module
128 |     ```sh
129 |     cp build/mod_defender.so /usr/local/libexec/apache24/
130 |     ```
131 | 
132 | 1. Create its module load file
133 |    	```sh
134 |     cat << EOF | tee /usr/local/etc/apache24/modules.d/250_defender.conf > /dev/null
135 |     LoadModule defender_module libexec/apache24/mod_defender.so
136 |     <IfModule defender_module>
137 |     Include etc/defender/core.rules
138 |     </IfModule>
139 |     EOF
140 |    	```
141 | 
142 | 1. Add mod_defender settings in the desired location / directory / proxy blocks
143 |     ```
144 |     <VirtualHost *:80>
145 |         ServerName ...
146 |         DocumentRoot ...
147 | 
148 |         <Location ...>
149 |             <IfModule defender_module>
150 |             # Defender toggle
151 |             Defender On
152 |             # Match log path
153 |             MatchLog /var/log/defender_match.log
154 |             # JSON Match log path
155 |             JSONMatchLog /var/log/defender_json_match.log
156 |             # Request body limit
157 |             RequestBodyLimit 8388608
158 |             # Learning mode toggle
159 |             LearningMode On
160 |             # Extensive Learning log toggle
161 |             ExtensiveLog Off
162 |             # Libinjection SQL toggle
163 |             LibinjectionSQL Off
164 |             # Libinjection XSS toggle
165 |             LibinjectionXSS Off
166 |             ## Score action
167 |             CheckRule "$SQL >= 8" BLOCK
168 |             CheckRule "$RFI >= 8" BLOCK
169 |             CheckRule "$TRAVERSAL >= 4" BLOCK
170 |             CheckRule "$EVADE >= 4" BLOCK
171 |             CheckRule "$XSS >= 8" BLOCK
172 |             CheckRule "$UPLOAD >= 8" BLOCK
173 | 
174 |             # Whitelists (BasicRule)
175 |             Include etc/defender/my_whitelist.rules
176 |             </IfModule>
177 |         </Location>
178 |     </VirtualHost>
179 |     ```
180 | 
181 | 1. Create Mod Defender conf directory
182 |     ```sh
183 |     mkdir /usr/local/etc/defender/
184 |     ```
185 | 
186 | 1. Populate it with the core rules
187 | 	```sh
188 | 	wget -O /usr/local/etc/defender/core.rules \
189 | 	https://raw.githubusercontent.com/nbs-system/naxsi/master/naxsi_config/naxsi_core.rules
190 | 	```
191 | 
192 | 1. Restart Apache2 to take effect
193 | 	```sh
194 | 	service apache24 restart
195 | 	```
196 | 
197 | ## Configuration hierarchy
198 | ### Top (above &lt;VirtualHost&gt;)
199 | ```
200 | # Score rules
201 | Include /etc/defender/core.rules
202 | MainRule "..."
203 | ```
204 | 
205 | ### &lt;Location&gt; / &lt;Directory&gt; / &lt;Proxy&gt; blocks
206 | ```
207 | # Action rules
208 | CheckRule "..."
209 | 
210 | # Whitelist rules
211 | BasicRule "..."
212 | ```
213 | 
214 | ## Credits
215 | [NAXSI's team](https://github.com/orgs/nbs-system/people) from nbs-system
216 | 


--------------------------------------------------------------------------------
/deps/libinjection/libinjection_sqli.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2012-2016 Nick Galbreath
  3 |  * nickg@client9.com
  4 |  * BSD License -- see `COPYING.txt` for details
  5 |  *
  6 |  * https://libinjection.client9.com/
  7 |  *
  8 |  */
  9 | 
 10 | #ifndef LIBINJECTION_SQLI_H
 11 | #define LIBINJECTION_SQLI_H
 12 | 
 13 | #ifdef __cplusplus
 14 | extern "C" {
 15 | #endif
 16 | 
 17 | /*
 18 |  * Pull in size_t
 19 |  */
 20 | #include <string.h>
 21 | 
 22 | enum sqli_flags {
 23 |     FLAG_NONE            = 0
 24 |     , FLAG_QUOTE_NONE    = 1   /* 1 << 0 */
 25 |     , FLAG_QUOTE_SINGLE  = 2   /* 1 << 1 */
 26 |     , FLAG_QUOTE_DOUBLE  = 4   /* 1 << 2 */
 27 | 
 28 |     , FLAG_SQL_ANSI      = 8   /* 1 << 3 */
 29 |     , FLAG_SQL_MYSQL     = 16  /* 1 << 4 */
 30 | };
 31 | 
 32 | enum lookup_type {
 33 |     LOOKUP_WORD        = 1
 34 |     , LOOKUP_TYPE        = 2
 35 |     , LOOKUP_OPERATOR    = 3
 36 |     , LOOKUP_FINGERPRINT = 4
 37 | };
 38 | 
 39 | struct libinjection_sqli_token {
 40 | #ifdef SWIG
 41 | %immutable;
 42 | #endif
 43 |     char type;
 44 |     char str_open;
 45 |     char str_close;
 46 | 
 47 |     /*
 48 |      * position and length of token
 49 |      * in original string
 50 |      */
 51 |     size_t pos;
 52 |     size_t len;
 53 | 
 54 |     /*  count:
 55 |      *  in type 'v', used for number of opening '@'
 56 |      *  but maybe used in other contexts
 57 |      */
 58 |     int  count;
 59 | 
 60 |     char val[32];
 61 | };
 62 | 
 63 | typedef struct libinjection_sqli_token stoken_t;
 64 | 
 65 | /**
 66 |  * Pointer to function, takes c-string input,
 67 |  *  returns '\0' for no match, else a char
 68 |  */
 69 | struct libinjection_sqli_state;
 70 | typedef char (*ptr_lookup_fn)(struct libinjection_sqli_state*, int lookuptype, const char* word, size_t len);
 71 | 
 72 | struct libinjection_sqli_state {
 73 | #ifdef SWIG
 74 | %immutable;
 75 | #endif
 76 | 
 77 |     /*
 78 |      * input, does not need to be null terminated.
 79 |      * it is also not modified.
 80 |      */
 81 |     const char *s;
 82 | 
 83 |     /*
 84 |      * input length
 85 |      */
 86 |     size_t slen;
 87 | 
 88 |     /*
 89 |      * How to lookup a word or fingerprint
 90 |      */
 91 |     ptr_lookup_fn lookup;
 92 |     void*         userdata;
 93 | 
 94 |     /*
 95 |      *
 96 |      */
 97 |     int flags;
 98 | 
 99 |     /*
100 |      * pos is the index in the string during tokenization
101 |      */
102 |     size_t pos;
103 | 
104 | #ifndef SWIG
105 |     /* for SWIG.. don't use this.. use functional API instead */
106 | 
107 |     /* MAX TOKENS + 1 since we use one extra token
108 |      * to determine the type of the previous token
109 |      */
110 |     struct libinjection_sqli_token tokenvec[8];
111 | #endif
112 | 
113 |     /*
114 |      * Pointer to token position in tokenvec, above
115 |      */
116 |     struct libinjection_sqli_token *current;
117 | 
118 |     /*
119 |      * fingerprint pattern c-string
120 |      * +1 for ending null
121 |      * Minimum of 8 bytes to add gcc's -fstack-protector to work
122 |      */
123 |     char fingerprint[8];
124 | 
125 |     /*
126 |      * Line number of the code that decided whether the input was SQLi
127 |      * or not.  Most of the time it's the line that said "it's not a
128 |      * matching fingerprint" but there is other logic that sometimes
129 |      * approves an input. This is only useful for debugging.
130 |      *
131 |      */
132 |     int reason;
133 | 
134 |     /* Number of ddw (dash-dash-white) comments
135 |      * These comments are in the form of
136 |      *   '--[whitespace]' or '--[EOF]'
137 |      *
138 |      * All databases treat this as a comment.
139 |      */
140 |      int stats_comment_ddw;
141 | 
142 |     /* Number of ddx (dash-dash-[notwhite]) comments
143 |      *
144 |      * ANSI SQL treats these are comments, MySQL treats this as
145 |      * two unary operators '-' '-'
146 |      *
147 |      * If you are parsing result returns FALSE and
148 |      * stats_comment_dd > 0, you should reparse with
149 |      * COMMENT_MYSQL
150 |      *
151 |      */
152 |     int stats_comment_ddx;
153 | 
154 |     /*
155 |      * c-style comments found  /x .. x/
156 |      */
157 |     int stats_comment_c;
158 | 
159 |     /* '#' operators or MySQL EOL comments found
160 |      *
161 |      */
162 |     int stats_comment_hash;
163 | 
164 |     /*
165 |      * number of tokens folded away
166 |      */
167 |     int stats_folds;
168 | 
169 |     /*
170 |      * total tokens processed
171 |      */
172 |     int stats_tokens;
173 | 
174 | };
175 | 
176 | typedef struct libinjection_sqli_state sfilter;
177 | 
178 | struct libinjection_sqli_token* libinjection_sqli_get_token(
179 |     struct libinjection_sqli_state* sqlistate, int i);
180 | 
181 | /*
182 |  * Version info.
183 |  *
184 |  * This is moved into a function so that SWIG and other auto-generated
185 |  * bindings need not be modified for minor release changes.  We change
186 |  * the version number in the C source file and do not regenerate
187 |  * the bindings.
188 |  *
189 |  * See python's normalized version
190 |  * http://www.python.org/dev/peps/pep-0386/#normalizedversion
191 |  */
192 | const char* libinjection_version(void);
193 | 
194 | /**
195 |  *
196 |  */
197 | void libinjection_sqli_init(struct libinjection_sqli_state* sql_state,
198 |                             const char* s, size_t slen,
199 |                             int flags);
200 | 
201 | /**
202 |  * Main API: tests for SQLi in three possible contexts, no quotes,
203 |  * single quote and double quote
204 |  *
205 |  * \param sql_state core data structure
206 |  *
207 |  * \return 1 (true) if SQLi, 0 (false) if benign
208 |  */
209 | int libinjection_is_sqli(struct libinjection_sqli_state* sql_state);
210 | 
211 | /*  FOR HACKERS ONLY
212 |  *   provides deep hooks into the decision making process
213 |  */
214 | void libinjection_sqli_callback(struct libinjection_sqli_state*  sql_state,
215 |                                 ptr_lookup_fn fn,
216 |                                 void* userdata);
217 | 
218 | 
219 | /*
220 |  * Resets state, but keeps initial string and callbacks
221 |  */
222 | void libinjection_sqli_reset(struct libinjection_sqli_state* sql_state,
223 |                              int flags);
224 | 
225 | /**
226 |  *
227 |  */
228 | 
229 | /**
230 |  * This detects SQLi in a single context, mostly useful for custom
231 |  * logic and debugging.
232 |  *
233 |  * \param sql_state  Main data structure
234 |  * \param flags flags to adjust parsing
235 |  *
236 |  * \returns a pointer to sfilter.fingerprint as convenience
237 |  *          do not free!
238 |  *
239 |  */
240 | const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state* sql_state,
241 |                                           int flags);
242 | 
243 | /**
244 |  * The default "word" to token-type or fingerprint function.  This
245 |  * uses an ASCII case-insensitive binary tree.
246 |  */
247 | char libinjection_sqli_lookup_word(struct libinjection_sqli_state* sql_state,
248 |                                    int lookup_type,
249 |                                    const char* s,
250 |                                    size_t slen);
251 | 
252 | /* Streaming tokenization interface.
253 |  *
254 |  * sql_state->current is updated with the current token.
255 |  *
256 |  * \returns 1, has a token, keep going, or 0 no tokens
257 |  *
258 |  */
259 | int  libinjection_sqli_tokenize(struct libinjection_sqli_state * sql_state);
260 | 
261 | /**
262 |  * parses and folds input, up to 5 tokens
263 |  *
264 |  */
265 | int libinjection_sqli_fold(struct libinjection_sqli_state * sql_state);
266 | 
267 | /** The built-in default function to match fingerprints
268 |  *  and do false negative/positive analysis.  This calls the following
269 |  *  two functions.  With this, you over-ride one part or the other.
270 |  *
271 |  *     return libinjection_sqli_blacklist(sql_state) &&
272 |  *        libinjection_sqli_not_whitelist(sql_state);
273 |  *
274 |  * \param sql_state should be filled out after libinjection_sqli_fingerprint is called
275 |  */
276 | int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state * sql_state);
277 | 
278 | /* Given a pattern determine if it's a SQLi pattern.
279 |  *
280 |  * \return TRUE if sqli, false otherwise
281 |  */
282 | int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state);
283 | 
284 | /* Given a positive match for a pattern (i.e. pattern is SQLi), this function
285 |  * does additional analysis to reduce false positives.
286 |  *
287 |  * \return TRUE if SQLi, false otherwise
288 |  */
289 | int libinjection_sqli_not_whitelist(struct libinjection_sqli_state * sql_state);
290 | 
291 | #ifdef __cplusplus
292 | }
293 | #endif
294 | 
295 | #endif /* LIBINJECTION_SQLI_H */
296 | 


--------------------------------------------------------------------------------
/JsonValidator.cpp:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | 
 11 | #include "JsonValidator.hpp"
 12 | #include "RuntimeScanner.hpp"
 13 | 
 14 | bool JsonValidator::jsonForward(json_t &js) {
 15 |     /* Skip blanks. The bound is tested before src[off] is dereferenced,
 16 |        so nothing past js.len is ever read. */
 17 |     while (js.off < js.len && (*(js.src + js.off) == ' ' || *(js.src + js.off) == '\t' ||
 18 |                                *(js.src + js.off) == '\n' || *(js.src + js.off) == '\r')) {
 19 |         js.off++;
 20 |     }
 21 |     js.c = js.off < js.len ? *(js.src + js.off) : '\0'; /* NUL marks the end of input */
 22 |     return true;
 23 | }
 24 | 
 25 | /*
 26 | ** Skips whitespace/tab/CR/LF, then tells whether the current byte equals `seek`.
 27 | */
 28 | bool JsonValidator::jsonSeek(json_t &js, unsigned char seek) {
 29 |     jsonForward(js);
 30 |     const bool found = (js.c == seek);
 31 |     return found;
 32 | }
 33 | 
 34 | /*
 35 | ** Extracts a double-quoted string (the only kind JSON allows) starting
 36 | ** at js.off. On success *ve spans the raw bytes between the quotes
 37 | ** (escapes are not decoded) and js.off is just past the closing quote.
 38 | */
 39 | bool JsonValidator::jsonQuoted(json_t &js, str_t *ve) {
 40 |     u_char *vn_start, *vn_end = NULL;
 41 | 
 42 |     if (*(js.src + js.off) != '"')
 43 |         return false;
 44 |     js.off++;
 45 |     vn_start = js.src + js.off;
 46 |     while (js.off < js.len) {
 47 |         /* A backslash escapes the next byte: step over the pair and restart, so that
 48 |            the byte after it is examined from scratch (it may be a backslash too). */
 49 |         if (*(js.src + js.off) == '\\') {
 50 |             js.off += 2;
 51 |             continue; /* the loop condition re-checks the bound */
 52 |         }
 53 |         if (*(js.src + js.off) == '"') {
 54 |             vn_end = js.src + js.off;
 55 |             js.off++;
 56 |             break;
 57 |         }
 58 |         js.off++;
 59 |     }
 60 |     if (!vn_start || !vn_end) /* vn_end stays NULL when no closing quote was found */
 61 |         return false;
 62 |     if (!*vn_start || !*vn_end)
 63 |         return false;
 64 |     ve->data = vn_start;
 65 |     ve->len = vn_end - vn_start;
 66 |     return true;
 67 | }
 68 | 
 69 | /*
 70 | ** Parses an array: '[' followed by values separated by ','.
 71 | ** Returns true when the byte parsing stopped on (js.c) is the closing ']'.
 72 | */
 73 | bool JsonValidator::jsonArray(json_t &js) {
 74 |     js.c = *(js.src + js.off);
 75 |     if (js.c != '[')
 76 |         return false;
 77 |     if (js.depth > JSON_MAX_DEPTH)
 78 |         return false;
 79 |     js.off++;
 80 |     for (;;) {
 81 |         /* a value that cannot be extracted may simply mean
 82 |            that the end of the array was reached. */
 83 |         if (!jsonVal(js))
 84 |             break;
 85 |         jsonForward(js);
 86 |         if (js.c != ',')
 87 |             break;
 88 |         js.off++;
 89 |         jsonForward(js);
 90 |     }
 91 |     return js.c == ']';
 92 | }
 93 | 
 94 | 
 95 | /* Parses one value (string, number, true/false/null, array or object) at the
 96 |  * current offset; scalars are lowercased and checked against the body rules
 97 |  * together with the current key. Returns false if no valid value was read. */
 98 | bool JsonValidator::jsonVal(json_t &js) {
 99 |     str_t val;
100 |     bool ret;
101 |     val.data = NULL;
102 |     val.len = 0;
103 | 
104 |     /* Key and value are slices of the request buffer with no NUL terminator,
105 |        so the log is given bounded copies instead of the raw pointers. */
106 |     auto scanScalar = [&](const str_t &v) {
107 |         const string rawKey((char *) js.ckey.data, js.ckey.len);
108 |         const string rawVal((char *) v.data, v.len);
109 |         string jsckey = rawKey;
110 |         string value = rawVal;
111 |         transform(jsckey.begin(), jsckey.end(), jsckey.begin(), tolower);
112 |         transform(value.begin(), value.end(), value.begin(), tolower);
113 |         scanner.basestrRuleset(BODY, jsckey, value, bodyRules);
114 |         scanner.logg(LOG_LVL_DEBUG, scanner.errorLogFile, "JSON '%s' : '%s'\n", rawKey.c_str(), rawVal.c_str());
115 |     };
116 |     jsonForward(js);
117 |     if (js.c == '"') {
118 |         ret = jsonQuoted(js, &val);
119 |         if (ret)
120 |             scanScalar(val);
121 |         return ret;
122 |     }
123 |     if ((js.c >= '0' && js.c <= '9') || js.c == '-') {
124 |         val.data = js.src + js.off;
125 |         /* the bound is tested before each byte is read */
126 |         while (js.off < js.len &&
127 |                ((*(js.src + js.off) >= '0' && *(js.src + js.off) <= '9') ||
128 |                 *(js.src + js.off) == '.' || *(js.src + js.off) == '-')) {
129 |             val.len++;
130 |             js.off++;
131 |         }
132 |         scanScalar(val);
133 |         return true;
134 |     }
135 |     /* true / false / null: compared only when enough input is left for the
136 |        whole literal, so the comparison cannot run past js.len. */
137 |     const size_t left = js.off < js.len ? js.len - js.off : 0;
138 |     const char *cur = (const char *) (js.src + js.off);
139 |     size_t litLen = 0;
140 |     if (left >= 5 && !strncasecmp(cur, "false", 5))
141 |         litLen = 5;
142 |     else if (left >= 4 && (!strncasecmp(cur, "true", 4) || !strncasecmp(cur, "null", 4)))
143 |         litLen = 4;
144 |     if (litLen != 0) {
145 |         val.data = js.src + js.off;
146 |         val.len = litLen;
147 |         js.off += litLen;
148 |         scanScalar(val);
149 |         return true;
150 |     }
151 | 
152 |     /* Containers: the nesting level is raised around the recursive call so
153 |        the JSON_MAX_DEPTH test in jsonArray()/jsonObj() bounds the recursion,
154 |        and put back afterwards whatever the callee did to it. */
155 |     const auto outerDepth = js.depth;
156 |     if (js.c == '[') {
157 |         js.depth++;
158 |         ret = jsonArray(js);
159 |         js.depth = outerDepth;
160 |         if (js.c != ']')
161 |             return false;
162 |         js.off++;
163 |         return (ret);
164 |     }
165 |     if (js.c == '{') {
166 |         /*
167 |         ** if sub-struct, parse key without value :
168 |         ** "foobar" : { "bar" : [1,2,3]} => "foobar" parsed alone.
169 |         ** this is to avoid "foobar" left unparsed, as we won't have
170 |         ** key/value here with "foobar" as a key.
171 |         */
172 |         string jsckey = string((char *) js.ckey.data, js.ckey.len);
173 |         transform(jsckey.begin(), jsckey.end(), jsckey.begin(), tolower);
174 |         scanner.basestrRuleset(BODY, jsckey, empty, bodyRules);
175 |         js.depth++;
176 |         ret = jsonObj(js);
177 |         js.depth = outerDepth;
178 |         jsonForward(js);
179 |         if (js.c != '}')
180 |             return false;
181 |         js.off++;
182 |         return (ret);
183 |     }
184 |     return false;
185 | }
186 | 
187 | 
188 | /* Parses an object: '{', then comma-separated members, up to the closing '}'.
189 |  * Returns true with js.off left on that '}'; false on anything unexpected. */
190 | bool JsonValidator::jsonObj(json_t &js) {
191 |     js.c = *(js.src + js.off);
192 |     if (js.c != '{' || js.depth > JSON_MAX_DEPTH)
193 |         return false;
194 |     js.off++;
195 | 
196 |     do {
197 |         jsonForward(js);
198 |         /* check subs (arrays, objects) */
199 |         switch (js.c) {
200 |             case '[': /* array */
201 |                 js.depth++;
202 |                 jsonArray(js);
203 |                 if (!jsonSeek(js, ']'))
204 |                     return false;
205 |                 js.off++;
206 |                 js.depth--;
207 |                 break;
208 |             case '{': /* sub-object */
209 |                 js.depth++;
210 |                 jsonObj(js);
211 |                 if (js.c != '}')
212 |                     return false;
213 |                 js.off++;
214 |                 js.depth--;
215 |                 break;
216 |             case '"': /* key : value, extract and parse. */
217 |                 if (!jsonQuoted(js, &(js.ckey)))
218 |                     return false;
219 |                 if (!jsonSeek(js, ':'))
220 |                     return false;
221 |                 js.off++;
222 |                 jsonForward(js);
223 |                 if (!jsonVal(js))
224 |                     return false;
225 |                 break; /* do not fall through into default */
226 |             default:
227 |                 break;
228 |         }
229 |         jsonForward(js);
230 |         /* another element ? */
231 |         if (js.c == ',') {
232 |             js.off++;
233 |             jsonForward(js);
234 |             continue;
235 |         } else if (js.c == '}') {
236 |             /* End of this object. js.depth is left alone here: whoever
237 |                incremented it before calling is the one that decrements it. */
238 |             return true;
239 |         } else {
240 |             /* nothing we expected, die. */
241 |             return false;
242 |         }
243 |     } while (js.off < js.len);
244 |     return false;
245 | }
246 | 
247 | /*
248 | ** Parse a JSON request body; anything but one well-formed object raises invalidJson.
249 | */
250 | void JsonValidator::jsonParse(u_char *src, unsigned long len) {
251 |     json_t js; // NOTE(review): relies on json_t initialising off/depth itself — confirm
252 |     js.src = src;
253 |     js.len = len;
254 |     js.json.data = js.src;
255 |     js.json.len = js.len;
256 | 
257 |     const bool opens = jsonSeek(js, '{');
258 |     if (!opens) {
259 |         scanner.applyRuleMatch(scanner.parser.invalidJson, 1, BODY, "missing opening brace", empty, false);
260 |         return;
261 |     }
262 |     const bool wellFormed = jsonObj(js);
263 |     if (!wellFormed) {
264 |         scanner.applyRuleMatch(scanner.parser.invalidJson, 1, BODY, "malformed json object", empty, false);
265 |         scanner.logg(LOG_LVL_NOTICE, scanner.errorLogFile, "jsonObj returned error, apply invalid json.\n");
266 |         return;
267 |     }
268 |     /* js.off is on the closing brace: step past it and any blanks; leftovers are garbage. */
269 |     js.off++;
270 |     jsonForward(js);
271 |     if (js.off != js.len)
272 |         scanner.applyRuleMatch(scanner.parser.invalidJson, 1, BODY, "garbage after the closing brace", empty, false);
273 | }
274 | 


--------------------------------------------------------------------------------
/RuleParser.h:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | 
 11 | #ifndef MOD_DEFENDER_RULEPARSER_H
 12 | #define MOD_DEFENDER_RULEPARSER_H
 13 | 
 14 | #include <iostream>
 15 | #include <sstream>
 16 | #include <vector>
 17 | #include <string>
 18 | #include <algorithm>
 19 | #include <iterator>
 20 | #include "Util.h"
 21 | #include <regex>
 22 | #include <unordered_map>
 23 | 
 24 | //#define DEBUG_CONFIG_MAINRULE
 25 | #ifdef DEBUG_CONFIG_MAINRULE // each DEBUG_CONFIG_* switch enables one tracing macro
 26 | #define DEBUG_CONF_MR(x) do { std::cerr << x; } while (0) // x may itself be an "a << b" chain
 27 | #else
 28 | #define DEBUG_CONF_MR(x) // switch off: the call expands to nothing
 29 | #endif
 30 | 
 31 | //#define DEBUG_CONFIG_CHECKRULE
 32 | #ifdef DEBUG_CONFIG_CHECKRULE
 33 | #define DEBUG_CONF_CR(x) do { std::cerr << x; } while (0)
 34 | #else
 35 | #define DEBUG_CONF_CR(x)
 36 | #endif
 37 | 
 38 | //#define DEBUG_CONFIG_BASICRULE
 39 | #ifdef DEBUG_CONFIG_BASICRULE
 40 | #define DEBUG_CONF_BR(x) do { std::cerr << x; } while (0)
 41 | #else
 42 | #define DEBUG_CONF_BR(x)
 43 | #endif
 44 | 
 45 | //#define DEBUG_CONFIG_ACTION
 46 | #ifdef DEBUG_CONFIG_ACTION
 47 | #define DEBUG_CONF_ACTN(x) do { std::cerr << x; } while (0)
 48 | #else
 49 | #define DEBUG_CONF_ACTN(x)
 50 | #endif
 51 | 
 52 | //#define DEBUG_CONFIG_MATCHZONE
 53 | #ifdef DEBUG_CONFIG_MATCHZONE
 54 | #define DEBUG_CONF_MZ(x) do { std::cerr << x; } while (0)
 55 | #else
 56 | #define DEBUG_CONF_MZ(x)
 57 | #endif
 58 | 
 59 | //#define DEBUG_CONFIG_HASHTABLES
 60 | #ifdef DEBUG_CONFIG_HASHTABLES
 61 | #define DEBUG_CONF_HT(x) do { std::cerr << x; } while (0)
 62 | #else
 63 | #define DEBUG_CONF_HT(x)
 64 | #endif
 65 | 
 66 | //#define DEBUG_CONFIG_WLRFIND
 67 | #ifdef DEBUG_CONFIG_WLRFIND
 68 | #define DEBUG_CONF_WLRF(x) do { std::cerr << x << endl; } while (0) // unlike the macros above, appends endl
 69 | #else
 70 | #define DEBUG_CONF_WLRF(x)
 71 | #endif
 72 | 
 73 | //#define DEBUG_CONFIG_WL
 74 | #ifdef DEBUG_CONFIG_WL // also makes the match_zones[] name table available
 75 | #define DEBUG_CONF_WL(x) do { std::cerr << x << endl; } while (0) // appends endl as well
 76 | #else
 77 | #define DEBUG_CONF_WL(x)
 78 | #endif
 79 | 
 80 | using namespace Util;
 81 | using std::pair;
 82 | using std::vector;
 83 | using std::string;
 84 | using std::cerr;
 85 | using std::stringstream;
 86 | using std::endl;
 87 | using std::istream_iterator;
 88 | using std::istringstream;
 89 | using std::regex;
 90 | using std::sregex_iterator;
 91 | using std::regex_match;
 92 | using std::distance;
 93 | using std::unordered_map;
 94 | 
 95 | typedef enum { // comparison operator of a check rule (type of check_rule_t::comparator)
 96 |     SUP_OR_EQUAL, // presumably ">=" — confirm in RuleParser::parseCheckRule
 97 |     SUP, // presumably ">"
 98 |     INF_OR_EQUAL, // presumably "<="
 99 |     INF // presumably "<"
100 | } comparator_t;
101 | 
102 | typedef enum { // action attached to a check rule or to an http rule
103 |     ALLOW = 0, // default value of check_rule_t::action and http_rule_t::action
104 |     BLOCK,
105 |     DROP,
106 |     LOG
107 | } rule_action_t;
108 | 
109 | typedef struct { // value type of RuleParser::checkRules
110 |     comparator_t comparator; // no default initialiser, unlike action
111 |     unsigned long limit; // no default initialiser; presumably the value the comparator tests against — confirm
112 |     rule_action_t action = ALLOW;
113 | } check_rule_t;
114 | 
115 | /*
116 | ** struct used to store one specific match zone (element type of basic_rule_t::customLocations)
117 | ** in conf : MATCH_ZONE:[GET_VAR|HEADER|POST_VAR]:VAR_NAME:
118 | */
119 | typedef struct {
120 |     bool bodyVar = false; // match in [name] var of body
121 |     bool headersVar = false; // match in [name] var of headers
122 |     bool argsVar = false; // match in [name] var of args
123 |     bool specificUrl = false; // match on URL [name]
124 |     string target; // to be used for string match zones
125 |     regex targetRx; // to be used for regexed match zones
126 | } custom_rule_location_t;
127 | 
128 | /*
129 | ** WhiteList Rules Definition :
130 | ** A whitelist contains :
131 | ** - a URI
132 | **
133 | ** - one or several sets containing :
134 | **	- a variable name ('foo') associated with a zone ($GET_VAR:foo)
135 | **	- one or several rule ids to whitelist
136 | */
137 | typedef struct {
138 |     bool body = false; // match in full body (POST DATA)
139 |     bool bodyVar = false; // match in [name] var of body
140 |     bool headers = false; // match in all headers
141 |     bool headersVar = false; // match in [name] var of headers
142 |     bool url = false; // match in URI
143 |     bool args = false; // match in args (bla.php?<ARGS>)
144 |     bool argsVar = false; // match in [name] var of args
145 |     bool flags = false; // match on a global flag : weird_request, big_body etc.
146 |     bool fileExt = false; // match on file upload extension
147 |     /* NOTE(review): "set if defined custom match zone (GET_VAR/POST_VAR/...)" stood here, but no such flag follows */
148 |     vector<int> wlIds; // presumably the whitelisted rule ids — confirm
149 |     string target;
150 | } whitelist_location_t;
151 | 
152 | /*
153 | ** a basic rule can use one of 4 (so far) kinds of matching mechanism
154 | ** RX
155 | ** STR
156 | ** LIBINJ_XSS
157 | ** LIBINJ_SQL
158 | */
159 | enum DETECT_MECHANISM  { // type of basic_rule_t::match_type
160 |     NONE = -1,
161 |     RX,
162 |     STR,
163 |     LIBINJ_XSS,
164 |     LIBINJ_SQL
165 | };
166 | 
167 | enum MATCH_TYPE { // passed to RuleParser::isWhitelistAdapted
168 |     URI_ONLY = 0,
169 |     NAME_ONLY,
170 |     MIXED
171 | };
172 | 
173 | enum MATCH_ZONE { // match_zones[] below lists the same names in the same order
174 |     HEADERS = 0,
175 |     URL,
176 |     ARGS,
177 |     BODY,
178 |     RAW_BODY,
179 |     FILE_EXT,
180 |     UNKNOWN
181 | };
182 | 
183 | #if defined(RUNTIME_SCANNER_DEF) || defined(DEBUG_CONFIG_WL) // only compiled where one of these is defined
184 | static const char *match_zones[] = { // same names and order as enum MATCH_ZONE; presumably indexed by it
185 |         "HEADERS",
186 |         "URL",
187 |         "ARGS",
188 |         "BODY",
189 |         "RAW_BODY",
190 |         "FILE_EXT",
191 |         "UNKNOWN",
192 |         NULL // sentinel
193 | };
194 | #endif
195 | 
196 | /*
197 | ** this struct is used to aggregate all whitelists
198 | ** that point to the same URI or the same VARNAME
199 | ** all the "subrules" will then be stored in "whitelistLocations"
200 | */
201 | typedef struct {
202 |     vector<whitelist_location_t> whitelistLocations;
203 |     MATCH_ZONE zone; // zone to which the WL applies (no default initialiser)
204 |     bool uriOnly = false; // if the "name" is only an url, specify it
205 |     bool targetName = false; // does the rule target the name instead of the content
206 |     string name; // hash key [#]URI#VARNAME
207 |     vector<int> ids;
208 | } whitelist_rule_t;
209 | 
210 | typedef struct { // rule-specific part of http_rule_t (its `br` member)
211 |     bool active = false; // to check if there is a basic rule or not
212 |     regex rx;
213 |     string str;
214 |     /*
215 |     ** a basic rule can use one of 4 (so far) kinds of matching mechanism :
216 |     ** RX, STR, LIBINJ_XSS, LIBINJ_SQL
217 |     */
218 |     enum DETECT_MECHANISM match_type; // no default initialiser
219 |     bool rxMz = false;
220 |     MATCH_ZONE zone; // no default initialiser
221 |     bool bodyMz = false;
222 |     bool rawBodyMz = false;
223 |     bool bodyVarMz = false;
224 |     bool headersMz = false;
225 |     bool headersVarMz = false;
226 |     bool urlMz = false;
227 |     bool specificUrlMz = false;
228 |     bool argsMz = false;
229 |     bool argsVarMz = false;
230 |     bool fileExtMz = false;
231 |     bool customLocation = false; // set if defined "custom" match zone (GET_VAR/POST_VAR/...)
232 |     bool targetName = false; // does the rule target the variable name instead ?
233 |     bool negative = false;
234 |     vector<custom_rule_location_t> customLocations;
235 | } basic_rule_t;
236 | 
237 | enum RULE_TYPE { // type of http_rule_t::type
238 |     MAIN_RULE = 0,
239 |     BASIC_RULE
240 | };
241 | 
242 | /* TOP level rule structure */
243 | typedef struct {
244 |     RULE_TYPE type; // type of the rule (no default initialiser)
245 |     bool whitelist = false; // simply put a flag if it's a wlr, wlIds will be used to store the whitelisted IDs
246 |     vector<int> wlIds;
247 |     /* "common" data for all rules */
248 |     unsigned long id; // no default initialiser
249 |     string logMsg; // a specific log message
250 |     /* List of scores increased on rule match. */
251 |     vector<pair<string, unsigned long>> scores;
252 |     rule_action_t action = ALLOW;
253 |     basic_rule_t br; // specific rule stuff
254 | } http_rule_t;
255 | 
256 | extern vector<string> tmpMainRules;
257 | 
258 | extern vector<http_rule_t> getRules;
259 | extern vector<http_rule_t> bodyRules;
260 | extern vector<http_rule_t> rawBodyRules;
261 | extern vector<http_rule_t> headerRules;
262 | extern vector<http_rule_t> genericRules; // URL
263 | 
264 | class RuleParser { // holds the parsed rules and whitelists and answers whitelist queries
265 | private:
266 |     vector<http_rule_t> whitelistRules; // raw array of whitelist rules
267 |     bool isRuleWhitelistedRx(const http_rule_t &rule, const string uri, const string &name, MATCH_ZONE zone, bool targetName); // NOTE(review): uri is taken by value here, by reference in isRuleWhitelisted
268 |     bool isWhitelistAdapted(whitelist_rule_t &wlrule, MATCH_ZONE zone, const http_rule_t &rule,
269 |                             MATCH_TYPE type, bool targetName);
270 | 
271 | public:
272 |     unordered_map<string, check_rule_t> checkRules; // check rules, keyed by a string (presumably the score tag — confirm)
273 | 
274 |     vector<whitelist_rule_t> tmpWlr; // raw array of transformed whitelists
275 |     vector<http_rule_t> rxMzWlr; // raw array of regex-mz whitelists
276 | 
277 |     unordered_map<string, whitelist_rule_t> wlUrlHash; // hash table of whitelisted URL rules
278 |     unordered_map<string, whitelist_rule_t> wlArgsHash; // hash table of whitelisted ARGS rules
279 |     unordered_map<string, whitelist_rule_t> wlBodyHash; // hash table of whitelisted BODY rules
280 |     unordered_map<string, whitelist_rule_t> wlHeadersHash; // hash table of whitelisted HEADERS rules
281 |     vector<http_rule_t> disabled_rules; // rules that are globally disabled in one location
282 |     http_rule_t bigRequest; // the rule objects below are presumably the built-in internal rules — confirm in RuleParser()
283 |     http_rule_t uncommonHexEncoding;
284 |     http_rule_t uncommonContentType;
285 |     http_rule_t uncommonUrl;
286 |     http_rule_t uncommonPostFormat;
287 |     http_rule_t uncommonPostBoundary;
288 |     http_rule_t invalidJson; // applied by JsonValidator::jsonParse on malformed JSON bodies
289 |     http_rule_t emptyPostBody;
290 |     http_rule_t libsqliRule;
291 |     http_rule_t libxssRule;
292 | 
293 |     RuleParser();
294 |     static unsigned int parseMainRules(vector<string> &ruleLines, string errorMsg);
295 |     void parseCheckRule(vector<pair<string, string>> &rulesArray, string errorMsg);
296 |     unsigned int parseBasicRules(vector<string> &ruleLines, string errorMsg);
297 |     static void parseAction(string action, rule_action_t& rule_action); // result is written to rule_action
298 |     static void parseMatchZone(http_rule_t &rule, string &rawMatchZone, stringstream &err);
299 |     static string parseCode(std::regex_constants::error_type etype); // presumably maps a regex error code to text — confirm
300 |     void generateHashTables();
301 |     void wlrIdentify(const http_rule_t &curr, MATCH_ZONE &zone, int &uri_idx, int &name_idx);
302 |     void wlrFind(const http_rule_t &curr, whitelist_rule_t &father_wlr, MATCH_ZONE &zone, int &uriIndex, int &name_idx);
303 |     bool checkIds(unsigned long matchId, const vector<int> &wlIds);
304 |     bool findWlInHash(whitelist_rule_t &wlRule, const string &key, MATCH_ZONE zone);
305 |     bool isRuleWhitelisted(const http_rule_t &rule, const string& uri, const string &name, MATCH_ZONE zone, bool targetName);
306 | };
307 | 
308 | 
309 | #endif //MOD_DEFENDER_RULEPARSER_H
310 | 


--------------------------------------------------------------------------------
/Util.cpp:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | 
 11 | #include "Util.h"
 12 | #include "RuntimeScanner.hpp"
 13 | 
 14 | static const char *logLevels[] = {"emerg", "alert", "crit", "error", "warn", "notice", "info", "debug", NULL}; // indexed by log level in formatLog(); NULL-terminated
 15 | 
 16 | namespace Util {
 17 |     // Splits s on delimiter and returns the pieces in order; empty pieces are dropped.
 18 |     vector<string> split(const string &s, char delimiter) {
 19 |         vector<string> pieces;
 20 |         size_t begin = 0;
 21 |         for (;;) {
 22 |             const size_t end = s.find(delimiter, begin);
 23 |             // npos as a count makes substr take everything up to the end of s
 24 |             const size_t count = (end == string::npos) ? string::npos : end - begin;
 25 |             if (count != 0 && begin < s.size())
 26 |                 pieces.push_back(s.substr(begin, count));
 27 |             if (end == string::npos)
 28 |                 break;
 29 |             begin = end + 1;
 30 |         }
 31 | 
 32 |         return pieces;
 33 |     }
 34 | 
 35 |     // Parses delimiter-separated integers; s is consumed and keeps only its last field.
 36 |     vector<int> splitToInt(string &s, char delimiter) {
 37 |         vector<int> v;
 38 |         size_t pos = 0;
 39 |         while ((pos = s.find(delimiter)) != string::npos) {
 40 |             if (pos > 0) // skip empty fields such as the one between ",,"
 41 |                 v.push_back(std::stoi(s.substr(0, pos)));
 42 |             s.erase(0, pos + 1);
 43 |         }
 44 |         // Empty input or a trailing delimiter leaves no final field; std::stoi("") throws.
 45 |         if (!s.empty())
 46 |             v.push_back(std::stoi(s));
 47 |         return v;
 48 |     }
 49 | 
 50 |     // Splits s around the first occurrence of delim; without a match second stays empty.
 51 |     pair<string, string> splitAtFirst(const string &s, string delim) {
 52 |         const size_t delimpos = s.find(delim);
 53 |         if (delimpos == string::npos) // npos + delim.length() would wrap around
 54 |             return pair<string, string>(s, string());
 55 |         return pair<string, string>(s.substr(0, delimpos), s.substr(delimpos + delim.length()));
 56 |     }
 57 | 
 58 |     std::vector<string> parseRawDirective(std::string raw_directive) { // splits one directive line into its parts
 59 |         std::size_t semicolon_pos = raw_directive.rfind(';');
 60 |         if (semicolon_pos != std::string::npos) { // drop the last ';' and everything after it
 61 |             raw_directive = raw_directive.substr(0, semicolon_pos);
 62 |             raw_directive = rtrim(raw_directive);
 63 |         }
 64 |         std::vector<string> parts;
 65 |         bool in_quotes = false; // true while between an opening and a closing double quote
 66 |         std::string part; // part currently being accumulated
 67 |         unsigned int backslash = 0; // run length of consecutive backslashes seen inside quotes
 68 |         for (size_t i = 0; i < raw_directive.length(); i++) {
 69 |             const char &c = raw_directive[i];
 70 |             bool char_added = false;
 71 |             if (in_quotes || (c != ' ')) { // spaces only separate parts outside quotes
 72 |                 part.push_back(c);
 73 |                 if (in_quotes && backslash % 2 == 1 && c == '"') {
 74 |                     // quote preceded by an odd number of backslashes: kept in the part, quoting goes on
 75 |                 } else if ((c == '"' && !in_quotes))
 76 |                     in_quotes = true;
 77 |                 else if (c == '"')
 78 |                     in_quotes = false;
 79 |                 char_added = true;
 80 |             }
 81 |             if (in_quotes && c == '\\')
 82 |                 backslash++;
 83 |             else
 84 |                 backslash = 0;
 85 |             if (!part.empty() && (!char_added || (i == raw_directive.length() - 1))) { // separator or last char: flush
 86 |                 if (part.front() == '\"' && part.back() == '\"') {
 87 |                     part.erase(0, 1); // remove leading and
 88 |                     part.pop_back(); // trailing double quotes
 89 |                 }
 90 |                 parts.push_back(unescape(part)); // unescape() is defined elsewhere; its exact rules are not visible here
 91 |                 part.clear();
 92 |             }
 93 |         }
 94 |         return parts;
 95 |     }
 96 | 
 97 |     // Local time as "Www Mmm dd hh:mm:ss.uuuuuu yyyy" (uuuuuu = microseconds).
 98 |     string apacheTimeFmt() {
 99 |         // One clock read supplies both parts, so seconds and microseconds always agree.
100 |         struct timespec tp;
101 |         clock_gettime(CLOCK_REALTIME, &tp);
102 |         struct tm tm_info;
103 |         localtime_r(&tp.tv_sec, &tm_info); // re-entrant, unlike localtime()
104 | 
105 |         char date[20];
106 |         strftime(date, sizeof(date), "%a %b %d %T", &tm_info);
107 |         char year[5];
108 |         strftime(year, sizeof(year), "%Y", &tm_info);
109 | 
110 |         std::ostringstream oss;
111 |         oss << date << ".";
112 |         oss.width(6); // microseconds are a fixed six-digit, zero-padded field
113 |         oss.fill('0');
114 |         oss << tp.tv_nsec / 1000 << " " << year; // width applies to the number only
115 |         return oss.str();
116 |     }
117 | 
118 |     // Current local time formatted as "YYYY/MM/DD hh:mm:ss".
119 |     string naxsiTimeFmt() {
120 |         const time_t timer = time(NULL);
121 |         struct tm tm_info;
122 |         localtime_r(&timer, &tm_info); // re-entrant; localtime() shares one static buffer
123 |         char buffer[26];
124 |         strftime(buffer, sizeof(buffer), "%Y/%m/%d %T", &tm_info);
125 |         return string(buffer);
126 |     }
127 | 
128 |     string formatLog(int loglevel, const string &clientIp) { // builds "[time] [defender:level] [client ip] "
129 |         const int levelCount = (int) (sizeof(logLevels) / sizeof(logLevels[0])) - 1; // minus the NULL sentinel
130 |         const char *level = (loglevel >= 0 && loglevel < levelCount) ? logLevels[loglevel] : "unknown";
131 |         stringstream ss;
132 |         ss << "[" << apacheTimeFmt() << "] [defender:" << level << "] ";
133 |         if (!clientIp.empty())
134 |             ss << "[client " << clientIp << "] ";
135 |         return ss.str();
136 |     }
137 | 
138 |     int naxsi_unescape_uri(u_char **dst, u_char **src, size_t size, unsigned int type) {
139 |         u_char *d, *s, ch, c, decoded;
140 |         int bad = 0;
141 | 
142 |         enum {
143 |             sw_usual = 0,
144 |             sw_quoted,
145 |             sw_quoted_second
146 |         } state;
147 | 
148 |         d = *dst;
149 |         s = *src;
150 | 
151 |         state = sw_usual;
152 |         decoded = 0;
153 | 
154 |         while (size--) {
155 |             ch = *s++;
156 |             switch (state) {
157 |                 case sw_usual:
158 |                     if (ch == '?'
159 |                         && (type & (UNESCAPE_URI | UNESCAPE_REDIRECT))) {
160 |                         *d++ = ch;
161 |                         goto done;
162 |                     }
163 | 
164 |                     if (ch == '%') {
165 |                         state = sw_quoted;
166 |                         break;
167 |                     }
168 | 
169 |                     *d++ = ch;
170 |                     break;
171 |                 case sw_quoted:
172 |                     if (ch >= '0' && ch <= '9') {
173 |                         decoded = (u_char) (ch - '0');
174 |                         state = sw_quoted_second;
175 |                         break;
176 |                     }
177 | 
178 |                     c = (u_char) (ch | 0x20);
179 |                     if (c >= 'a' && c <= 'f') {
180 |                         decoded = (u_char) (c - 'a' + 10);
181 |                         state = sw_quoted_second;
182 |                         break;
183 |                     }
184 | 
185 |                     /* the invalid quoted character */
186 |                     bad++;
187 |                     state = sw_usual;
188 |                     *d++ = '%';
189 |                     *d++ = ch;
190 |                     break;
191 | 
192 |                 case sw_quoted_second:
193 |                     state = sw_usual;
194 |                     if (ch >= '0' && ch <= '9') {
195 |                         ch = (u_char) ((decoded << 4) + ch - '0');
196 | 
197 |                         if (type & UNESCAPE_REDIRECT) {
198 |                             if (ch > '%' && ch < 0x7f) {
199 |                                 *d++ = ch;
200 |                                 break;
201 |                             }
202 | 
203 |                             *d++ = '%';
204 |                             *d++ = *(s - 2);
205 |                             *d++ = *(s - 1);
206 | 
207 |                             break;
208 |                         }
209 | 
210 |                         *d++ = ch;
211 | 
212 |                         break;
213 |                     }
214 | 
215 |                     c = (u_char) (ch | 0x20);
216 |                     if (c >= 'a' && c <= 'f') {
217 |                         ch = (u_char) ((decoded << 4) + c - 'a' + 10);
218 | 
219 |                         if (type & UNESCAPE_URI) {
220 |                             if (ch == '?') {
221 |                                 *d++ = ch;
222 |                                 goto done;
223 |                             }
224 | 
225 |                             *d++ = ch;
226 |                             break;
227 |                         }
228 | 
229 |                         if (type & UNESCAPE_REDIRECT) {
230 |                             if (ch == '?') {
231 |                                 *d++ = ch;
232 |                                 goto done;
233 |                             }
234 | 
235 |                             if (ch > '%' && ch < 0x7f) {
236 |                                 *d++ = ch;
237 |                                 break;
238 |                             }
239 | 
240 |                             *d++ = '%';
241 |                             *d++ = *(s - 2);
242 |                             *d++ = *(s - 1);
243 |                             break;
244 |                         }
245 | 
246 |                         *d++ = ch;
247 | 
248 |                         break;
249 |                     }
250 |                     /* the invalid quoted character */
251 |                     /* as it happened in the 2nd part of quoted character,
252 |                        we need to restore the decoded char as well. */
253 |                     *d++ = '%';
254 |                     *d++ = (u_char) ((0 >= decoded && decoded < 10) ? decoded + '0' : decoded - 10 + 'a');
255 |                     *d++ = ch;
256 |                     bad++;
257 |                     break;
258 |             }
259 |         }
260 | 
261 |         done:
262 | 
263 |         *dst = d;
264 |         *src = s;
265 | 
266 |         return bad;
267 |     }
268 | 
269 |     string escapeQuotes(const string &before) {
270 |         string after;
271 |         after.reserve(before.length() + 4);
272 |         for (string::size_type i = 0; i < before.length(); ++i) {
273 |             switch (before[i]) {
274 |                 case '"':
275 |                 case '\\':
276 |                     after += '\\';
277 |                 default:
278 |                     after += before[i];
279 |             }
280 |         }
281 |         return after;
282 |     }
283 | 
284 |     /*
285 |      * Similar to Apache directive args parsing:
286 |      * \\ -> \
287 |      * \ -> \
288 |      * \" -> "
289 |      * \<any other char> -> \<any other char>
290 |      */
291 |     string unescape(const string &s) {
292 |         string res;
293 |         string::const_iterator it = s.begin();
294 |         while (it != s.end()) {
295 |             char c = *it++;
296 |             if (c == '\\' && it != s.end()) {
297 |                 char next = *it++;
298 |                 switch (next) {
299 |                     case '\\':
300 |                         c = '\\';
301 |                         break;
302 |                     case '"':
303 |                         c = '"';
304 |                         break;
305 |                     default:
306 |                         res += c;
307 |                         res += next;
308 |                         continue;
309 |                 }
310 |             }
311 |             res += c;
312 |         }
313 |         return res;
314 |     }
315 | }


--------------------------------------------------------------------------------
/deps/libinjection/libinjection_xss.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "libinjection.h"
  3 | #include "libinjection_xss.h"
  4 | #include "libinjection_html5.h"
  5 | 
  6 | #include <assert.h>
  7 | #include <stdio.h>
  8 | 
/*
 * Classification of an HTML attribute name, as returned by is_black_attr()
 * and consumed by libinjection_is_xss() when it sees the attribute's value.
 */
typedef enum attribute {
    TYPE_NONE             /* not blacklisted; the attribute value is not inspected */
    , TYPE_BLACK     /* ban always */
    , TYPE_ATTR_URL   /* attribute value takes a URL-like object */
    , TYPE_STYLE          /* style-like attribute; libinjection_is_xss() flags any value */
    , TYPE_ATTR_INDIRECT  /* attribute *name* is given in *value* */
} attribute_t;
 16 | 
 17 | 
/* Forward declarations of the file-local helpers defined below. */
static attribute_t is_black_attr(const char* s, size_t len);
static int is_black_tag(const char* s, size_t len);
static int is_black_url(const char* s, size_t len);
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
static int html_decode_char_at(const char* src, size_t len, size_t* consumed);
static int htmlencode_startswith(const char* prefix, const char *src, size_t n);
 24 | 
 25 | 
/* One entry of the attribute blacklist: an attribute name and its classification. */
typedef struct stringtype {
    const char* name;   /* upper-case attribute name; NULL marks the end of a table */
    attribute_t atype;  /* classification returned when the name matches */
} stringtype_t;
 30 | 
 31 | 
/*
 * Maps a byte value to the value of the hexadecimal digit it represents:
 * '0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15.
 * Every other byte maps to 256, meaning "not a hex digit".
 */
static const int gsHexDecodeMap[256] = {
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    0,   1,   2,   3,   4,   5,   6,   7,   8,   9, 256, 256,
    256, 256, 256, 256, 256,  10,  11,  12,  13,  14,  15, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256,  10,  11,  12,  13,  14,  15, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256
};
 56 | 
 57 | static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
 58 | {
 59 |     int val = 0;
 60 |     size_t i;
 61 |     int ch;
 62 | 
 63 |     if (len == 0 || src == NULL) {
 64 |         *consumed = 0;
 65 |         return -1;
 66 |     }
 67 | 
 68 |     *consumed = 1;
 69 |     if (*src != '&' || len < 2) {
 70 |         return (unsigned char)(*src);
 71 |     }
 72 | 
 73 | 
 74 |     if (*(src+1) != '#') {
 75 |         /* normally this would be for named entities
 76 |          * but for this case we don't actually care
 77 |          */
 78 |         return '&';
 79 |     }
 80 | 
 81 |     if (*(src+2) == 'x' || *(src+2) == 'X') {
 82 |         ch = (unsigned char) (*(src+3));
 83 |         ch = gsHexDecodeMap[ch];
 84 |         if (ch == 256) {
 85 |             /* degenerate case  '&#[?]' */
 86 |             return '&';
 87 |         }
 88 |         val = ch;
 89 |         i = 4;
 90 |         while (i < len) {
 91 |             ch = (unsigned char) src[i];
 92 |             if (ch == ';') {
 93 |                 *consumed = i + 1;
 94 |                 return val;
 95 |             }
 96 |             ch = gsHexDecodeMap[ch];
 97 |             if (ch == 256) {
 98 |                 *consumed = i;
 99 |                 return val;
100 |             }
101 |             val = (val * 16) + ch;
102 |             if (val > 0x1000FF) {
103 |                 return '&';
104 |             }
105 |             ++i;
106 |         }
107 |         *consumed = i;
108 |         return val;
109 |     } else {
110 |         i = 2;
111 |         ch = (unsigned char) src[i];
112 |         if (ch < '0' || ch > '9') {
113 |             return '&';
114 |         }
115 |         val = ch - '0';
116 |         i += 1;
117 |         while (i < len) {
118 |             ch = (unsigned char) src[i];
119 |             if (ch == ';') {
120 |                 *consumed = i + 1;
121 |                 return val;
122 |             }
123 |             if (ch < '0' || ch > '9') {
124 |                 *consumed = i;
125 |                 return val;
126 |             }
127 |             val = (val * 10) + (ch - '0');
128 |             if (val > 0x1000FF) {
129 |                 return '&';
130 |             }
131 |             ++i;
132 |         }
133 |         *consumed = i;
134 |         return val;
135 |     }
136 | }
137 | 
138 | 
139 | /*
140 |  * view-source:
141 |  * data:
142 |  * javascript:
143 |  */
/*
 * Attribute names checked by is_black_attr(), each with the classification
 * to return on a match. Names are upper-case because the comparison helper
 * upper-cases the input. The { NULL, TYPE_NONE } entry terminates the table.
 */
static stringtype_t BLACKATTR[] = {
    { "ACTION", TYPE_ATTR_URL }     /* form */
    , { "ATTRIBUTENAME", TYPE_ATTR_INDIRECT } /* SVG allow indirection of attribute names */
    , { "BY", TYPE_ATTR_URL }         /* SVG */
    , { "BACKGROUND", TYPE_ATTR_URL } /* IE6, O11 */
    , { "DATAFORMATAS", TYPE_BLACK }  /* IE */
    , { "DATASRC", TYPE_BLACK }       /* IE */
    , { "DYNSRC", TYPE_ATTR_URL }     /* Obsolete img attribute */
    , { "FILTER", TYPE_STYLE }        /* Opera, SVG inline style */
    , { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */
    , { "FOLDER", TYPE_ATTR_URL }     /* Only on A tags, IE-only */
    , { "FROM", TYPE_ATTR_URL }       /* SVG */
    , { "HANDLER", TYPE_ATTR_URL }    /* SVG Tiny, Opera */
    , { "HREF", TYPE_ATTR_URL }
    , { "LOWSRC", TYPE_ATTR_URL }     /* Obsolete img attribute */
    , { "POSTER", TYPE_ATTR_URL }     /* Opera 10,11 */
    , { "SRC", TYPE_ATTR_URL }
    , { "STYLE", TYPE_STYLE }
    , { "TO", TYPE_ATTR_URL }         /* SVG */
    , { "VALUES", TYPE_ATTR_URL }     /* SVG */
    , { "XLINK:HREF", TYPE_ATTR_URL }
    , { NULL, TYPE_NONE }
};
167 | 
168 | /* xmlns */
169 | /* `xml-stylesheet` > <eval>, <if expr=> */
170 | 
171 | /*
172 |   static const char* BLACKATTR[] = {
173 |   "ATTRIBUTENAME",
174 |   "BACKGROUND",
175 |   "DATAFORMATAS",
176 |   "HREF",
177 |   "SCROLL",
178 |   "SRC",
179 |   "STYLE",
180 |   "SRCDOC",
181 |   NULL
182 |   };
183 | */
184 | 
/*
 * Upper-case tag names that is_black_tag() treats as XSS.
 * The list is NULL-terminated; commented-out names are deliberately not banned.
 */
static const char* BLACKTAG[] = {
    "APPLET"
    /*    , "AUDIO" */
    , "BASE"
    , "COMMENT"  /* IE http://html5sec.org/#38 */
    , "EMBED"
    /*   ,  "FORM" */
    , "FRAME"
    , "FRAMESET"
    , "HANDLER" /* Opera SVG, effectively a script tag */
    , "IFRAME"
    , "IMPORT"
    , "ISINDEX"
    , "LINK"
    , "LISTENER"
    /*    , "MARQUEE" */
    , "META"
    , "NOSCRIPT"
    , "OBJECT"
    , "SCRIPT"
    , "STYLE"
    /*    , "VIDEO" */
    , "VMLFRAME"
    , "XML"
    , "XSS"
    , NULL
};
212 | 
213 | 
/*
 * Compare the upper-case C string `a` with the first `n` bytes of `b`.
 * Bytes of `b` are upper-cased (ASCII a-z only) before comparing and NUL
 * bytes inside `b` are skipped.
 *
 * Returns 0 when the non-NUL bytes of `b` spell exactly `a`, 1 otherwise.
 */
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
{
    size_t idx;

    for (idx = 0; idx < n; ++idx) {
        char candidate = b[idx];

        if (candidate == '\0') {
            /* embedded NULs in the input are ignored */
            continue;
        }
        if (candidate >= 'a' && candidate <= 'z') {
            candidate -= 0x20;
        }
        if (*a != candidate) {
            return 1;
        }
        ++a;
    }

    /* equal only if the whole of `a` was used up as well */
    return (*a == 0) ? 0 : 1;
}
241 | 
/*
 * Tell whether the HTML-encoded binary string `b` (n bytes) starts with the
 * upper-case, NUL-terminated string `a`. The comparison is case-insensitive
 * and works on decoded characters (see html_decode_char_at).
 *
 * Ignored in `b`:
 *  - every leading character with a code <= 32 (whitespace / control);
 *  - any decoded NUL character;
 *  - any decoded character with code 10 (line feed).
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int htmlencode_startswith(const char *a, const char *b, size_t n)
{
    size_t used;
    int decoded;
    int leading = 1;

    while (n > 0) {
        if (*a == 0) {
            /* the whole prefix has been matched */
            return 1;
        }

        decoded = html_decode_char_at(b, n, &used);
        b += used;
        n -= used;

        if (leading && decoded <= 32) {
            continue;
        }
        leading = 0;

        if (decoded == 0 || decoded == 10) {
            continue;
        }

        if (decoded >= 'a' && decoded <= 'z') {
            decoded -= 0x20;
        }

        if (*a != (char) decoded) {
            return 0;
        }
        a++;
    }

    /* input exhausted: a match only if the prefix is exhausted too */
    return (*a == 0) ? 1 : 0;
}
298 | 
299 | static int is_black_tag(const char* s, size_t len)
300 | {
301 |     const char** black;
302 | 
303 |     if (len < 3) {
304 |         return 0;
305 |     }
306 | 
307 |     black = BLACKTAG;
308 |     while (*black != NULL) {
309 |         if (cstrcasecmp_with_null(*black, s, len) == 0) {
310 |             /* printf("Got black tag %s\n", *black); */
311 |             return 1;
312 |         }
313 |         black += 1;
314 |     }
315 | 
316 |     /* anything SVG related */
317 |     if ((s[0] == 's' || s[0] == 'S') &&
318 |         (s[1] == 'v' || s[1] == 'V') &&
319 |         (s[2] == 'g' || s[2] == 'G')) {
320 |         /*        printf("Got SVG tag \n"); */
321 |         return 1;
322 |     }
323 | 
324 |     /* Anything XSL(t) related */
325 |     if ((s[0] == 'x' || s[0] == 'X') &&
326 |         (s[1] == 's' || s[1] == 'S') &&
327 |         (s[2] == 'l' || s[2] == 'L')) {
328 |         /*      printf("Got XSL tag\n"); */
329 |         return 1;
330 |     }
331 | 
332 |     return 0;
333 | }
334 | 
335 | static attribute_t is_black_attr(const char* s, size_t len)
336 | {
337 |     stringtype_t* black;
338 | 
339 |     if (len < 2) {
340 |         return TYPE_NONE;
341 |     }
342 | 
343 |     /* JavaScript on.* */
344 |     if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
345 |         /* printf("Got JavaScript on- attribute name\n"); */
346 |         return TYPE_BLACK;
347 |     }
348 | 
349 | 
350 |     if (len >= 5) {
351 |         /* XMLNS can be used to create arbitrary tags */
352 |         if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) {
353 |             /*      printf("Got XMLNS and XLINK tags\n"); */
354 |             return TYPE_BLACK;
355 |         }
356 |     }
357 | 
358 |     black = BLACKATTR;
359 |     while (black->name != NULL) {
360 |         if (cstrcasecmp_with_null(black->name, s, len) == 0) {
361 |             /*      printf("Got banned attribute name %s\n", black->name); */
362 |             return black->atype;
363 |         }
364 |         black += 1;
365 |     }
366 | 
367 |     return TYPE_NONE;
368 | }
369 | 
/*
 * Return 1 when the attribute value `s` (len bytes) starts, after leading
 * junk, with a dangerous URL scheme prefix, 0 otherwise.
 *
 * Prefixes checked (HTML-decoded, case-insensitive):
 *   DATA, VIEW-SOURCE, JAVA (covers JAVA and JAVASCRIPT), VBSCRIPT.
 */
static int is_black_url(const char* s, size_t len)
{
    static const char* const schemes[] = {
        "DATA",
        "VIEW-SOURCE",
        "JAVA",
        "VBSCRIPT"   /* obsolete but interesting signal */
    };
    size_t k;

    /*
     * Skip leading whitespace / control bytes. The char comparison is
     * intentionally done on a possibly signed char so that high-bit bytes are
     * skipped too: they are not ASCII, and Opera sometimes uses UTF-8
     * whitespace; in EUC-JP some of the high bytes are just ignored.
     */
    for (; len > 0 && (*s <= 32 || *s >= 127); ++s, --len) {
        /* nothing */
    }

    for (k = 0; k < sizeof(schemes) / sizeof(schemes[0]); ++k) {
        if (htmlencode_startswith(schemes[k], s, len)) {
            return 1;
        }
    }
    return 0;
}
412 | 
413 | int libinjection_is_xss(const char* s, size_t len, int flags)
414 | {
415 |     h5_state_t h5;
416 |     attribute_t attr = TYPE_NONE;
417 | 
418 |     libinjection_h5_init(&h5, s, len, (enum html5_flags) flags);
419 |     while (libinjection_h5_next(&h5)) {
420 |         if (h5.token_type != ATTR_VALUE) {
421 |             attr = TYPE_NONE;
422 |         }
423 | 
424 |         if (h5.token_type == DOCTYPE) {
425 |             return 1;
426 |         } else if (h5.token_type == TAG_NAME_OPEN) {
427 |             if (is_black_tag(h5.token_start, h5.token_len)) {
428 |                 return 1;
429 |             }
430 |         } else if (h5.token_type == ATTR_NAME) {
431 |             attr = is_black_attr(h5.token_start, h5.token_len);
432 |         } else if (h5.token_type == ATTR_VALUE) {
433 |             /*
434 |              * IE6,7,8 parsing works a bit differently so
435 |              * a whole <script> or other black tag might be hiding
436 |              * inside an attribute value under HTML 5 parsing
437 |              * See http://html5sec.org/#102
438 |              * to avoid doing a full reparse of the value, just
439 |              * look for "<".  This probably need adjusting to
440 |              * handle escaped characters
441 |              */
442 |             /*
443 |               if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
444 |               return 1;
445 |               }
446 |             */
447 | 
448 |             switch (attr) {
449 |             case TYPE_NONE:
450 |                 break;
451 |             case TYPE_BLACK:
452 |                 return 1;
453 |             case TYPE_ATTR_URL:
454 |                 if (is_black_url(h5.token_start, h5.token_len)) {
455 |                     return 1;
456 |                 }
457 |                 break;
458 |             case TYPE_STYLE:
459 |                 return 1;
460 |             case TYPE_ATTR_INDIRECT:
461 |                 /* an attribute name is specified in a _value_ */
462 |                 if (is_black_attr(h5.token_start, h5.token_len)) {
463 |                     return 1;
464 |                 }
465 |                 break;
466 | /*
467 |   default:
468 |   assert(0);
469 | */
470 |             }
471 |             attr = TYPE_NONE;
472 |         } else if (h5.token_type == TAG_COMMENT) {
473 |             /* IE uses a "`" as a tag ending char */
474 |             if (memchr(h5.token_start, '`', h5.token_len) != NULL) {
475 |                 return 1;
476 |             }
477 | 
478 |             /* IE conditional comment */
479 |             if (h5.token_len > 3) {
480 |                 if (h5.token_start[0] == '[' &&
481 |                     (h5.token_start[1] == 'i' || h5.token_start[1] == 'I') &&
482 |                     (h5.token_start[2] == 'f' || h5.token_start[2] == 'F')) {
483 |                     return 1;
484 |                 }
485 |                 if ((h5.token_start[0] == 'x' || h5.token_start[0] == 'X') &&
486 |                     (h5.token_start[1] == 'm' || h5.token_start[1] == 'M') &&
487 |                     (h5.token_start[2] == 'l' || h5.token_start[2] == 'L')) {
488 |                     return 1;
489 |                 }
490 |             }
491 | 
492 |             if (h5.token_len > 5) {
493 |                 /*  IE <?import pseudo-tag */
494 |                 if (cstrcasecmp_with_null("IMPORT", h5.token_start, 6) == 0) {
495 |                     return 1;
496 |                 }
497 | 
498 |                 /*  XML Entity definition */
499 |                 if (cstrcasecmp_with_null("ENTITY", h5.token_start, 6) == 0) {
500 |                     return 1;
501 |                 }
502 |             }
503 |         }
504 |     }
505 |     return 0;
506 | }
507 | 
508 | 
509 | /*
510 |  * wrapper
511 |  */
512 | int libinjection_xss(const char* s, size_t len)
513 | {
514 |     if (libinjection_is_xss(s, len, DATA_STATE)) {
515 |         return 1;
516 |     }
517 |     if (libinjection_is_xss(s, len, VALUE_NO_QUOTE)) {
518 |         return 1;
519 |     }
520 |     if (libinjection_is_xss(s, len, VALUE_SINGLE_QUOTE)) {
521 |         return 1;
522 |     }
523 |     if (libinjection_is_xss(s, len, VALUE_DOUBLE_QUOTE)) {
524 |         return 1;
525 |     }
526 |     if (libinjection_is_xss(s, len, VALUE_BACK_QUOTE)) {
527 |         return 1;
528 |     }
529 | 
530 |     return 0;
531 | }
532 | 


--------------------------------------------------------------------------------
/mod_defender_body.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * \file     mod_defender_body.c
  3 |  * \authors  Kevin Guillemot
  4 |  * \version  1.0
  5 |  * \date     30/04/2018
  6 |  * \license  GPLv3
 * \brief    All body-related methods used by mod_defender to save and give back data
 * \note     Most of the code present here comes from mod_security2,
 *            more precisely from the file apache2/msc_reqbody.c
 10 |  */
 11 | 
 12 | 
 13 | /*************************/
 14 | /* Inclusion of .H files */
 15 | /*************************/
 16 | 
 17 | #include "mod_defender.hpp"
 18 | #include "RuntimeScanner.hpp"
 19 | 
 20 | 
 21 | /***************************/
/* Definition of functions */
 23 | /***************************/
 24 | 
 25 | /**
 26 |  *  Returns string representation of provided status, 200 bytes length max.
 27 |  */
 28 | static char *get_apr_error(apr_pool_t *p, apr_status_t rc) {
 29 |     char *text = (char *) apr_pcalloc(p, 201);
 30 |     if (text == NULL) return NULL;
 31 |     apr_strerror(rc, text, 200);
 32 |     return text;
 33 | }
 34 | 
 35 | /**
 36 |  * Initialize all variables used to forward request body
 37 |  */
 38 | apr_status_t body_retrieve_start(defender_t *def, char **error_msg, request_rec *r) {
 39 |     *error_msg = NULL;
 40 | 
 41 |     def->body_chunk_position = 0;
 42 |     def->body_chunk_offset = 0;
 43 | 
 44 |     def->body_chunk = (chunk_t *)apr_pcalloc(def->body_pool, sizeof(chunk_t));
 45 |     if( def->body_chunk == NULL ) {
 46 |         *error_msg = apr_psprintf(r->pool, "Failed to allocate %lu bytes for request body disk chunk.",
 47 |                                   (unsigned long)sizeof(chunk_t));
 48 |         return -1;
 49 |     }
 50 | 
 51 |     return 1;
 52 | }
 53 | 
 54 | /**
 55 |  * Returns one chunk of request body data. It stores a NULL
 56 |  * in the chunk pointer when there is no data to return. The
 57 |  * return code is 1 if more calls can be made to retrieve more
 58 |  * data, 0 if there is no more data to retrieve, or -1 on error.
 59 |  *
 60 |  * The caller can limit the amount of data returned by providing
 61 |  * a non-negative value in nbytes.
 62 |  */
 63 | apr_status_t body_retrieve(defender_t *def, chunk_t **chunk, long int nbytes, char **error_msg, request_rec *r)
 64 | {
 65 |     chunk_t **chunks;
 66 |     *error_msg = NULL;
 67 | 
 68 |     if (chunk == NULL) {
 69 |         *error_msg = apr_pstrdup(r->pool, "Internal error, retrieving request body chunk.");
 70 |         return -1;
 71 |     }
 72 |     *chunk = NULL;
 73 | 
 74 |     /* Are there any chunks left? */
 75 |     if (def->body_chunk_position >= def->body_chunks->nelts) {
 76 |         /* No more chunks. */
 77 |         return 0;
 78 |     }
 79 | 
 80 |     /* We always respond with the same chunk, just different information in it. */
 81 |     *chunk = def->body_chunk;
 82 | 
 83 |     /* Advance to the current chunk and position on the
 84 |      * next byte we need to send.
 85 |      */
 86 |     chunks = (chunk_t **)def->body_chunks->elts;
 87 |     def->body_chunk->data = chunks[def->body_chunk_position]->data + def->body_chunk_offset;
 88 | 
 89 |     if (nbytes < 0) {
 90 |         /* Send what's left in this chunk as there is no limit on the size. */
 91 |         def->body_chunk->length = chunks[def->body_chunk_position]->length;
 92 |         def->body_chunk_position++;
 93 |         def->body_chunk_offset = 0;
 94 |     } else {
 95 |         /* We have a limit we must obey. */
 96 | 
 97 |         if (chunks[def->body_chunk_position]->length - def->body_chunk_offset <= (unsigned int)nbytes)
 98 |         {
 99 |             /* If what's left in our chunk is less than the limit
100 |              * then send it all back.
101 |              */
102 |             def->body_chunk->length = chunks[def->body_chunk_position]->length - def->body_chunk_offset;
103 |             def->body_chunk_position++;
104 |             def->body_chunk_offset = 0;
105 |         } else {
106 |             /* If we have more data in our chunk, send the
107 |              * maximum bytes we can (nbytes).
108 |              */
109 |             def->body_chunk->length = nbytes;
110 |             def->body_chunk_offset += nbytes;
111 |         }
112 |     }
113 | 
114 |     /* If we've advanced beyond our last chunk then
115 |      * we have no more data to send.
116 |      */
117 |     if (def->body_chunk_position >= def->body_chunks->nelts) {
118 |         return 0; /* No more chunks. */
119 |     }
120 | 
121 |     /* More data available. */
122 |     return 1;
123 | }
124 | 
125 | /**
126 |  * Prepare to accept the request body (part 2).
127 |  */
128 | static apr_status_t body_start_init(defender_t *def, char **error_msg, request_rec *r) {
129 |     *error_msg = NULL;
130 | 
131 |     /* Prepare to store request body in memory. */
132 | 
133 |     def->body_chunks = apr_array_make(def->body_pool, 32, sizeof(chunk_t *));
134 |     if( def->body_chunks == NULL ) {
135 |         *error_msg = apr_pstrdup(r->pool, "Body start init: Failed to prepare in-memory storage.");
136 |         return -1;
137 |     }
138 | 
139 |     return 1;
140 | }
141 | 
142 | /**
143 |  * Prepare to accept the request body (part 1).
144 |  */
145 | apr_status_t body_start(defender_t *def, char **error_msg, request_rec *r) {
146 |     *error_msg = NULL;
147 |     def->body_length = 0;
148 |     def->stream_input_length = 0;
149 | 
150 |     /* Create a separate memory pool that will be used
151 |      * to allocate structures from (not data, which is allocated
152 |      * via malloc).
153 |      */
154 |     apr_pool_create(&def->body_pool, NULL);
155 | 
156 |     return body_start_init(def, error_msg, r);
157 | }
158 | 
159 | /**
160 |  *
161 |  * Store data into msr->stream_input_data to
162 |  */
163 | apr_status_t body_to_stream(defender_t *def, const char *buffer, int buflen, char **error_msg, request_rec *r) {
164 |     char *stream_input_body = NULL;
165 |     char *data = NULL;
166 |     int first_pkt = 0;
167 | 
168 |     if(def->stream_input_data == NULL)  {
169 |         def->stream_input_data = (char *)calloc(sizeof(char), def->stream_input_length + 1);
170 |         first_pkt = 1;
171 |     }
172 |     else {
173 | 
174 |         data = (char *)malloc(def->stream_input_length + 1 - buflen);
175 | 
176 |         if(data == NULL) {
177 |             *error_msg = apr_psprintf(r->pool, "Unable to allocate memory to hold request body on stream. Asked for %"
178 |                                                APR_SIZE_T_FMT " bytes.", def->stream_input_length + 1 - buflen);
179 |             return -1;
180 |         }
181 | 
182 |         memset(data, 0, def->stream_input_length + 1 - buflen);
183 |         memcpy(data, def->stream_input_data, def->stream_input_length - buflen);
184 | 
185 |         stream_input_body = (char *)realloc(def->stream_input_data, def->stream_input_length + 1);
186 | 
187 |         def->stream_input_data = (char *)stream_input_body;
188 |     }
189 | 
190 |     if (def->stream_input_data == NULL) {
191 |         if(data)    {
192 |             free(data);
193 |             data = NULL;
194 |         }
195 |         *error_msg = apr_psprintf(r->pool, "Unable to allocate memory to hold request body on stream. Asked for %"
196 |                                           APR_SIZE_T_FMT " bytes.", def->stream_input_length + 1);
197 |         return -1;
198 |     }
199 | 
200 |     memset(def->stream_input_data, 0, def->stream_input_length+1);
201 | 
202 |     if(first_pkt)   {
203 |         memcpy(def->stream_input_data, buffer, def->stream_input_length);
204 |     } else {
205 |         memcpy(def->stream_input_data, data, def->stream_input_length - buflen);
206 |         memcpy(def->stream_input_data+(def->stream_input_length - buflen), buffer, buflen);
207 |     }
208 | 
209 |     if(data)    {
210 |         free(data);
211 |         data = NULL;
212 |     }
213 | 
214 |     return 1;
215 | }
216 | 
217 | /**
218 |  * Stores one chunk of request body data in memory.
219 |  */
220 | static apr_status_t body_store_memory(defender_t *def, const char *data, apr_size_t length, char **error_msg,
221 |                                       request_rec *r) {
222 | 
223 |     *error_msg = NULL;
224 | 
225 |     /* If we're here that means we are not over the
226 |      * request body in-memory limit yet.
227 |      */
228 |     {
229 |         unsigned long int bucket_offset, bucket_left;
230 | 
231 |         bucket_offset = 0;
232 |         bucket_left = length;
233 | 
234 |         /* Although we store the request body in chunks we don't
235 |          * want to use the same chunk sizes as the incoming memory
236 |          * buffers. They are often of very small sizes and that
237 |          * would make us waste a lot of memory. That's why we
238 |          * use our own chunks of CHUNK_CAPACITY sizes.
239 |          */
240 | 
241 |         /* Loop until we empty this bucket into our chunks. */
242 |         while(bucket_left > 0) {
243 |             /* Allocate a new chunk if we have to. */
244 |             if (def->body_chunk_current == NULL) {
245 |                 def->body_chunk_current = (chunk_t *)apr_pcalloc(def->body_pool, sizeof(chunk_t));
246 |                 if( def->body_chunk_current == NULL ) {
247 |                     *error_msg = apr_psprintf(r->pool, "Input filter: Failed to allocate %lu bytes "
248 |                             "for request body chunk.", (unsigned long)sizeof(chunk_t));
249 |                     return -1;
250 |                 }
251 | 
252 |                 def->body_chunk_current->data = (char *)malloc(CHUNK_CAPACITY);
253 |                 if( def->body_chunk_current->data == NULL ) {
254 |                     *error_msg = apr_psprintf(r->pool, "Input filter: Failed to allocate %d bytes "
255 |                             "for request body chunk data.", CHUNK_CAPACITY);
256 |                     return -1;
257 |                 }
258 | 
259 |                 def->body_chunk_current->length = 0;
260 |                 def->body_chunk_current->is_permanent = 1;
261 | 
262 |                 *(const chunk_t **)apr_array_push(def->body_chunks) = def->body_chunk_current;
263 |             }
264 | 
265 |             if( bucket_left < (CHUNK_CAPACITY - def->body_chunk_current->length) ) {
266 |                 /* There's enough space in the current chunk. */
267 |                 memcpy(def->body_chunk_current->data +
268 |                        def->body_chunk_current->length, data + bucket_offset, bucket_left);
269 |                 def->body_chunk_current->length += bucket_left;
270 |                 bucket_left = 0;
271 |             } else {
272 |                 /* Fill the existing chunk. */
273 |                 unsigned long int copy_length = CHUNK_CAPACITY - def->body_chunk_current->length;
274 | 
275 |                 memcpy(def->body_chunk_current->data + def->body_chunk_current->length, data + bucket_offset, copy_length);
276 |                 bucket_offset += copy_length;
277 |                 bucket_left -= copy_length;
278 |                 def->body_chunk_current->length += copy_length;
279 | 
280 |                 /* We're done with this chunk. Setting the pointer
281 |                  * to NULL is going to force a new chunk to be allocated
282 |                  * on the next go.
283 |                  */
284 |                 def->body_chunk_current = NULL;
285 |             }
286 |         }
287 |     }
288 | 
289 |     return 1;
290 | }
291 | 
292 | /**
293 |  * Replace a bunch of chunks holding a request body with a single large chunk.
294 |  */
295 | static apr_status_t body_end_raw(defender_t *def, char **error_msg, request_rec *r) {
296 |     chunk_t **chunks, *one_chunk;
297 |     char *d;
298 |     int i, sofar;
299 | 
300 |     *error_msg = NULL;
301 | 
302 |     /* Allocate a buffer large enough to hold the request body. */
303 | 
304 |     if( def->body_length + 1 == 0 ) {
305 |         *error_msg = apr_psprintf(r->pool, "Internal error, request body length will overflow: %" APR_SIZE_T_FMT,
306 |                                             def->body_length);
307 |         return -1;
308 |     }
309 | 
310 |     def->body_buffer = (char *)malloc(def->body_length + 1);
311 |     if( def->body_buffer == NULL ) {
312 |         *error_msg = apr_psprintf(r->pool, "Unable to allocate memory to hold request body. Asked for %" APR_SIZE_T_FMT
313 |                                           " bytes.",  def->body_length + 1);
314 |         return -1;
315 |     }
316 | 
317 |     def->body_buffer[def->body_length] = '\0';
318 | 
319 |     /* Copy the data we keep in chunks into the new buffer. */
320 | 
321 |     sofar = 0;
322 |     d = def->body_buffer;
323 |     chunks = (chunk_t **)def->body_chunks->elts;
324 |     for( i = 0; i < def->body_chunks->nelts; i++ ) {
325 |         if( sofar + chunks[i]->length <= def->body_length ) {
326 |             memcpy(d, chunks[i]->data, chunks[i]->length);
327 |             d += chunks[i]->length;
328 |             sofar += chunks[i]->length;
329 |         } else {
330 |             *error_msg = apr_psprintf(r->pool, "Internal error, request body buffer overflow.");
331 |             return -1;
332 |         }
333 |     }
334 | 
335 | 
336 |     /* Now free the memory used by the chunks. */
337 | 
338 |     chunks = (chunk_t **)def->body_chunks->elts;
339 |     for( i = 0; i < def->body_chunks->nelts; i++ ) {
340 |         free(chunks[i]->data);
341 |         chunks[i]->data = NULL;
342 |     }
343 | 
344 |     /* Create a new array with only one chunk in it. */
345 | 
346 |     def->body_chunks = apr_array_make(def->body_pool, 2, sizeof(chunk_t *));
347 |     if( def->body_chunks == NULL ) {
348 |         *error_msg = apr_pstrdup(r->pool, "Failed to create structure to hold request body.");
349 |         return -1;
350 |     }
351 | 
352 |     one_chunk = (chunk_t *)apr_pcalloc(def->body_pool, sizeof(chunk_t));
353 |     one_chunk->data = def->body_buffer;
354 |     one_chunk->length = def->body_length;
355 |     one_chunk->is_permanent = 1;
356 |     *(const chunk_t **)apr_array_push(def->body_chunks) = one_chunk;
357 | 
358 |     /* FIXME : Code needed ?
359 |     if( def->txcfg->reqbody_limit > 0 && msr->txcfg->reqbody_limit < msr->msc_reqbody_length)    {
360 |         msr->msc_reqbody_length = msr->txcfg->reqbody_limit;
361 |     }
362 |     */
363 | 
364 |     return 1;
365 | }
366 | 
367 | /**
368 |  * Stops receiving the request body.
369 |  */
370 | apr_status_t body_end(defender_t *def, char **error_msg, request_rec *r) {
371 |     *error_msg = NULL;
372 | 
373 |     /* Note that we've read the body. */
374 |     def->body_read = 1;
375 | 
376 |     /* Convert to a single continous buffer, but don't do anything else. */
377 |     return body_end_raw(def, error_msg, r);
378 | }
379 | 
380 | /**
381 |  * Reads request body from a client.
382 |  */
383 | apr_status_t read_request_body(defender_t *def, char **error_msg, request_rec *r, unsigned long body_limit) {
384 | 
385 |     unsigned int finished_reading;
386 |     apr_bucket_brigade *bb_in;
387 |     apr_bucket *bucket;
388 | 
389 |     if( error_msg == NULL ) return -1;
390 |     *error_msg = NULL;
391 | 
392 |     if( def->body_should_exist != 1 ) {
393 |         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "This request does not have a body.");
394 |         return 0;
395 |     }
396 | 
397 |     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Reading request body.");
398 | 
399 |     if( body_start(def, error_msg, r) < 0 ) {
400 |         return -1;
401 |     }
402 | 
403 |     finished_reading = 0;
404 |     def->if_seen_eos = 0;
405 |     bb_in = apr_brigade_create(r->pool, r->connection->bucket_alloc);
406 |     if( bb_in == NULL ) {
407 |         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "Read body: Failed to allocate new brigade.");
408 |         return -1;
409 |     }
410 |     do {
411 |         apr_status_t rc;
412 | 
413 |         rc = ap_get_brigade(r->input_filters, bb_in, AP_MODE_READBYTES, APR_BLOCK_READ, HUGE_STRING_LEN);
414 |         if (rc != APR_SUCCESS) {
415 |             /* NOTE Apache returns AP_FILTER_ERROR here when the request is
416 |              *      too large and APR_EGENERAL when the client disconnects.
417 |              */
418 |             switch(rc) {
419 |                 case APR_INCOMPLETE :
420 |                     *error_msg = apr_psprintf(r->pool, "Error reading request body: %s", get_apr_error(r->pool, rc));
421 |                     return -7;
422 |                 case APR_EOF :
423 |                     *error_msg = apr_psprintf(r->pool, "Error reading request body: %s", get_apr_error(r->pool, rc));
424 |                     return -6;
425 |                 case APR_TIMEUP :
426 |                     *error_msg = apr_psprintf(r->pool, "Error reading request body: %s", get_apr_error(r->pool, rc));
427 |                     return -4;
428 |                 case AP_FILTER_ERROR :
429 |                     *error_msg = apr_psprintf(r->pool, "Error reading request body: HTTP Error 413 - Request entity too large. (Most likely.)");
430 |                     return -3;
431 |                 case APR_EGENERAL :
432 |                     *error_msg = apr_psprintf(r->pool, "Error reading request body: Client went away.");
433 |                     return -2;
434 |                 default :
435 |                     *error_msg = apr_psprintf(r->pool, "Error reading request body: %s", get_apr_error(r->pool, rc));
436 |                     return -1;
437 |             }
438 |         }
439 | 
440 |         /* Loop through the buckets in the brigade in order
441 |          * to extract the size of the data available.
442 |          */
443 |         for( bucket = APR_BRIGADE_FIRST(bb_in);
444 |             bucket != APR_BRIGADE_SENTINEL(bb_in);
445 |             bucket = APR_BUCKET_NEXT(bucket) ) {
446 | 
447 |             const char *buf;
448 |             apr_size_t buflen;
449 | 
450 |             rc = apr_bucket_read(bucket, &buf, &buflen, APR_BLOCK_READ);
451 |             if( rc != APR_SUCCESS ) {
452 |                 *error_msg = apr_psprintf(r->pool, "Failed reading input / bucket (%d): %s", rc, get_apr_error(r->pool, rc));
453 |                 return -1;
454 |             }
455 | 
456 |             ap_log_rerror(APLOG_MARK, APLOG_TRACE8, 0, r, "Input filter: Bucket type %s contains %" APR_SIZE_T_FMT
457 |                                                           " bytes. Total length=%lu", bucket->type->name, buflen,
458 |                                                            def->body_length);
459 | 
460 |             /* Check request body limit (should only trigger on chunked requests). */
461 |             if( def->body_length + buflen > (apr_size_t)body_limit ) {
462 |                 *error_msg = apr_psprintf(r->pool, "Request body (%ld+%ld) is larger than the configured limit (%ld).",
463 |                                                     def->body_length, buflen, body_limit);
464 |                 return -5;
465 |             }
466 | 
467 |             def->stream_input_length += buflen;
468 |             body_to_stream(def, buf, buflen, error_msg, r);
469 | 
470 |             def->body_length += buflen;
471 | 
472 |             if( buflen != 0 ) {
473 |                 int rcbs = body_store_memory(def, buf, buflen, error_msg, r);
474 |                 if( rcbs < 0 ) {
475 |                     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "Error while ending bb saving : %s", *error_msg);
476 |                     return -1;
477 |                 }
478 |             }
479 | 
480 |             if( APR_BUCKET_IS_EOS(bucket) ) {
481 |                 finished_reading = 1;
482 |                 def->if_seen_eos = 1;
483 |             }
484 |         }
485 | 
486 |         apr_brigade_cleanup(bb_in);
487 |     } while( !finished_reading );
488 | 
489 | 
490 |     if( body_end(def, error_msg, r) < 0 ) {
491 |         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "Error while ending bb saving : %s", *error_msg);
492 |     }
493 | 
494 |     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Completed receiving request body (length %"
495 |                                                   APR_SIZE_T_FMT ").", def->body_length);
496 | 
497 |     def->status = IF_STATUS_WANTS_TO_RUN;
498 | 
499 |     return 1;
500 | }
501 | 
502 | /**
503 |  *  Clear defender_t body_chunks and body_pool attributes.
504 |  *  Called when request pool is destroyed, registered with apr_pool_cleanup_register()
505 |  */
506 | apr_status_t body_clear(void *data) {
507 |     defender_t *def = (defender_t *)data;
508 | 
509 |     /* Release memory we used to store request body data. */
510 |     if( def->body_chunks != NULL) {
511 |         chunk_t **chunks = (chunk_t **)def->body_chunks->elts;
512 |         int i;
513 | 
514 |         for(i = 0; i < def->body_chunks->nelts; i++) {
515 |             if (chunks[i]->data != NULL) {
516 |                 free(chunks[i]->data);
517 |                 chunks[i]->data = NULL;
518 |             }
519 |         }
520 |     }
521 | 
522 |     if( def->body_pool != NULL ) {
523 |         apr_pool_destroy(def->body_pool);
524 |         def->body_pool = NULL;
525 |     }
526 | 
527 |     return 1;
528 | }
529 | 


--------------------------------------------------------------------------------
/deps/libinjection/libinjection_html5.c:
--------------------------------------------------------------------------------
  1 | #include "libinjection_html5.h"
  2 | 
  3 | #include <string.h>
  4 | #include <assert.h>
  5 | 
/*
 * TRACE() prints the current function name and line number when the file is
 * compiled with DEBUG defined, and expands to nothing otherwise.
 */
#ifdef DEBUG
#include <stdio.h>
#define TRACE() printf("%s:%d\n", __FUNCTION__, __LINE__)
#else
#define TRACE()
#endif


/* Character codes the tokenizer compares against (ASCII values). */
#define CHAR_EOF -1       /* sentinel returned by h5_skip_white at end of input */
#define CHAR_NULL 0       /* NUL byte */
#define CHAR_BANG 33      /* '!' */
#define CHAR_DOUBLE 34    /* '"' */
#define CHAR_PERCENT 37   /* '%' */
#define CHAR_SINGLE 39    /* '\'' */
#define CHAR_DASH 45      /* '-' */
#define CHAR_SLASH 47     /* '/' */
#define CHAR_LT 60        /* '<' */
#define CHAR_EQUALS 61    /* '=' */
#define CHAR_GT 62        /* '>' */
#define CHAR_QUESTION 63  /* '?' */
#define CHAR_RIGHTB 93    /* ']' */
#define CHAR_TICK 96      /* '`' */

/*
 * prototypes
 *
 * Forward declarations for the helpers and for the tokenizer state
 * functions. Each state function takes the tokenizer state and returns int;
 * libinjection_h5_next() invokes whichever one is stored in hs->state.
 */

static int h5_skip_white(h5_state_t* hs);
static int h5_is_white(char c);
static int h5_state_eof(h5_state_t* hs);
static int h5_state_data(h5_state_t* hs);
static int h5_state_tag_open(h5_state_t* hs);
static int h5_state_tag_name(h5_state_t* hs);
static int h5_state_tag_name_close(h5_state_t* hs);
static int h5_state_end_tag_open(h5_state_t* hs);
static int h5_state_self_closing_start_tag(h5_state_t* hs);
static int h5_state_attribute_name(h5_state_t* hs);
static int h5_state_after_attribute_name(h5_state_t* hs);
static int h5_state_before_attribute_name(h5_state_t* hs);
static int h5_state_before_attribute_value(h5_state_t* hs);
static int h5_state_attribute_value_double_quote(h5_state_t* hs);
static int h5_state_attribute_value_single_quote(h5_state_t* hs);
static int h5_state_attribute_value_back_quote(h5_state_t* hs);
static int h5_state_attribute_value_no_quote(h5_state_t* hs);
static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs);
static int h5_state_comment(h5_state_t* hs);
static int h5_state_cdata(h5_state_t* hs);


/* 12.2.4.44 (bogus comment state, plus the "<% ... %>" variant) */
static int h5_state_bogus_comment(h5_state_t* hs);
static int h5_state_bogus_comment2(h5_state_t* hs);

/* 12.2.4.45 (markup declaration open state) */
static int h5_state_markup_declaration_open(h5_state_t* hs);

/* 8.2.4.52 (DOCTYPE state) */
static int h5_state_doctype(h5_state_t* hs);
 62 | 
 63 | /**
 64 |  * public function
 65 |  */
 66 | void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags)
 67 | {
 68 |     memset(hs, 0, sizeof(h5_state_t));
 69 |     hs->s = s;
 70 |     hs->len = len;
 71 | 
 72 |     switch (flags) {
 73 |     case DATA_STATE:
 74 |         hs->state = h5_state_data;
 75 |         break;
 76 |     case VALUE_NO_QUOTE:
 77 |         hs->state = h5_state_before_attribute_name;
 78 |         break;
 79 |     case VALUE_SINGLE_QUOTE:
 80 |         hs->state = h5_state_attribute_value_single_quote;
 81 |         break;
 82 |     case VALUE_DOUBLE_QUOTE:
 83 |         hs->state = h5_state_attribute_value_double_quote;
 84 |         break;
 85 |     case VALUE_BACK_QUOTE:
 86 |         hs->state = h5_state_attribute_value_back_quote;
 87 |         break;
 88 |     }
 89 | }
 90 | 
 91 | /**
 92 |  * public function
 93 |  */
 94 | int libinjection_h5_next(h5_state_t* hs)
 95 | {
 96 |     assert(hs->state != NULL);
 97 |     return (*hs->state)(hs);
 98 | }
 99 | 
100 | /**
101 |  * Everything below here is private
102 |  *
103 |  */
104 | 
105 | 
/*
 * Returns 1 when ch is one of the characters treated as whitespace,
 * 0 otherwise.
 *
 *   ' '  = space          = 0x20
 *   \t   = horizontal tab = 0x09
 *   \n   = newline        = 0x0A
 *   \v   = vertical tab   = 0x0B
 *   \f   = form feed      = 0x0C
 *   \r   = cr             = 0x0D
 *
 * A NUL byte is reported as whitespace as well.
 */
static int h5_is_white(char ch)
{
    switch (ch) {
    case '\0':
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
117 | 
118 | static int h5_skip_white(h5_state_t* hs)
119 | {
120 |     char ch;
121 |     while (hs->pos < hs->len) {
122 |         ch = hs->s[hs->pos];
123 |         switch (ch) {
124 |         case 0x00: /* IE only */
125 |         case 0x20:
126 |         case 0x09:
127 |         case 0x0A:
128 |         case 0x0B: /* IE only */
129 |         case 0x0C:
130 |         case 0x0D: /* IE only */
131 |             hs->pos += 1;
132 |             break;
133 |         default:
134 |             return ch;
135 |         }
136 |     }
137 |     return CHAR_EOF;
138 | }
139 | 
140 | static int h5_state_eof(h5_state_t* hs)
141 | {
142 |     /* eliminate unused function argument warning */
143 |     (void)hs;
144 |     return 0;
145 | }
146 | 
147 | static int h5_state_data(h5_state_t* hs)
148 | {
149 |     const char* idx;
150 | 
151 |     TRACE();
152 |     assert(hs->len >= hs->pos);
153 |     idx = (const char*) memchr(hs->s + hs->pos, CHAR_LT, hs->len - hs->pos);
154 |     if (idx == NULL) {
155 |         hs->token_start = hs->s + hs->pos;
156 |         hs->token_len = hs->len - hs->pos;
157 |         hs->token_type = DATA_TEXT;
158 |         hs->state = h5_state_eof;
159 |         if (hs->token_len == 0) {
160 |             return 0;
161 |         }
162 |     } else {
163 |         hs->token_start = hs->s + hs->pos;
164 |         hs->token_type = DATA_TEXT;
165 |         hs->token_len = (size_t)(idx - hs->s) - hs->pos;
166 |         hs->pos = (size_t)(idx - hs->s) + 1;
167 |         hs->state = h5_state_tag_open;
168 |         if (hs->token_len == 0) {
169 |             return h5_state_tag_open(hs);
170 |         }
171 |     }
172 |     return 1;
173 | }
174 | 
175 | /**
176 |  * 12 2.4.8
177 |  */
178 | static int h5_state_tag_open(h5_state_t* hs)
179 | {
180 |     char ch;
181 | 
182 |     TRACE();
183 |     ch = hs->s[hs->pos];
184 |     if (ch == CHAR_BANG) {
185 |         hs->pos += 1;
186 |         return h5_state_markup_declaration_open(hs);
187 |     } else if (ch == CHAR_SLASH) {
188 |         hs->pos += 1;
189 |         hs->is_close = 1;
190 |         return h5_state_end_tag_open(hs);
191 |     } else if (ch == CHAR_QUESTION) {
192 |         hs->pos += 1;
193 |         return h5_state_bogus_comment(hs);
194 |     } else if (ch == CHAR_PERCENT) {
195 |         /* this is not in spec.. alternative comment format used
196 |            by IE <= 9 and Safari < 4.0.3 */
197 |         hs->pos += 1;
198 |         return h5_state_bogus_comment2(hs);
199 |     } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
200 |         return h5_state_tag_name(hs);
201 |     } else if (ch == CHAR_NULL) {
202 |         /* IE-ism  NULL characters are ignored */
203 |         return h5_state_tag_name(hs);
204 |     } else {
205 |         /* user input mistake in configuring state */
206 |         if (hs->pos == 0) {
207 |             return h5_state_data(hs);
208 |         }
209 |         hs->token_start = hs->s + hs->pos - 1;
210 |         hs->token_len = 1;
211 |         hs->token_type = DATA_TEXT;
212 |         hs->state = h5_state_data;
213 |         return 1;
214 |     }
215 | }
216 | /**
217 |  * 12.2.4.9
218 |  */
219 | static int h5_state_end_tag_open(h5_state_t* hs)
220 | {
221 |     char ch;
222 | 
223 |     TRACE();
224 | 
225 |     if (hs->pos >= hs->len) {
226 |         return 0;
227 |     }
228 |     ch = hs->s[hs->pos];
229 |     if (ch == CHAR_GT) {
230 |         return h5_state_data(hs);
231 |     } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
232 |         return h5_state_tag_name(hs);
233 |     }
234 | 
235 |     hs->is_close = 0;
236 |     return h5_state_bogus_comment(hs);
237 | }
238 | /*
239 |  *
240 |  */
241 | static int h5_state_tag_name_close(h5_state_t* hs)
242 | {
243 |     TRACE();
244 |     hs->is_close = 0;
245 |     hs->token_start = hs->s + hs->pos;
246 |     hs->token_len = 1;
247 |     hs->token_type = TAG_NAME_CLOSE;
248 |     hs->pos += 1;
249 |     if (hs->pos < hs->len) {
250 |         hs->state = h5_state_data;
251 |     } else {
252 |         hs->state = h5_state_eof;
253 |     }
254 | 
255 |     return 1;
256 | }
257 | 
258 | /**
259 |  * 12.2.4.10
260 |  */
261 | static int h5_state_tag_name(h5_state_t* hs)
262 | {
263 |     char ch;
264 |     size_t pos;
265 | 
266 |     TRACE();
267 |     pos = hs->pos;
268 |     while (pos < hs->len) {
269 |         ch = hs->s[pos];
270 |         if (ch == 0) {
271 |             /* special non-standard case */
272 |             /* allow nulls in tag name   */
273 |             /* some old browsers apparently allow and ignore them */
274 |             pos += 1;
275 |         } else if (h5_is_white(ch)) {
276 |             hs->token_start = hs->s + hs->pos;
277 |             hs->token_len = pos - hs->pos;
278 |             hs->token_type = TAG_NAME_OPEN;
279 |             hs->pos = pos + 1;
280 |             hs->state = h5_state_before_attribute_name;
281 |             return 1;
282 |         } else if (ch == CHAR_SLASH) {
283 |             hs->token_start = hs->s + hs->pos;
284 |             hs->token_len = pos - hs->pos;
285 |             hs->token_type = TAG_NAME_OPEN;
286 |             hs->pos = pos + 1;
287 |             hs->state = h5_state_self_closing_start_tag;
288 |             return 1;
289 |         } else if (ch == CHAR_GT) {
290 |             hs->token_start = hs->s + hs->pos;
291 |             hs->token_len = pos - hs->pos;
292 |             if (hs->is_close) {
293 |                 hs->pos = pos + 1;
294 |                 hs->is_close = 0;
295 |                 hs->token_type = TAG_CLOSE;
296 |                 hs->state = h5_state_data;
297 |             } else {
298 |                 hs->pos = pos;
299 |                 hs->token_type = TAG_NAME_OPEN;
300 |                 hs->state = h5_state_tag_name_close;
301 |             }
302 |             return 1;
303 |         } else {
304 |             pos += 1;
305 |         }
306 |     }
307 | 
308 |     hs->token_start = hs->s + hs->pos;
309 |     hs->token_len = hs->len - hs->pos;
310 |     hs->token_type = TAG_NAME_OPEN;
311 |     hs->state = h5_state_eof;
312 |     return 1;
313 | }
314 | 
315 | /**
316 |  * 12.2.4.34
317 |  */
318 | static int h5_state_before_attribute_name(h5_state_t* hs)
319 | {
320 |     int ch;
321 | 
322 |     TRACE();
323 |     ch = h5_skip_white(hs);
324 |     switch (ch) {
325 |     case CHAR_EOF: {
326 |         return 0;
327 |     }
328 |     case CHAR_SLASH: {
329 |         hs->pos += 1;
330 |         return h5_state_self_closing_start_tag(hs);
331 |     }
332 |     case CHAR_GT: {
333 |         hs->state = h5_state_data;
334 |         hs->token_start = hs->s + hs->pos;
335 |         hs->token_len = 1;
336 |         hs->token_type = TAG_NAME_CLOSE;
337 |         hs->pos += 1;
338 |         return 1;
339 |     }
340 |     default: {
341 |         return h5_state_attribute_name(hs);
342 |     }
343 |     }
344 | }
345 | 
346 | static int h5_state_attribute_name(h5_state_t* hs)
347 | {
348 |     char ch;
349 |     size_t pos;
350 | 
351 |     TRACE();
352 |     pos = hs->pos + 1;
353 |     while (pos < hs->len) {
354 |         ch = hs->s[pos];
355 |         if (h5_is_white(ch)) {
356 |             hs->token_start = hs->s + hs->pos;
357 |             hs->token_len   = pos - hs->pos;
358 |             hs->token_type  = ATTR_NAME;
359 |             hs->state = h5_state_after_attribute_name;
360 |             hs->pos = pos + 1;
361 |             return 1;
362 |         } else if (ch == CHAR_SLASH) {
363 |             hs->token_start = hs->s + hs->pos;
364 |             hs->token_len   = pos - hs->pos;
365 |             hs->token_type  = ATTR_NAME;
366 |             hs->state = h5_state_self_closing_start_tag;
367 |             hs->pos = pos + 1;
368 |             return 1;
369 |         } else if (ch == CHAR_EQUALS) {
370 |             hs->token_start = hs->s + hs->pos;
371 |             hs->token_len   = pos - hs->pos;
372 |             hs->token_type  = ATTR_NAME;
373 |             hs->state = h5_state_before_attribute_value;
374 |             hs->pos = pos + 1;
375 |             return 1;
376 |         } else if (ch == CHAR_GT) {
377 |             hs->token_start = hs->s + hs->pos;
378 |             hs->token_len   = pos - hs->pos;
379 |             hs->token_type  = ATTR_NAME;
380 |             hs->state = h5_state_tag_name_close;
381 |             hs->pos = pos;
382 |             return 1;
383 |         } else {
384 |             pos += 1;
385 |         }
386 |     }
387 |     /* EOF */
388 |     hs->token_start = hs->s + hs->pos;
389 |     hs->token_len   = hs->len - hs->pos;
390 |     hs->token_type  = ATTR_NAME;
391 |     hs->state = h5_state_eof;
392 |     hs->pos = hs->len;
393 |     return 1;
394 | }
395 | 
396 | /**
397 |  * 12.2.4.36
398 |  */
399 | static int h5_state_after_attribute_name(h5_state_t* hs)
400 | {
401 |     int c;
402 | 
403 |     TRACE();
404 |     c = h5_skip_white(hs);
405 |     switch (c) {
406 |     case CHAR_EOF: {
407 |         return 0;
408 |     }
409 |     case CHAR_SLASH: {
410 |         hs->pos += 1;
411 |         return h5_state_self_closing_start_tag(hs);
412 |     }
413 |     case CHAR_EQUALS: {
414 |         hs->pos += 1;
415 |         return h5_state_before_attribute_value(hs);
416 |     }
417 |     case CHAR_GT: {
418 |         return h5_state_tag_name_close(hs);
419 |     }
420 |     default: {
421 |         return h5_state_attribute_name(hs);
422 |     }
423 |     }
424 | }
425 | 
426 | /**
427 |  * 12.2.4.37
428 |  */
429 | static int h5_state_before_attribute_value(h5_state_t* hs)
430 | {
431 |     int c;
432 |     TRACE();
433 | 
434 |     c = h5_skip_white(hs);
435 | 
436 |     if (c == CHAR_EOF) {
437 |         hs->state = h5_state_eof;
438 |         return 0;
439 |     }
440 | 
441 |     if (c == CHAR_DOUBLE) {
442 |         return h5_state_attribute_value_double_quote(hs);
443 |     } else if (c == CHAR_SINGLE) {
444 |         return h5_state_attribute_value_single_quote(hs);
445 |     } else if (c == CHAR_TICK) {
446 |         /* NON STANDARD IE */
447 |         return h5_state_attribute_value_back_quote(hs);
448 |     } else {
449 |         return h5_state_attribute_value_no_quote(hs);
450 |     }
451 | }
452 | 
453 | 
454 | static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
455 | {
456 |     const char* idx;
457 | 
458 |     TRACE();
459 | 
460 |     /* skip initial quote in normal case.
461 |      * don't do this "if (pos == 0)" since it means we have started
462 |      * in a non-data state.  given an input of '><foo
463 |      * we want to make 0-length attribute name
464 |      */
465 |     if (hs->pos > 0) {
466 |         hs->pos += 1;
467 |     }
468 | 
469 | 
470 |     idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos);
471 |     if (idx == NULL) {
472 |         hs->token_start = hs->s + hs->pos;
473 |         hs->token_len = hs->len - hs->pos;
474 |         hs->token_type = ATTR_VALUE;
475 |         hs->state = h5_state_eof;
476 |     } else {
477 |         hs->token_start = hs->s + hs->pos;
478 |         hs->token_len = (size_t)(idx - hs->s) - hs->pos;
479 |         hs->token_type = ATTR_VALUE;
480 |         hs->state = h5_state_after_attribute_value_quoted_state;
481 |         hs->pos += hs->token_len + 1;
482 |     }
483 |     return 1;
484 | }
485 | 
486 | static
487 | int h5_state_attribute_value_double_quote(h5_state_t* hs)
488 | {
489 |     TRACE();
490 |     return h5_state_attribute_value_quote(hs, CHAR_DOUBLE);
491 | }
492 | 
493 | static
494 | int h5_state_attribute_value_single_quote(h5_state_t* hs)
495 | {
496 |     TRACE();
497 |     return h5_state_attribute_value_quote(hs, CHAR_SINGLE);
498 | }
499 | 
500 | static
501 | int h5_state_attribute_value_back_quote(h5_state_t* hs)
502 | {
503 |     TRACE();
504 |     return h5_state_attribute_value_quote(hs, CHAR_TICK);
505 | }
506 | 
507 | static int h5_state_attribute_value_no_quote(h5_state_t* hs)
508 | {
509 |     char ch;
510 |     size_t pos;
511 | 
512 |     TRACE();
513 |     pos = hs->pos;
514 |     while (pos < hs->len) {
515 |         ch = hs->s[pos];
516 |         if (h5_is_white(ch)) {
517 |             hs->token_type = ATTR_VALUE;
518 |             hs->token_start = hs->s + hs->pos;
519 |             hs->token_len = pos - hs->pos;
520 |             hs->pos = pos + 1;
521 |             hs->state = h5_state_before_attribute_name;
522 |             return 1;
523 |         } else if (ch == CHAR_GT) {
524 |             hs->token_type = ATTR_VALUE;
525 |             hs->token_start = hs->s + hs->pos;
526 |             hs->token_len = pos - hs->pos;
527 |             hs->pos = pos;
528 |             hs->state = h5_state_tag_name_close;
529 |             return 1;
530 |         }
531 |         pos += 1;
532 |     }
533 |     TRACE();
534 |     /* EOF */
535 |     hs->state = h5_state_eof;
536 |     hs->token_start = hs->s + hs->pos;
537 |     hs->token_len = hs->len - hs->pos;
538 |     hs->token_type = ATTR_VALUE;
539 |     return 1;
540 | }
541 | 
542 | /**
543 |  * 12.2.4.41
544 |  */
545 | static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs)
546 | {
547 |     char ch;
548 | 
549 |     TRACE();
550 |     if (hs->pos >= hs->len) {
551 |         return 0;
552 |     }
553 |     ch = hs->s[hs->pos];
554 |     if (h5_is_white(ch)) {
555 |         hs->pos += 1;
556 |         return h5_state_before_attribute_name(hs);
557 |     } else if (ch == CHAR_SLASH) {
558 |         hs->pos += 1;
559 |         return h5_state_self_closing_start_tag(hs);
560 |     } else if (ch == CHAR_GT) {
561 |         hs->token_start = hs->s + hs->pos;
562 |         hs->token_len = 1;
563 |         hs->token_type = TAG_NAME_CLOSE;
564 |         hs->pos += 1;
565 |         hs->state = h5_state_data;
566 |         return 1;
567 |     } else {
568 |         return h5_state_before_attribute_name(hs);
569 |     }
570 | }
571 | 
572 | /**
573 |  * 12.2.4.43
574 |  */
575 | static int h5_state_self_closing_start_tag(h5_state_t* hs)
576 | {
577 |     char ch;
578 | 
579 |     TRACE();
580 |     if (hs->pos >= hs->len) {
581 |         return 0;
582 |     }
583 |     ch = hs->s[hs->pos];
584 |     if (ch == CHAR_GT) {
585 |         assert(hs->pos > 0);
586 |         hs->token_start = hs->s + hs->pos -1;
587 |         hs->token_len = 2;
588 |         hs->token_type = TAG_NAME_SELFCLOSE;
589 |         hs->state = h5_state_data;
590 |         hs->pos += 1;
591 |         return 1;
592 |     } else {
593 |         return h5_state_before_attribute_name(hs);
594 |     }
595 | }
596 | 
597 | /**
598 |  * 12.2.4.44 - bogus comment: a TAG_COMMENT token running up to (not including) the next '>'.
599 |  */
600 | static int h5_state_bogus_comment(h5_state_t* hs)
601 | {
602 |     const char* start = hs->s + hs->pos;
603 |     const char* gt;
604 | 
605 |     TRACE();
606 |     hs->token_type = TAG_COMMENT;
607 |     hs->token_start = start;
608 |     gt = (const char*) memchr(start, CHAR_GT, hs->len - hs->pos);
609 |     if (gt != NULL) {
610 |         hs->token_len = (size_t)(gt - hs->s) - hs->pos;
611 |         hs->pos = (size_t)(gt - hs->s) + 1;   /* resume right after '>' */
612 |         hs->state = h5_state_data;
613 |     } else {
614 |         /* no '>' at all: the remaining input is the comment */
615 |         hs->token_len = hs->len - hs->pos;
616 |         hs->pos = hs->len;
617 |         hs->state = h5_state_eof;
618 |     }
619 |     return 1;
620 | }
621 | 
622 | /**
623 |  * 12.2.4.44 ALT - like the bogus comment state, but the token ends at "%>".
624 |  */
625 | static int h5_state_bogus_comment2(h5_state_t* hs)
626 | {
627 |     const char* const end = hs->s + hs->len;
628 |     const char* pct;
629 |     size_t from = hs->pos;
630 | 
631 |     TRACE();
632 |     hs->token_type = TAG_COMMENT;
633 |     hs->token_start = hs->s + hs->pos;
634 | 
635 |     for (;;) {
636 |         pct = (const char*) memchr(hs->s + from, CHAR_PERCENT, hs->len - from);
637 |         if (pct == NULL || pct + 1 >= end) {
638 |             /* no '%' left with a byte after it: the rest is the comment */
639 |             hs->token_len = hs->len - hs->pos;
640 |             hs->pos = hs->len;
641 |             hs->state = h5_state_eof;
642 |             return 1;
643 |         }
644 |         if (pct[1] == CHAR_GT) {
645 |             break;
646 |         }
647 |         /* lone '%': keep searching from the following byte */
648 |         from = (size_t)(pct - hs->s) + 1;
649 |     }
650 | 
651 |     /* found "%>": the token excludes it and scanning resumes after it */
652 |     hs->token_len = (size_t)(pct - hs->s) - hs->pos;
653 |     hs->pos = (size_t)(pct - hs->s) + 2;
654 |     hs->state = h5_state_data;
655 |     return 1;
656 | }
657 | 
658 | /**
659 |  * 8.2.4.45 - picks the DOCTYPE, CDATA, comment or bogus-comment state from the upcoming bytes.
660 |  */
661 | static int h5_state_markup_declaration_open(h5_state_t* hs)
662 | {
663 |     const char* p;
664 |     size_t avail;
665 | 
666 |     TRACE();
667 |     p = hs->s + hs->pos;
668 |     avail = hs->len - hs->pos;
669 | 
670 |     if (avail >= 7) {
671 |         /* "DOCTYPE" is matched case-insensitively; pos is left untouched */
672 |         if ((p[0] == 'D' || p[0] == 'd') &&
673 |             (p[1] == 'O' || p[1] == 'o') &&
674 |             (p[2] == 'C' || p[2] == 'c') &&
675 |             (p[3] == 'T' || p[3] == 't') &&
676 |             (p[4] == 'Y' || p[4] == 'y') &&
677 |             (p[5] == 'P' || p[5] == 'p') &&
678 |             (p[6] == 'E' || p[6] == 'e')) {
679 |             return h5_state_doctype(hs);
680 |         }
681 | 
682 |         /* "[CDATA[" must be upper case; it is skipped before parsing */
683 |         if (p[0] == '[' && p[1] == 'C' && p[2] == 'D' && p[3] == 'A' &&
684 |             p[4] == 'T' && p[5] == 'A' && p[6] == '[') {
685 |             hs->pos += 7;
686 |             return h5_state_cdata(hs);
687 |         }
688 |     }
689 | 
690 |     /* "--" opens a regular comment; the two dashes are skipped */
691 |     if (avail >= 2 && p[0] == '-' && p[1] == '-') {
692 |         hs->pos += 2;
693 |         return h5_state_comment(hs);
694 |     }
695 | 
696 |     /* anything else is handled as a bogus comment */
697 |     return h5_state_bogus_comment(hs);
698 | }
699 | 
700 | /**
701 |  * 12.2.4.48
702 |  * 12.2.4.49
703 |  * 12.2.4.50
704 |  * 12.2.4.51
705 |  *   The spec's state machine is confusing because it can only look
706 |  *   at one character at a time; put simply, a comment ends by:
707 |  *   1) EOF
708 |  *   2) ending in -->
709 |  *   3) ending in -!>
710 |  */
711 | static int h5_state_comment(h5_state_t* hs)
712 | {
713 |     char ch;
714 |     const char* idx;
715 |     size_t pos;
716 |     size_t offset;
717 |     const char* end = hs->s + hs->len; /* one past the last input byte */
718 | 
719 |     TRACE();
720 |     pos = hs->pos; /* search cursor; hs->pos keeps marking the start of the token */
721 |     while (1) {
722 |         /* find the next '-' that could begin a terminator */
723 |         idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos);
724 | 
725 |         /* did not find anything or has less than 3 chars left */
726 |         if (idx == NULL || idx > hs->s + hs->len - 3) {
727 |             hs->state = h5_state_eof;
728 |             hs->token_start = hs->s + hs->pos;
729 |             hs->token_len = hs->len - hs->pos; /* the rest of the input is the comment */
730 |             hs->token_type = TAG_COMMENT;
731 |             return 1; /* note: hs->pos is not advanced on this path */
732 |         }
733 |         offset = 1; /* distance from idx to the byte examined next */
734 | 
735 |         /* skip all nulls */
736 |         while (idx + offset < end && *(idx + offset) == 0) {
737 |             offset += 1;
738 |         }
739 |         if (idx + offset == end) { /* only NUL bytes followed the dash */
740 |             hs->state = h5_state_eof;
741 |             hs->token_start = hs->s + hs->pos;
742 |             hs->token_len = hs->len - hs->pos;
743 |             hs->token_type = TAG_COMMENT;
744 |             return 1;
745 |         }
746 | 
747 |         ch = *(idx + offset);
748 |         if (ch != CHAR_DASH && ch != CHAR_BANG) { /* second byte must be '-' or '!' */
749 |             pos = (size_t)(idx - hs->s) + 1; /* retry from the byte after this dash */
750 |             continue;
751 |         }
752 | 
753 |         /* need to test */
754 | #if 0
755 |         /* skip all nulls */
756 |         while (idx + offset < end && *(idx + offset) == 0) {
757 |             offset += 1;
758 |         }
759 |         if (idx + offset == end) {
760 |             hs->state = h5_state_eof;
761 |             hs->token_start = hs->s + hs->pos;
762 |             hs->token_len = hs->len - hs->pos;
763 |             hs->token_type = TAG_COMMENT;
764 |             return 1;
765 |         }
766 | #endif
767 |         /* the block above is compiled out: no NUL skipping before the final '>' */
768 |         offset += 1;
769 |         if (idx + offset == end) { /* input ends right after the "--" / "-!" pair */
770 |             hs->state = h5_state_eof;
771 |             hs->token_start = hs->s + hs->pos;
772 |             hs->token_len = hs->len - hs->pos;
773 |             hs->token_type = TAG_COMMENT;
774 |             return 1;
775 |         }
776 | 
777 | 
778 |         ch = *(idx + offset);
779 |         if (ch != CHAR_GT) { /* third byte must be '>' */
780 |             pos = (size_t)(idx - hs->s) + 1;
781 |             continue;
782 |         }
783 |         offset += 1; /* offset now points just past the '>' */
784 | 
785 |         /* ends in --> or -!> */
786 |         hs->token_start = hs->s + hs->pos;
787 |         hs->token_len = (size_t)(idx - hs->s) - hs->pos; /* terminator is not part of the token */
788 |         hs->pos = (size_t)(idx + offset - hs->s); /* resume after the terminator */
789 |         hs->state = h5_state_data;
790 |         hs->token_type = TAG_COMMENT;
791 |         return 1;
792 |     }
793 | }
794 | 
795 | static int h5_state_cdata(h5_state_t* hs)
796 | {
797 |     const char* rb;
798 |     size_t from = hs->pos;
799 | 
800 |     TRACE();
801 |     /* every exit emits a DATA_TEXT token starting at the current position */
802 |     hs->token_type = DATA_TEXT;
803 |     hs->token_start = hs->s + hs->pos;
804 | 
805 |     for (;;) {
806 |         rb = (const char*) memchr(hs->s + from, CHAR_RIGHTB, hs->len - from);
807 |         /* no ']' found, or fewer than 3 bytes remain from it */
808 |         if (rb == NULL || rb > hs->s + hs->len - 3) {
809 |             /* unterminated section: the token is the rest; pos stays put */
810 |             hs->token_len = hs->len - hs->pos;
811 |             hs->state = h5_state_eof;
812 |             return 1;
813 |         }
814 |         if (rb[1] == CHAR_RIGHTB && rb[2] == CHAR_GT) {
815 |             /* "]]>" terminator: excluded from the token and skipped */
816 |             hs->token_len = (size_t)(rb - hs->s) - hs->pos;
817 |             hs->pos = (size_t)(rb - hs->s) + 3;
818 |             hs->state = h5_state_data;
819 |             return 1;
820 |         }
821 |         from = (size_t)(rb - hs->s) + 1;
822 |     }
823 | }
824 | 
825 | /**
826 |  * 8.2.4.52 - emits a DOCTYPE token running up to (not including) the next '>'.
827 |  * http://www.w3.org/html/wg/drafts/html/master/syntax.html#doctype-state
828 |  */
829 | static int h5_state_doctype(h5_state_t* hs)
830 | {
831 |     const char* begin = hs->s + hs->pos;
832 |     const char* gt;
833 | 
834 |     TRACE();
835 |     gt = (const char*) memchr(begin, CHAR_GT, hs->len - hs->pos);
836 |     hs->token_type = DOCTYPE;
837 |     hs->token_start = begin;
838 |     if (gt != NULL) {
839 |         hs->token_len = (size_t)(gt - hs->s) - hs->pos;
840 |         hs->pos = (size_t)(gt - hs->s) + 1;   /* continue after '>' */
841 |         hs->state = h5_state_data;
842 |     } else {
843 |         hs->token_len = hs->len - hs->pos;    /* no '>': take the rest */
844 |         hs->state = h5_state_eof;
845 |     }
846 |     return 1;
847 | }
848 | 


--------------------------------------------------------------------------------
/mod_defender.cpp:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | /**
 11 |  * \file     mod_defender.cpp
 12 |  * \authors  Annihil, Kevin Guillemot
 13 |  * \version  2.0
 14 |  * \date     28/02/2017
 15 |  * \license  GPLv3
 16 |  * \brief    mod_defender principal code and handlers
 17 |  */
 18 | 
 19 | 
 20 | /*************************/
 21 | /* Inclusion of .H files */
 22 | /*************************/
 23 | 
 24 | #include "mod_defender.hpp"
 25 | 
 26 | 
 27 | /********************/
 28 | /* Global variables */
 29 | /********************/
 30 | 
 31 | /**
 32 |  *  Configuration structure
 33 |  */
 34 | std::vector<dir_config_t *> dir_cfgs;
 35 | 
 36 | 
 37 | /***************************/
 38 | /* Definition of functions */
 39 | /***************************/
 40 | 
 41 | /**
 42 |  *  Pool cleanup callback: destroys the RuntimeScanner passed as inPtr
 43 |  *   (a NULL pointer is ignored) and always returns OK.
 44 |  */
 45 | static apr_status_t defender_delete_runtimescanner_object(void *inPtr) {
 46 |     RuntimeScanner *scanner = static_cast<RuntimeScanner *>(inPtr);
 47 |     delete scanner;   // deleting a null pointer is a no-op
 48 |     return OK;
 49 | }
 50 | 
 51 | /**
 52 |  *  Pool cleanup callback registered on the configuration pool (see post_config):
 53 |  *   logs a notice and destroys the RuleParser passed as inPtr; NULL is ignored.
 54 |  */
 55 | static apr_status_t defender_delete_ruleparser_object(void *inPtr) {
 56 |     if (inPtr == NULL)
 57 |         return OK;
 58 |     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, NULL, "Unloading Defender for a loc");
 59 |     delete static_cast<RuleParser *>(inPtr);
 60 |     return OK;
 61 | }
 62 | 
 63 | /**
 64 |  *  Post-config hook. A marker kept in the process pool tells the first
 65 |  *   invocation (which only resets the collected MainRules) from the later
 66 |  *   one, which parses the MainRules and builds a RuleParser for each
 67 |  *   location where Defender is enabled. Always returns OK.
 68 |  */
 69 | static int post_config(apr_pool_t *pconf, apr_pool_t *, apr_pool_t *, server_rec *s) {
 70 |     const char *flagKey = "defender-init-flag";
 71 |     void *seenBefore = NULL;
 72 |     apr_pool_userdata_get(&seenBefore, flagKey, s->process->pool);
 73 |     if (seenBefore == NULL) {
 74 |         // First load: plant the marker and forget everything gathered so far
 75 |         apr_pool_userdata_set((const void *) 1, flagKey, apr_pool_cleanup_null, s->process->pool);
 76 |         tmpMainRules.clear();
 77 |         dir_cfgs.clear();
 78 |         return OK;
 79 |     }
 80 |     // Second (last) load: MainRules first, then one parser per enabled location
 81 |     string mainErr;
 82 |     unsigned int mainCount = RuleParser::parseMainRules(tmpMainRules, mainErr);
 83 |     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s, "Defender active on server %s: %d MainRules loaded",
 84 |                  s->server_hostname, mainCount);
 85 |     if (!mainErr.empty())
 86 |         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s, "MainRules error %s", mainErr.c_str());
 87 |     for (dir_config_t *dcfg : dir_cfgs) {
 88 |         if (!dcfg->defender) {
 89 |             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s, "Defender scanner disabled for loc %s", dcfg->loc_path);
 90 |             continue;
 91 |         }
 92 |         // The parser is destroyed by a cleanup attached to the configuration pool
 93 |         dcfg->parser = new RuleParser();
 94 |         apr_pool_cleanup_register(pconf, (void *) dcfg->parser, defender_delete_ruleparser_object,
 95 |                                   apr_pool_cleanup_null);
 96 |         string checkErr, basicErr;
 97 |         dcfg->parser->parseCheckRule(dcfg->tmpCheckRules, checkErr);
 98 |         unsigned int basicCount = dcfg->parser->parseBasicRules(dcfg->tmpBasicRules, basicErr);
 99 |         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s,
100 |                      "Defender active%s on loc %s: %lu CheckRules loaded, %d BasicRules loaded",
101 |                      (dcfg->learning ? " (learning)" : ""), dcfg->loc_path, dcfg->parser->checkRules.size(),
102 |                      basicCount);
103 |         if (!checkErr.empty())
104 |             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s, "CheckRule parsing error %s", checkErr.c_str());
105 |         if (!basicErr.empty())
106 |             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s, "BasicRule parsing error %s", basicErr.c_str());
107 |         dcfg->parser->generateHashTables();
108 |     }
109 |     dir_cfgs.clear();
110 |     return OK;
111 | }
112 | 
113 | /**
114 |  *  Exports every entry of scanner->matchScores as a "defender_<name>" variable
115 |  *   in r->subprocess_env, unless learning mode is on. Always returns DECLINED.
116 |  */
117 | static int pass_in_env(request_rec *r, RuntimeScanner *scanner) {
118 |     // The scanner's block / drop flags are deliberately not consulted (not used in Vulture)
119 |     if (scanner->learning)
120 |         return DECLINED;
121 |     for (const auto &score : scanner->matchScores) {
122 |         const char *envName = apr_psprintf(r->pool, "defender_%s", score.first.c_str());
123 |         const char *envValue = apr_itoa(r->pool, score.second);
124 |         apr_table_set(r->subprocess_env, envName, envValue);
125 |     }
126 |     return DECLINED;
127 | }
128 | 
129 | /**
130 |  *  Log sink installed as scanner->writeLogFn: forwards the buffer to apr_file_write().
131 |  */
132 | static int write_log(void *thefile, const void *buf, size_t *nbytes) {
133 |     return apr_file_write(static_cast<apr_file_t *>(thefile), buf, nbytes);
134 | }
135 | 
136 | /**
137 |  *  Function from Apache httpd, used to convert two hexadecimal digits into the byte they encode
138 |  *   Example: "2a" => 0x2a. The input is not validated; unescape_url checks apr_isxdigit() first.
139 |  */
140 | static char x2c(const char *what)
141 | {
142 |     char digit;
143 |     /* what[0] is the high nibble, what[1] the low nibble */
144 | #if !APR_CHARSET_EBCDIC
145 |     digit = ((what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10   /* & 0xdf clears bit 0x20: 'a'..'f' -> 'A'..'F' */
146 |                               : (what[0] - '0'));
147 |     digit *= 16;
148 |     digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10
149 |                              : (what[1] - '0'));
150 | #else /*APR_CHARSET_EBCDIC*/
151 |     char xstr[5];   /* builds the text "0xHH" for strtol() */
152 |     xstr[0]='0';
153 |     xstr[1]='x';
154 |     xstr[2]=what[0];
155 |     xstr[3]=what[1];
156 |     xstr[4]='\0';
157 |     digit = apr_xlate_conv_byte(ap_hdrs_from_ascii,
158 |                                 0xFF & strtol(xstr, NULL, 16));
159 | #endif /*APR_CHARSET_EBCDIC*/
160 |     return (digit);
161 | }
162 | 
163 | /**
164 |  *  Function from Apache httpd, used to URL-decode a string in place (%XX escapes).
165 |  *   Unlike Apache's version, %00 is not decoded: it is copied through as the three characters "%00".
166 |  */
167 | static int unescape_url(char *url, const char *forbid, const char *reserved)
168 | {
169 |     int badesc, badpath;
170 |     char *x, *y;
171 |     /* badesc: a malformed escape was seen; badpath: a %00 or a character listed in forbid was seen */
172 |     badesc = 0;
173 |     badpath = 0;
174 |     /* Initial scan for first '%'. Don't bother writing values before
175 |      * seeing a '%' */
176 |     y = strchr(url, '%');
177 |     if (y == NULL) {
178 |         return OK;   /* nothing to decode: url is not written to at all */
179 |     }
180 |     for (x = y; *y; ++x, ++y) {   /* x: write cursor, y: read cursor (x never passes y) */
181 |         if (*y != '%') {
182 |             *x = *y;
183 |         }
184 |         else {
185 |             if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) {
186 |                 badesc = 1;   /* malformed escape: keep the '%' literally and carry on */
187 |                 *x = '%';
188 |             }
189 |             else {
190 |                 char decoded;
191 |                 decoded = x2c(y + 1);
192 |                 if( decoded == '\0' ) {
193 |                     /* Copy-Paste the %00 - don't interpret ! */
194 |                     *x++ = *y++;
195 |                     *x++ = *y++;
196 |                     *x = *y;
197 |                     badpath = 1;
198 |                 } else if( forbid && ap_strchr_c(forbid, decoded) ) {
199 |                     badpath = 1;   /* forbidden character: still decoded, but reported */
200 |                     *x = decoded;
201 |                     y += 2;
202 |                 }
203 |                 else if (reserved && ap_strchr_c(reserved, decoded)) {
204 |                     *x++ = *y++;   /* reserved character: the escape is kept in its encoded form */
205 |                     *x++ = *y++;
206 |                     *x = *y;
207 |                 }
208 |                 else {
209 |                     *x = decoded;
210 |                     y += 2;   /* skip the two hex digits; the loop increment skips the rest */
211 |                 }
212 |             }
213 |         }
214 |     }
215 |     *x = '\0';   /* terminate the (possibly shorter) decoded string */
216 |     if (badesc) {
217 |         return HTTP_BAD_REQUEST;   /* a malformed escape takes precedence over badpath */
218 |     }
219 |     else if (badpath) {
220 |         return HTTP_NOT_FOUND;
221 |     }
222 |     else {
223 |         return OK;
224 |     }
225 | }
226 | 
227 | /**
228 |  *  Function from Apache httpd: splits a query string on '&' and stores each
229 |  *   URL-decoded key / value pair in parms. A key without '=' gets the value "1".
230 |  */
231 | static void argstring_to_table(char *str, apr_table_t *parms)
232 | {
233 |     char *tokState = NULL;
234 | 
235 |     if (str == NULL) {
236 |         return;
237 |     }
238 | 
239 |     // str is modified in place, both by apr_strtok() and by the '=' split below
240 |     for (char *key = apr_strtok(str, "&", &tokState); key != NULL;
241 |          key = apr_strtok(NULL, "&", &tokState)) {
242 |         char *eq = strchr(key, '=');
243 |         if (eq == NULL) {
244 |             // No '=': the value defaults to the literal "1", which is handed straight to
245 |             //  the table and never to unescape_url(), since that function writes in place
246 |             unescape_url(key, SLASHES, NULL);
247 |             apr_table_set(parms, key, "1");
248 |             continue;
249 |         }
250 |         *eq = '\0';             /* Split the string in two */
251 |         char *value = eq + 1;   /* Skip past the = */
252 |         /* Verify return ? (decoding stays best effort: the status is ignored) */
253 |         unescape_url(key, SLASHES, NULL);
254 |         unescape_url(value, SLASHES, NULL);
255 |         apr_table_set(parms, key, value);
256 |     }
257 | }
258 | 
259 | /**
260 |  *  Builds a fresh table in r->pool from the request's query string (r->args)
261 |  *   and hands it back through *table. r->args itself is left untouched.
262 |  */
263 | void args_to_table(request_rec *r, apr_table_t **table)
264 | {
265 |     char *argsCopy = apr_pstrdup(r->pool, r->args);   // parsing rewrites its input
266 |     *table = apr_table_make(r->pool, 10);
267 |     argstring_to_table(argsCopy, *table);
268 | }
269 | 
270 | 
271 | /**
272 |  *  Header-parser hook (RUN_ALL): creates the per-request RuntimeScanner, feeds
273 |  *   it the request metadata, every header and the GET parameters, then runs
274 |  *   the header scan.
275 |  *  The scanner and the body-handling state are stored in the request config
276 |  *   for the later hooks; both are released by request-pool cleanups.
277 |  *  Returns DECLINED when Defender is off for the location, the result of
278 |  *   pass_in_env() (DECLINED) when useenv is set, HTTP_INTERNAL_SERVER_ERROR
279 |  *   if the body state cannot be allocated, else processHeaders()'s verdict.
280 |  */
281 | static int header_parser(request_rec *r) {
282 |     // Get the module configuration
283 |     dir_config_t *dcfg = (dir_config_t *) ap_get_module_config(r->per_dir_config, &defender_module);
284 | 
285 |     // Stop if Defender not enabled
286 |     if (!dcfg->defender)
287 |         return DECLINED;
288 | 
289 |     RuntimeScanner *scanner = new RuntimeScanner(*dcfg->parser);
290 | 
291 |     // Register a C function to delete scanner at the end of the request cycle
292 |     apr_pool_cleanup_register(r->pool, (void *) scanner, defender_delete_runtimescanner_object,
293 |                               apr_pool_cleanup_null);
294 | 
295 |     // Zeroed block from the request pool to carry data between hooks (def stays NULL until set below)
296 |     defender_config_t *pDefenderConfig = (defender_config_t *) apr_pcalloc(r->pool, sizeof(defender_config_t));
297 | 
298 |     // Remember our application pointer for future calls
299 |     pDefenderConfig->vpRuntimeScanner = scanner;
300 | 
301 |     // Register our config data structure for our module for retrieval later as required
302 |     ap_set_module_config(r->request_config, &defender_module, (void *) pDefenderConfig);
303 | 
304 |     // Create the zero-initialised body-handling state
305 |     defender_t *def = (defender_t *) apr_pcalloc(r->pool, sizeof(defender_t));
306 |     if( def == NULL ) {
307 |         // Everything below dereferences def, so the request cannot go on
308 |         ap_log_error(APLOG_MARK, APLOG_ALERT, 0, NULL, "Failed to allocate %lu bytes for defender_t structure.",
309 |                                                         (unsigned long) sizeof(defender_t));
310 |         return HTTP_INTERNAL_SERVER_ERROR;
311 |     }
312 | 
313 |     /* Initialise C-L */
314 |     long request_content_length = -1;
315 |     const char *s = apr_table_get(r->headers_in, "Content-Length");
316 |     if (s != NULL) {
317 |         request_content_length = strtol(s, NULL, 10);
318 |     }
319 | 
320 |     /* Figure out whether this request has a body */
321 |     def->body_should_exist = 0;
322 |     if (request_content_length == -1) {
323 |         /* There's no C-L, but is chunked encoding used? */
324 |         const char *transfer_encoding = apr_table_get(r->headers_in, "Transfer-Encoding");
325 |         if( (transfer_encoding != NULL) && (strcasecmp(transfer_encoding, "chunked") == 0) ) {
326 |             def->body_should_exist = 1;
327 |         }
328 |     } else {
329 |         /* C-L found */
330 |         def->body_should_exist = 1;
331 |     }
332 | 
333 |     pDefenderConfig->def = def;
334 | 
335 |     // And register a cleanup hook
336 |     apr_pool_cleanup_register(r->pool, def, body_clear, apr_pool_cleanup_null);
337 | 
338 |     // Set method; for any other method scanner->method is not assigned here
339 |     //  and keeps whatever value the RuntimeScanner constructor gave it
340 |     if (r->method_number == M_GET)
341 |         scanner->method = METHOD_GET;
342 |     else if (r->method_number == M_POST)
343 |         scanner->method = METHOD_POST;
344 |     else if (r->method_number == M_PUT)
345 |         scanner->method = METHOD_PUT;
346 | 
347 |     // Set logger info
348 |     scanner->pid = getpid();
349 |     apr_os_thread_t tid = apr_os_thread_current();
350 |     char pid_buffer[16];   // fixed size: a runtime-sized array is not standard C++
351 |     apr_snprintf(pid_buffer, sizeof(pid_buffer), "%pT", &tid);
352 |     scanner->threadId = string(pid_buffer);
353 |     scanner->connectionId = r->connection->id;
354 |     scanner->clientIp = r->useragent_ip;
355 |     // NOTE(review): r->hostname may be NULL when the client sends no Host - confirm requestedHost copes
356 |     scanner->requestedHost = r->hostname;
357 |     scanner->serverHostname = r->server->server_hostname;
358 |     scanner->fullUri = r->unparsed_uri;
359 |     scanner->protocol = r->protocol;
360 |     ap_version_t vers;
361 |     ap_get_server_revision(&vers);
362 |     scanner->softwareVersion = std::to_string(vers.major) + "." + std::to_string(vers.minor) + "." +
363 |                                std::to_string(vers.patch);
364 |     scanner->logLevel = static_cast<LOG_LVL>(r->log->level);
365 |     if (scanner->logLevel >= APLOG_DEBUG)
366 |         scanner->logLevel = LOG_LVL_DEBUG;
367 |     scanner->writeLogFn = write_log;
368 |     scanner->errorLogFile = r->server->error_log;
369 |     scanner->learningLogFile = dcfg->matchlog_file;
370 |     scanner->learningJSONLogFile = dcfg->jsonmatchlog_file;
371 |     scanner->learning = dcfg->learning;
372 |     scanner->extensiveLearning = dcfg->extensive;
373 | 
374 |     // Set runtime modifiers
375 |     scanner->libinjSQL = dcfg->libinjection_sql;
376 |     scanner->libinjXSS = dcfg->libinjection_xss;
377 |     scanner->bodyLimit = dcfg->requestBodyLimit;
378 | 
379 |     // Set the uri path
380 |     scanner->setUri(r->parsed_uri.path);
381 | 
382 |     // Pass every HTTP header received
383 |     const apr_array_header_t *headerFields = apr_table_elts(r->headers_in);
384 |     apr_table_entry_t *headerEntry = (apr_table_entry_t *) headerFields->elts;
385 |     for (int i = 0; i < headerFields->nelts; i++)
386 |         scanner->addHeader(headerEntry[i].key, headerEntry[i].val);
387 | 
388 |     // Pass GET parameters
389 |     apr_table_t *getTable = NULL;
390 |     args_to_table(r, &getTable);
391 |     const apr_array_header_t *getParams = apr_table_elts(getTable);
392 |     apr_table_entry_t *getParam = (apr_table_entry_t *) getParams->elts;
393 |     for (int i = 0; i < getParams->nelts; i++)
394 |         scanner->addGETParameter(getParam[i].key, getParam[i].val);
395 | 
396 |     // Run scanner
397 |     int ret = scanner->processHeaders();
398 | 
399 |     // With useenv the scores are exported to the environment and the scan
400 |     //  verdict is replaced by pass_in_env()'s return value
401 |     if (dcfg->useenv)
402 |         ret = pass_in_env(r, scanner);
403 | 
404 |     return ret;
405 | }
406 | 
407 | /**
408 |  *  Fixups hook (RUN_ALL), invoked just before any content-handler: for the
409 |  *   main POST / PUT request it reads the request body, hands it to the
410 |  *   scanner and adds the "DEFENDER_IN" input filter to the request.
411 |  *  Returns DECLINED when there is nothing to do, an HTTP status on read errors.
412 |  */
413 | static int fixups(request_rec *r) {
414 | 
415 |     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Fixups beginning.");
416 | 
417 |     dir_config_t *dcfg = (dir_config_t *) ap_get_module_config(r->per_dir_config, &defender_module);
418 |     // Nothing to do when Defender is disabled, when this is not the main request,
419 |     //  or for methods other than POST and PUT
420 |     if (!dcfg->defender)
421 |         return DECLINED;
422 |     if (r->main != NULL || r->prev != NULL)
423 |         return DECLINED;
424 |     if (r->method_number != M_POST && r->method_number != M_PUT)
425 |         return DECLINED;
426 | 
427 |     // Per-request state kept in the request configuration
428 |     defender_config_t *defc = (defender_config_t *) ap_get_module_config(r->request_config, &defender_module);
429 |     RuntimeScanner *scanner = defc->vpRuntimeScanner;
430 |     defender_t *def = defc->def;
431 | 
432 |     /* Has this phase been completed already? */
433 |     if( def->fixups_done ) {
434 |         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "Internal Error: Attempted to process the request body more than once.");
435 |         return DECLINED;
436 |     }
437 |     def->fixups_done = 1;
438 | 
439 |     // Body declared empty, unsupported content type, or body limit already exceeded:
440 |     //  the body is not read here and the verdict is left to processBody()
441 |     if ((scanner->contentLengthProvided && scanner->contentLength == 0)
442 |         || scanner->contentType == CONTENT_TYPE_UNSUPPORTED
443 |         || scanner->bodyLimitExceeded)
444 |         return scanner->processBody();
445 | 
446 |     // Unsupported transfer encoding, or neither Transfer-Encoding nor Content-Length
447 |     if ((scanner->transferEncodingProvided && scanner->transferEncoding == TRANSFER_ENCODING_UNSUPPORTED)
448 |         || (!scanner->transferEncodingProvided && !scanner->contentLengthProvided))
449 |         return HTTP_NOT_IMPLEMENTED;
450 | 
451 |     if( scanner->contentLength >= MAX_BB_SIZE ) {
452 |         ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, "Content-Length '%lu' is greater than process limit : %d",
453 |                       scanner->contentLength, MAX_BB_SIZE);
454 |         return DECLINED;
455 |     }
456 | 
457 |     // Pre-allocate room for the announced body size
458 |     scanner->body.reserve(scanner->contentLength);
459 | 
460 |     /* Read body */
461 |     char *error_msg = NULL;
462 |     int ret = read_request_body(def, &error_msg, r, dcfg->requestBodyLimit);
463 |     if( ret < 0 ) {
464 |         switch( ret ) {
465 |             case -1 :
466 |                 if( error_msg != NULL ) {
467 |                     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "%s", error_msg);
468 |                 }
469 |                 return HTTP_INTERNAL_SERVER_ERROR;
470 |             case -4 : /* Timeout. */
471 |                 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "%s", error_msg);
472 |                 r->connection->keepalive = AP_CONN_CLOSE;
473 |                 return HTTP_REQUEST_TIME_OUT;
474 |             case -5 : /* Request body limit reached. */
475 |                 r->connection->keepalive = AP_CONN_CLOSE;
476 |                 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "%s. Deny with code (%d)", error_msg, HTTP_REQUEST_ENTITY_TOO_LARGE);
477 |                 return HTTP_REQUEST_ENTITY_TOO_LARGE;
478 |             case -6 : /* EOF when reading request body. */
479 |             case -7 : /* Partial received */
480 |                 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "%s", error_msg);
481 |                 r->connection->keepalive = AP_CONN_CLOSE;
482 |                 return HTTP_BAD_REQUEST;
483 |             default :
484 |                 /* allow through */
485 |                 break;
486 |         }
487 |         // Any other negative code: remember the error but let the request continue
488 |         def->body_error = 1;
489 |         def->body_error_msg = error_msg;
490 |     }
491 | 
492 |     scanner->body.append(def->stream_input_data, def->stream_input_length);
493 | 
494 |     ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, "Brigades processing completed. Process body.");
495 | 
496 |     // Run scanner
497 |     ret = scanner->processBody();
498 | 
499 |     // With useenv the scan verdict is replaced by pass_in_env()'s return value
500 |     if (dcfg->useenv)
501 |         ret = pass_in_env(r, scanner);
502 | 
503 |     /* Add the input filter. */
504 |     ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, "Insert_filter: Adding input forwarding filter %s(r %pp).",
505 |             (((r->main != NULL)||(r->prev != NULL)) ? "for subrequest " : ""), r);
506 | 
507 |     ap_add_input_filter("DEFENDER_IN", NULL, r, r->connection);
508 | 
509 |     return ret;
510 | }
525 | 
526 | /**
527 |  *  This request filter will forward the previously stored
528 |  *   request body further down the chain (most likely to the
529 |  *   processing module).
530 |  */
531 | apr_status_t input_filter(ap_filter_t *f, apr_bucket_brigade *bb_out,
532 |                           ap_input_mode_t mode, apr_read_type_e block, apr_off_t nbytes)
533 | {
534 |     request_rec *r = f->r;
535 |     apr_bucket *bucket = NULL;
536 |     apr_status_t rc;
537 |     char *error_msg = NULL;
538 |     chunk_t *chunk = NULL;
539 | 
540 |     ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, "Defender input filter begins.");
541 | 
542 |     defender_config_t *config = (defender_config_t *) ap_get_module_config(r->request_config, &defender_module);
543 |     defender_t *def = config->def;
544 | 
545 |     // Stop if this is not the main request
546 |     if (r->main != NULL || r->prev != NULL)
547 |         return DECLINED;
548 | 
549 |     // Process only if POST / PUT request
550 |     if (r->method_number != M_POST && r->method_number != M_PUT)
551 |         return DECLINED;
552 | 
553 |     if( config->def == NULL ) {
554 |         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "Internal error in input filter: structure is null !");
555 |         ap_remove_input_filter(f);
556 |         return APR_EGENERAL;
557 |     }
558 | 
559 |     if( (def->status == IF_STATUS_COMPLETE) || (def->status == IF_STATUS_NONE) ) {
560 |         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Input forwarding already complete, "
561 |                 "skipping (f %pp, r %pp).", f, f->r);
562 |         ap_remove_input_filter(f);
563 |         return ap_get_brigade(f->next, bb_out, mode, block, nbytes);
564 |     }
565 | 
566 |     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Forwarding input: mode=%d, block=%d, nbytes=%"
567 |             APR_OFF_T_FMT " (f %pp, r %pp).", mode, block, nbytes, f, f->r);
568 | 
569 |     if( def->started_forwarding == 0) {
570 |         def->started_forwarding = 1;
571 |         rc = body_retrieve_start(def, &error_msg, r);
572 |         if( rc == -1 ) {
573 |             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "%s", error_msg);
574 |             return APR_EGENERAL;
575 |         }
576 |     }
577 | 
578 |     rc = body_retrieve(def, &chunk, (unsigned int)nbytes, &error_msg, r);
579 |     if (rc == -1) {
580 |         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "%s", error_msg);
581 |         return APR_EGENERAL;
582 |     }
583 | 
584 |     if( chunk && def->stream_changed == 0 ) {
585 |         /* Copy the data we received in the chunk */
586 |         bucket = apr_bucket_heap_create(chunk->data, chunk->length, NULL, r->connection->bucket_alloc);
587 | 
588 |         if( bucket == NULL ) {
589 |             /* FIXME : Correct log level ? */
590 |             ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, "Input filter: Heap bucket is NULL.");
591 |             return APR_EGENERAL;
592 |         }
593 |         /* Append the bucket at the end of the brigade */
594 |         APR_BRIGADE_INSERT_TAIL(bb_out, bucket);
595 |         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Forwarded %" APR_SIZE_T_FMT " bytes.", chunk->length);
596 | 
597 |     } else if( def->stream_input_data != NULL ) {
598 | 
599 |         def->stream_changed = 0;
600 | 
601 |         bucket = apr_bucket_heap_create(def->stream_input_data, def->stream_input_length, NULL,
602 |                                         f->r->connection->bucket_alloc);
603 | 
604 |         if(def->stream_input_data != NULL) {
605 |             ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, "Input filter: Freeing stream input data.");
606 |             free(def->stream_input_data);
607 |             def->stream_input_data = NULL;
608 |         }
609 | 
610 |         if( bucket == NULL ) {
611 |             /* FIXME : Correct log level ? */
612 |             ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, "Input filter: Heap bucket is NULL.");
613 |             return APR_EGENERAL;
614 |         }
615 |         APR_BRIGADE_INSERT_TAIL(bb_out, bucket);
616 | 
617 |         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Forwarded %" APR_SIZE_T_FMT " bytes.",
618 |                       def->stream_input_length);
619 |     }
620 | 
621 |     if( rc == 0 ) {
622 |         if( def->if_seen_eos ) {
623 |             bucket = apr_bucket_eos_create(f->r->connection->bucket_alloc);
624 |             if (bucket == NULL) return APR_EGENERAL;
625 |             APR_BRIGADE_INSERT_TAIL(bb_out, bucket);
626 | 
627 |             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Sent EOS.");
628 |         }
629 | 
630 |         /* We're done */
631 |         def->status = IF_STATUS_COMPLETE;
632 |         ap_remove_input_filter(f);
633 | 
634 |         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Input filter: Input forwarding complete.");
635 |     }
636 | 
637 |     return APR_SUCCESS;
638 | }
639 | 
640 | /**
641 |  *  Apache callback to register our hooks.
642 |  */
643 | static void defender_register_hooks(apr_pool_t *) {
644 |     ap_hook_post_config(post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
645 |     static const char *const aszSucc[] = {"mod_security2.c", NULL};
646 |     ap_hook_header_parser(header_parser, NULL, aszSucc, APR_HOOK_REALLY_FIRST - 20);
647 |     /* We must intervene BEFORE mod_security */
648 |     ap_hook_fixups(fixups, NULL, aszSucc, APR_HOOK_REALLY_FIRST - 20);
649 |     /* Insert input filter to give back data */
650 |     ap_register_input_filter("DEFENDER_IN", input_filter, NULL, AP_FTYPE_CONTENT_SET);
651 | }
652 | 
653 | /**
654 |  *  This function is called when the "MatchLog" configuration directive is parsed.
655 |  */
656 | static const char *set_matchlog_path(cmd_parms *cmd, void *cfg, const char *arg) {
657 |     dir_config_t *dcfg = (dir_config_t *) cfg;
658 | 
659 |     char *matchlog_path = apr_pstrdup(cmd->pool, arg);
660 | 
661 |     if (matchlog_path[0] == '|') {
662 |         const char *pipe_name = matchlog_path + 1;
663 |         piped_log *pipe_log;
664 | 
665 |         pipe_log = ap_open_piped_log(cmd->pool, pipe_name);
666 |         if (pipe_log == NULL)
667 |             return apr_psprintf(cmd->pool, "mod_defender: Failed to open the match log pipe: %s", pipe_name);
668 |         dcfg->matchlog_file = ap_piped_log_write_fd(pipe_log);
669 |     } else {
670 |         const char *file_name = ap_server_root_relative(cmd->pool, matchlog_path);
671 |         apr_status_t rc;
672 | 
673 |         rc = apr_file_open(&dcfg->matchlog_file, file_name,
674 |                            APR_WRITE | APR_APPEND | APR_CREATE | APR_BINARY,
675 |                            APR_UREAD | APR_UWRITE | APR_GREAD, cmd->pool);
676 | 
677 |         if (rc != APR_SUCCESS)
678 |             return apr_psprintf(cmd->pool, "mod_defender: Failed to open the match log file: %s", file_name);
679 |     }
680 | 
681 |     return NULL; // success
682 | }
683 | 
684 | /**
685 |  *  This function is called when the "JSONMatchLog" configuration directive is parsed.
686 |  */
687 | static const char *set_jsonerrorlog_path(cmd_parms *cmd, void *cfg, const char *arg) {
688 |     dir_config_t *dcfg = (dir_config_t *) cfg;
689 | 
690 |     char *jsonmatchlog_path = apr_pstrdup(cmd->pool, arg);
691 | 
692 |     if (jsonmatchlog_path[0] == '|') {
693 |         const char *pipe_name = jsonmatchlog_path + 1;
694 |         piped_log *pipe_log;
695 | 
696 |         pipe_log = ap_open_piped_log(cmd->pool, pipe_name);
697 |         if (pipe_log == NULL)
698 |             return apr_psprintf(cmd->pool, "mod_defender: Failed to open the json match log pipe: %s", pipe_name);
699 |         dcfg->jsonmatchlog_file = ap_piped_log_write_fd(pipe_log);
700 |     } else {
701 |         const char *file_name = ap_server_root_relative(cmd->pool, jsonmatchlog_path);
702 |         apr_status_t rc;
703 | 
704 |         rc = apr_file_open(&dcfg->jsonmatchlog_file, file_name,
705 |                            APR_WRITE | APR_APPEND | APR_CREATE | APR_BINARY,
706 |                            APR_UREAD | APR_UWRITE | APR_GREAD, cmd->pool);
707 | 
708 |         if (rc != APR_SUCCESS)
709 |             return apr_psprintf(cmd->pool, "mod_defender: Failed to open the json match log file: %s", file_name);
710 |     }
711 | 
712 |     return NULL; // success
713 | }
714 | 
715 | /**
716 |  *  This function is called when the "RequestBodyLimit" configuration directive is parsed.
717 |  */
718 | static const char *set_request_body_limit(cmd_parms *cmd, void *cfg, const char *arg) {
719 |     dir_config_t *dcfg = (dir_config_t *) cfg;
720 |     unsigned long limit = strtoul(arg, NULL, 10);
721 |     if (limit <= 0)
722 |         return apr_psprintf(cmd->pool, "mod_defender: Invalid value for RequestBodyLimit: %s", arg);
723 |     dcfg->requestBodyLimit = limit;
724 |     return NULL;
725 | }
726 | 
727 | /**
728 |  * This function is called when the "LibinjectionSQL" configuration directive is parsed.
729 |  */
730 | static const char *set_libinjection_sql_flag(cmd_parms *, void *cfg, int flag) {
731 |     dir_config_t *dcfg = (dir_config_t *) cfg;
732 |     dcfg->libinjection_sql = (bool) flag;
733 |     return NULL;
734 | }
735 | 
736 | /**
737 |  * This function is called when the "LibinjectionXSS" configuration directive is parsed.
738 |  */
739 | static const char *set_libinjection_xss_flag(cmd_parms *, void *cfg, int flag) {
740 |     dir_config_t *dcfg = (dir_config_t *) cfg;
741 |     dcfg->libinjection_xss = (bool) flag;
742 |     return NULL;
743 | }
744 | 
745 | /**
746 |  * This function is called when the "Defender" configuration directive is parsed.
747 |  */
748 | static const char *set_defender_flag(cmd_parms *, void *cfg, int flag) {
749 |     dir_config_t *dcfg = (dir_config_t *) cfg;
750 |     dcfg->defender = (bool) flag;
751 |     return NULL;
752 | }
753 | 
754 | /**
755 |  * This function is called when the "LearningMode" configuration directive is parsed.
756 |  */
757 | static const char *set_learning_flag(cmd_parms *, void *cfg, int flag) {
758 |     dir_config_t *dcfg = (dir_config_t *) cfg;
759 |     dcfg->learning = (bool) flag;
760 |     return NULL;
761 | }
762 | 
763 | /**
764 |  * This function is called when the "ExtensiveLog" configuration directive is parsed.
765 |  */
766 | static const char *set_extensive_flag(cmd_parms *, void *cfg, int flag) {
767 |     dir_config_t *dcfg = (dir_config_t *) cfg;
768 |     dcfg->extensive = (bool) flag;
769 |     return NULL;
770 | }
771 | 
772 | /**
773 |  * This function is called when the "UseEnv" configuration directive is parsed.
774 |  */
775 | static const char *set_useenv_flag(cmd_parms *, void *cfg, int flag) {
776 |     dir_config_t *dcfg = (dir_config_t *) cfg;
777 |     dcfg->useenv = (bool) flag;
778 |     return NULL;
779 | }
780 | 
781 | /**
782 |  * This function is called when the "MainRule" configuration directives are parsed.
783 |  */
784 | static const char *set_mainrules(cmd_parms *, void *, const char *line) {
785 |     tmpMainRules.push_back(string(line));
786 |     return NULL;
787 | }
788 | 
789 | /**
790 |  * This function is called when the "CheckRule" configuration directives are parsed.
791 |  */
792 | static const char *set_checkrules(cmd_parms *, void *cfg, const char *arg1, const char *arg2) {
793 |     dir_config_t *dcfg = (dir_config_t *) cfg;
794 |     dcfg->tmpCheckRules.push_back(std::make_pair(string(arg1), string(arg2)));
795 |     return NULL;
796 | }
797 | 
798 | /**
799 |  * This function is called when the "BasicRule" configuration directives are parsed.
800 |  */
801 | static const char *set_basicrules(cmd_parms *, void *cfg, const char *line) {
802 |     dir_config_t *dcfg = (dir_config_t *) cfg;
803 |     dcfg->tmpBasicRules.push_back(string(line));
804 |     return NULL;
805 | }
806 | 
/**
 * Table of the configuration directives supported by this module.
 *
 * Each entry lists, in order: the directive name, its handler function, an
 * unused data pointer (NULL), the context in which the directive is allowed,
 * the kind of arguments it takes, and a short description.
 *
 * Only "MainRule" is declared with RSRC_CONF; every other directive is
 * declared with ACCESS_CONF. Note that "JSONMatchLog" is handled by the
 * function named set_jsonerrorlog_path.
 */
static const command_rec directives[] = {
    {"Defender",         (cmd_func) set_defender_flag,         NULL, ACCESS_CONF, FLAG,     "Defender toggle"},
    {"MainRule",         (cmd_func) set_mainrules,             NULL, RSRC_CONF,   RAW_ARGS, "Match directive"},
    {"CheckRule",        (cmd_func) set_checkrules,            NULL, ACCESS_CONF, TAKE2,    "Score directive"},
    {"BasicRule",        (cmd_func) set_basicrules,            NULL, ACCESS_CONF, RAW_ARGS, "Whitelist directive"},
    {"MatchLog",         (cmd_func) set_matchlog_path,         NULL, ACCESS_CONF, TAKE1,    "Path to the match log"},
    {"JSONMatchLog",     (cmd_func) set_jsonerrorlog_path,     NULL, ACCESS_CONF, TAKE1,    "Path to the JSON match log"},
    {"RequestBodyLimit", (cmd_func) set_request_body_limit,    NULL, ACCESS_CONF, TAKE1,    "Set Request Body Limit"},
    {"LearningMode",     (cmd_func) set_learning_flag,         NULL, ACCESS_CONF, FLAG,     "Learning mode toggle"},
    {"ExtensiveLog",     (cmd_func) set_extensive_flag,        NULL, ACCESS_CONF, FLAG,     "Extensive log toggle"},
    {"LibinjectionSQL",  (cmd_func) set_libinjection_sql_flag, NULL, ACCESS_CONF, FLAG,     "Libinjection SQL toggle"},
    {"LibinjectionXSS",  (cmd_func) set_libinjection_xss_flag, NULL, ACCESS_CONF, FLAG,     "Libinjection XSS toggle"},
    {"UseEnv",           (cmd_func) set_useenv_flag,           NULL, ACCESS_CONF, FLAG,     "UseEnv toggle"},
    {NULL,               NULL,                                 NULL, RSRC_CONF,   TAKE1,    NULL} /* Terminating entry: a NULL name ends the table */
};
825 | 
826 | /**
827 |  * Creates the per-server configuration records.
828 |  */
829 | static void *create_dir_config(apr_pool_t *p, char *path) {
830 |     // allocate space for the configuration structure from the provided pool p.
831 |     dir_config_t *dcfg = (dir_config_t *) apr_pcalloc(p, sizeof(dir_config_t));
832 | 
833 |     dir_cfgs.push_back(dcfg);
834 |     dcfg->loc_path = apr_pstrdup(p, path);
835 | 
836 |     dcfg->requestBodyLimit = 131072;
837 |     dcfg->learning = 1;
838 |     return dcfg;
839 | }
840 | 
/* Our standard module definition.
 * The entries after STANDARD20_MODULE_STUFF follow the order of Apache's
 * module structure; this module only provides a per-directory configuration
 * creator, a directive table and a hook registration function.
 */
module AP_MODULE_DECLARE_DATA defender_module = {
        STANDARD20_MODULE_STUFF,
        create_dir_config, // create per-directory configuration structures
        NULL, // merge per-directory configurations
        NULL, // create per-server configuration structures
        NULL, // merge per-server configurations
        directives, // configuration directive handlers,
        defender_register_hooks // hook registration function
#if defined(AP_MODULE_HAS_FLAGS)
        ,AP_MODULE_FLAG_ALWAYS_MERGE /* flags */
#endif
};


--------------------------------------------------------------------------------
/RuleParser.cpp:
--------------------------------------------------------------------------------
  1 | /*                       _        _       __                _
  2 |  *   _ __ ___   ___   __| |    __| | ___ / _| ___ _ __   __| | ___ _ __
  3 |  *  | '_ ` _ \ / _ \ / _` |   / _` |/ _ \ |_ / _ \ '_ \ / _` |/ _ \ '__|
  4 |  *  | | | | | | (_) | (_| |  | (_| |  __/  _|  __/ | | | (_| |  __/ |
  5 |  *  |_| |_| |_|\___/ \__,_|___\__,_|\___|_|  \___|_| |_|\__,_|\___|_|
  6 |  *                       |_____|
  7 |  *  Copyright (c) 2017 Annihil
  8 |  *  Released under the GPLv3
  9 |  */
 10 | 
 11 | #include "RuleParser.h"
 12 | 
// Raw "MainRule" lines waiting to be parsed.
// NOTE(review): presumably this is the vector handed to parseMainRules() - confirm against the caller.
vector<string> tmpMainRules;

// Parsed main rules, filled by RuleParser::parseMainRules() according to the
// match zones of each rule. A rule may be stored in several of these vectors.
vector<http_rule_t> getRules; // rules with the ARGS or $ARGS_VAR match zone
vector<http_rule_t> bodyRules; // rules with the BODY or $BODY_VAR match zone
vector<http_rule_t> rawBodyRules; // rules with the RAWBODY match zone
vector<http_rule_t> headerRules; // rules with the HEADERS or $HEADERS_VAR match zone
vector<http_rule_t> genericRules; // URL
 20 | 
 21 | RuleParser::RuleParser() {
 22 |     /* Internal rules */
 23 |     bigRequest.id = 2;
 24 |     bigRequest.logMsg = "Big request";
 25 |     bigRequest.action = BLOCK;
 26 | 
 27 |     uncommonHexEncoding.id = 10;
 28 |     uncommonHexEncoding.logMsg = "Uncommon hex encoding";
 29 |     uncommonHexEncoding.action = BLOCK;
 30 | 
 31 |     uncommonContentType.id = 11;
 32 |     uncommonContentType.logMsg = "Uncommon content type";
 33 |     uncommonContentType.action = BLOCK;
 34 | 
 35 |     uncommonUrl.id = 12;
 36 |     uncommonUrl.logMsg = "Uncommon url";
 37 |     uncommonUrl.action = BLOCK;
 38 | 
 39 |     uncommonPostFormat.id = 13;
 40 |     uncommonPostFormat.logMsg = "Uncommon post format";
 41 |     uncommonPostFormat.action = BLOCK;
 42 | 
 43 |     uncommonPostBoundary.id = 14;
 44 |     uncommonPostBoundary.logMsg = "Uncommon post boundary";
 45 |     uncommonPostBoundary.action = BLOCK;
 46 | 
 47 |     invalidJson.id = 15;
 48 |     invalidJson.logMsg = "Invalid json";
 49 |     invalidJson.action = BLOCK;
 50 | 
 51 |     emptyPostBody.id = 16;
 52 |     emptyPostBody.logMsg = "Empty post body";
 53 |     emptyPostBody.action = BLOCK;
 54 | 
 55 |     libsqliRule.id = 17;
 56 |     libsqliRule.scores.emplace_back("$LIBINJECTION_SQL", 8);
 57 |     libsqliRule.action = LOG;
 58 | 
 59 |     libxssRule.id = 18;
 60 |     libxssRule.logMsg = "Libinjection XSS";
 61 |     libxssRule.scores.emplace_back("$LIBINJECTION_XSS", 8);
 62 |     libxssRule.action = LOG;
 63 | }
 64 | 
 65 | unsigned int RuleParser::parseMainRules(vector<string> &ruleLines, string errorMsg) {
 66 |     getRules.clear();
 67 |     bodyRules.clear();
 68 |     rawBodyRules.clear();
 69 |     headerRules.clear();
 70 |     genericRules.clear();
 71 | 
 72 |     unsigned int ruleCount = 0;
 73 |     stringstream err;
 74 |     for (string &ruleLine : ruleLines) {
 75 |         bool error = false;
 76 |         DEBUG_CONF_MR("MainRule ");
 77 |         http_rule_t rule;
 78 |         rule.br.active = true;
 79 |         rule.type = MAIN_RULE;
 80 | 
 81 |         vector<string> ruleParts = parseRawDirective(ruleLine);
 82 |         for (const string &rulePart : ruleParts) {
 83 |             if (rulePart == "negative") {
 84 |                 rule.br.negative = true;
 85 |                 DEBUG_CONF_MR("negative=1 ");
 86 |             }
 87 |             else if (rulePart.substr(0, 4) == "str:") {
 88 |                 rule.br.str = rulePart.substr(4);
 89 |                 std::transform(rule.br.str.begin(), rule.br.str.end(), rule.br.str.begin(), tolower);
 90 |                 rule.br.match_type = STR;
 91 |                 DEBUG_CONF_MR("str='" << rule.br.str << "' ");
 92 |             }
 93 |             else if (rulePart.substr(0, 3) == "rx:") {
 94 |                 string rx = rulePart.substr(3);
 95 |                 std::transform(rx.begin(), rx.end(), rx.begin(), tolower);
 96 |                 try {
 97 |                     rule.br.rx = regex(rx, std::regex::optimize);
 98 |                 } catch (std::regex_error &e) {
 99 |                     err << "rx:" << rx << " " << parseCode(e.code()) << endl;
100 |                     error = true;
101 |                 }
102 |                 rule.br.match_type = RX;
103 |                 DEBUG_CONF_MR("rx='" << rx << "' ");
104 |             } else if (rulePart == "d:libinj_sql") {
105 |                 rule.br.match_type = LIBINJ_SQL;
106 |                 DEBUG_CONF_MR("d='libinj_sql' ");
107 |             } else if (rulePart == "d:libinj_xss") {
108 |                 rule.br.match_type = LIBINJ_XSS;
109 |                 DEBUG_CONF_MR("d='libinj_xss' ");
110 |             } else if (rulePart.substr(0, 4) == "msg:") {
111 |                 rule.logMsg = rulePart.substr(4);
112 |                 DEBUG_CONF_MR("msg='" << rule.logMsg << "' ");
113 |             } else if (rulePart.substr(0, 3) == "mz:") {
114 |                 string rawMatchZone = rulePart.substr(3);
115 |                 parseMatchZone(rule, rawMatchZone, err);
116 |             } else if (rulePart.substr(0, 2) == "s:") {
117 |                 string score = rulePart.substr(2);
118 |                 vector<string> scores = split(score, ',');
119 |                 DEBUG_CONF_MR("score=[");
120 |                 for (const string &sc : scores) {
121 |                     if (sc.front() == '$') { // $SCORE
122 |                         pair<string, string> scorepair = splitAtFirst(sc, ":");
123 |                         rule.scores.emplace_back(scorepair.first, std::stoul(scorepair.second));
124 |                         DEBUG_CONF_MR("'" << scorepair.first << "'='" << scorepair.second << "'");
125 |                     } else // action
126 |                         parseAction(sc, rule.action);
127 |                 }
128 |                 DEBUG_CONF_MR("] ");
129 |             } else if (rulePart.substr(0, 3) == "id:") {
130 |                 rule.id = std::stoul(rulePart.substr(3));
131 |                 DEBUG_CONF_MR("id='" << rule.id << "' ");
132 |             }
133 |         }
134 | 
135 |         if (!error) {
136 |             /*
137 |              * Naxsi has a bug that adds rules twice if there is multiple custom locations
138 |              * "issue: Multiple *_VAR lead to multiple matching"
139 |              * Handled here (enhancement)
140 |              */
141 |             if (rule.br.headersMz || rule.br.headersVarMz) { // push in headers rules
142 |                 headerRules.push_back(rule);
143 |                 DEBUG_CONF_MR("[header] ");
144 |             }
145 |             if (rule.br.bodyMz || rule.br.bodyVarMz) { // push in body match rules (POST/PUT)
146 |                 bodyRules.push_back(rule);
147 |                 DEBUG_CONF_MR("[body] ");
148 |             }
149 |             if (rule.br.rawBodyMz) { // push in raw body match rules (POST/PUT)
150 |                 rawBodyRules.push_back(rule);
151 |                 DEBUG_CONF_MR("[rawbody] ");
152 |             }
153 |             if (rule.br.urlMz || rule.br.specificUrlMz) { // push in generic rules, as it's matching the URI
154 |                 genericRules.push_back(rule);
155 |                 DEBUG_CONF_MR("[generic] ");
156 |             }
157 |             if (rule.br.argsMz ||
158 |                 rule.br.argsVarMz) { // push in GET arg rules, but we should push in POST rules too
159 |                 getRules.push_back(rule);
160 |                 DEBUG_CONF_MR("[get] ");
161 |             }
162 |         }
163 | 
164 |         if (!error)
165 |             ruleCount++;
166 |         else
167 |             err << "MainRule #" << rule.id << " skipped" << endl;
168 |         DEBUG_CONF_MR(endl);
169 |     }
170 |     errorMsg = err.str();
171 |     ruleLines.clear();
172 |     return ruleCount;
173 | }
174 | 
175 | void RuleParser::parseCheckRule(vector<pair<string, string>> &rulesArray, string errorMsg) {
176 |     stringstream err;
177 |     for (const pair<string, string> &rule : rulesArray) {
178 |         const string &equation = rule.first;
179 |         const string &action = rule.second;
180 | 
181 |         DEBUG_CONF_CR("CheckRule ");
182 |         check_rule_t chkrule;
183 |         vector<string> eqParts = split(equation, ' ');
184 | 
185 |         string tag = (std::basic_string<char, std::char_traits<char>, std::allocator<char>> &&) rtrim(eqParts[0]);
186 |         DEBUG_CONF_CR(tag << " ");
187 | 
188 |         if (eqParts[1] == ">=") {
189 |             chkrule.comparator = SUP_OR_EQUAL;
190 |             DEBUG_CONF_CR(">= ");
191 |         } else if (eqParts[1] == ">") {
192 |             chkrule.comparator = SUP;
193 |             DEBUG_CONF_CR("> ");
194 |         } else if (eqParts[1] == "<=") {
195 |             chkrule.comparator = INF_OR_EQUAL;
196 |             DEBUG_CONF_CR("<= ");
197 |         } else if (eqParts[1] == "<") {
198 |             chkrule.comparator = INF;
199 |             DEBUG_CONF_CR("< ");
200 |         }
201 | 
202 |         try {
203 |             chkrule.limit = std::stoul(eqParts[2]);
204 |             DEBUG_CONF_CR(chkrule.limit << " ");
205 |         }
206 |         catch (std::exception const &e) {
207 |             err << e.what() << " cannot convert " << eqParts[2] << " to integer" << endl;
208 |             continue;
209 |         }
210 | 
211 |         parseAction(action, chkrule.action);
212 | 
213 |         checkRules[tag] = chkrule;
214 | 
215 |         DEBUG_CONF_CR(endl);
216 |     }
217 |     errorMsg = err.str();
218 |     rulesArray.clear();
219 | }
220 | 
221 | unsigned int RuleParser::parseBasicRules(vector<string> &ruleLines, string errorMsg) {
222 |     unsigned int ruleCount = 0;
223 |     stringstream err;
224 |     for (string &ruleLine : ruleLines) {
225 |         DEBUG_CONF_BR("BasicRule ");
226 |         http_rule_t rule;
227 |         rule.type = BASIC_RULE;
228 |         rule.whitelist = true;
229 | 
230 |         vector<string> ruleParts = parseRawDirective(ruleLine);
231 |         for (const string &rulePart : ruleParts) {
232 |             if (rulePart.substr(0, 3) == "wl:") {
233 |                 string rawWhitelist = rulePart.substr(3);
234 |                 rule.wlIds = splitToInt(rawWhitelist, ',');
235 | #ifdef DEBUG_CONFIG_BASICRULE
236 |                 DEBUG_CONF_BR("wl='");
237 |                 for (const int &id : rule.wlIds)
238 |                     DEBUG_CONF_BR(id << ".");
239 |                 DEBUG_CONF_BR("' ");
240 | #endif // !DEBUG_CONFIG_BASICRULE
241 |             } else if (rulePart.substr(0, 3) == "mz:") {
242 |                 string rawMatchZone = rulePart.substr(3);
243 |                 parseMatchZone(rule, rawMatchZone, err);
244 |                 rule.br.active = true;
245 |             }
246 |         }
247 | 
248 |         whitelistRules.push_back(rule);
249 |         ruleCount++;
250 |         DEBUG_CONF_BR(endl);
251 |     }
252 |     errorMsg = err.str();
253 |     ruleLines.clear();
254 |     return ruleCount;
255 | }
256 | 
257 | void RuleParser::parseAction(string action, rule_action_t &rule_action) {
258 |     if (action == "BLOCK") {
259 |         rule_action = BLOCK;
260 |         DEBUG_CONF_ACTN("BLOCK ");
261 |     } else if (action == "DROP") {
262 |         rule_action = DROP;
263 |         DEBUG_CONF_ACTN("DROP ");
264 |     } else if (action == "ALLOW") {
265 |         rule_action = ALLOW;
266 |         DEBUG_CONF_ACTN("ALLOW ");
267 |     } else if (action == "LOG") {
268 |         rule_action = LOG;
269 |         DEBUG_CONF_ACTN("LOG ");
270 |     }
271 | }
272 | 
273 | void RuleParser::parseMatchZone(http_rule_t &rule, string &rawMatchZone, stringstream &err) {
274 |     vector<string> matchZones = split(rawMatchZone, '|');
275 |     for (const string &mz : matchZones) {
276 |         if (mz[0] != '$') {
277 |             if (mz == "ARGS") {
278 |                 rule.br.argsMz = true;
279 |                 DEBUG_CONF_MZ("ARGS ");
280 |             } else if (mz == "HEADERS") {
281 |                 rule.br.headersMz = true;
282 |                 DEBUG_CONF_MZ("HEADERS ");
283 |             } else if (mz == "URL") {
284 |                 rule.br.urlMz = true;
285 |                 DEBUG_CONF_MZ("URL ");
286 |             } else if (mz == "BODY") {
287 |                 rule.br.bodyMz = true;
288 |                 DEBUG_CONF_MZ("BODY ");
289 |             } else if (mz == "RAWBODY") {
290 |                 rule.br.rawBodyMz = true;
291 |                 DEBUG_CONF_MZ("RAWBODY ");
292 |             } else if (mz == "FILE_EXT") {
293 |                 rule.br.fileExtMz = true;
294 |                 rule.br.bodyMz = true;
295 |                 DEBUG_CONF_MZ("FILE_EXT ");
296 |             } else if (mz == "NAME") {
297 |                 rule.br.targetName = true;
298 |                 DEBUG_CONF_MZ("NAME ");
299 |             }
300 |         } else {
301 |             custom_rule_location_t customRule;
302 |             rule.br.customLocation = true;
303 |             pair<string, string> cmz = splitAtFirst(mz, ":");
304 | 
305 |             if (cmz.first == "$ARGS_VAR") {
306 |                 customRule.argsVar = true;
307 |                 rule.br.argsVarMz = true;
308 |                 DEBUG_CONF_MZ("$ARGS_VAR ");
309 |             } else if (cmz.first == "$HEADERS_VAR") {
310 |                 customRule.headersVar = true;
311 |                 rule.br.headersVarMz = true;
312 |                 DEBUG_CONF_MZ("$HEADERS_VAR ");
313 |             } else if (cmz.first == "$URL") {
314 |                 customRule.specificUrl = true;
315 |                 rule.br.specificUrlMz = true;
316 |                 DEBUG_CONF_MZ("$URL ");
317 |             } else if (cmz.first == "$BODY_VAR") {
318 |                 customRule.bodyVar = true;
319 |                 rule.br.bodyVarMz = true;
320 |                 DEBUG_CONF_MZ("$BODY_VAR ");
321 |             } else if (cmz.first == "$ARGS_VAR_X") {
322 |                 customRule.argsVar = true;
323 |                 rule.br.argsVarMz = true;
324 |                 rule.br.rxMz = true;
325 |                 DEBUG_CONF_MZ("$ARGS_VAR_X ");
326 |             } else if (cmz.first == "$HEADERS_VAR_X") {
327 |                 customRule.headersVar = true;
328 |                 rule.br.headersVarMz = true;
329 |                 rule.br.rxMz = true;
330 |                 DEBUG_CONF_MZ("$HEADERS_VAR_X ");
331 |             } else if (cmz.first == "$URL_X") {
332 |                 customRule.specificUrl = true;
333 |                 rule.br.specificUrlMz = true;
334 |                 rule.br.rxMz = true;
335 |                 DEBUG_CONF_MZ("$URL_X ");
336 |             } else if (cmz.first == "$BODY_VAR_X") {
337 |                 customRule.bodyVar = true;
338 |                 rule.br.bodyVarMz = true;
339 |                 rule.br.rxMz = true;
340 |                 DEBUG_CONF_MZ("$BODY_VAR_X ");
341 |             }
342 | 
343 |             if (!rule.br.rxMz) { // String MatchZone
344 |                 std::transform(cmz.second.begin(), cmz.second.end(), cmz.second.begin(), tolower);
345 |                 customRule.target = cmz.second;
346 |                 DEBUG_CONF_MZ("(str)" << cmz.second << " ");
347 |             } else { // Regex MatchZone
348 |                 try {
349 |                     customRule.targetRx = regex(cmz.second, std::regex::optimize);
350 |                 } catch (std::regex_error &e) {
351 |                     err << "regex_error: " << parseCode(e.code()) << endl;
352 |                     continue;
353 |                 }
354 |                 DEBUG_CONF_MZ("(rx)" << cmz.second << " ");
355 |             }
356 |             rule.br.customLocations.push_back(customRule);
357 |         }
358 |     }
359 |     DEBUG_CONF_MZ((rule.br.rxMz ? "(rxMz) " : " "));
360 | }
361 | 
362 | /* check rule, returns associed zone, as well as location index.
363 |   location index refers to $URL:bla or $ARGS_VAR:bla */
364 | void RuleParser::wlrIdentify(const http_rule_t &curr, MATCH_ZONE &zone, int &uriIndex, int &nameIndex) {
365 |     if (curr.br.bodyMz || curr.br.bodyVarMz)
366 |         zone = BODY;
367 |     else if (curr.br.headersMz || curr.br.headersVarMz)
368 |         zone = HEADERS;
369 |     else if (curr.br.argsMz || curr.br.argsVarMz)
370 |         zone = ARGS;
371 |     else if (curr.br.urlMz) /*don't assume that named $URL means zone is URL.*/
372 |         zone = URL;
373 |     else if (curr.br.fileExtMz)
374 |         zone = FILE_EXT;
375 | 
376 |     size_t i = 0;
377 |     for ( i = 0; i < curr.br.customLocations.size(); i++) {
378 |         const custom_rule_location_t &loc = curr.br.customLocations[i];
379 |         if (loc.specificUrl) {
380 |             uriIndex = i;
381 |         }
382 |         if (loc.bodyVar) {
383 |             if (nameIndex != -1) {
384 |                 DEBUG_CONF_HT("whitelist can't target more than one BODY item.");
385 |                 return;
386 |             }
387 |             nameIndex = i;
388 |             zone = BODY;
389 |         }
390 |         if (loc.headersVar) {
391 |             if (nameIndex != -1) {
392 |                 DEBUG_CONF_HT("whitelist can't target more than one HEADERS item.");
393 |                 return;
394 |             }
395 |             nameIndex = i;
396 |             zone = HEADERS;
397 |         }
398 |         if (loc.argsVar) {
399 |             if (nameIndex != -1) {
400 |                 DEBUG_CONF_HT("whitelist can't target more than one ARGS item.");
401 |                 return;
402 |             }
403 |             nameIndex = i;
404 |             zone = ARGS;
405 |         }
406 |     }
407 | }
408 | 
409 | void RuleParser::wlrFind(const http_rule_t &curr, whitelist_rule_t &father_wlr, MATCH_ZONE &zone, int &uriIndex,
410 |                          int &nameIndex) {
411 |     string fullname = "";
412 |     /* if WL targets variable name instead of content, prefix hash with '#' */
413 |     if (curr.br.targetName) {
414 |         DEBUG_CONF_WLRF("whitelist targets |NAME");
415 |         fullname += "#";
416 |     }
417 |     if (uriIndex != -1 && nameIndex != -1) { // name AND uri
418 |         DEBUG_CONF_WLRF("whitelist has uri + name");
419 |         fullname += curr.br.customLocations[uriIndex].target + "#" + curr.br.customLocations[nameIndex].target;
420 |     } else if (uriIndex != -1) { // only uri
421 |         DEBUG_CONF_WLRF("whitelist has uri");
422 |         fullname += curr.br.customLocations[uriIndex].target;
423 |     } else if (nameIndex != -1) { // only name
424 |         DEBUG_CONF_WLRF("whitelist has name");
425 |         fullname += curr.br.customLocations[nameIndex].target;
426 |     } else {
427 |         DEBUG_CONF_WLRF("wlrFind problem");
428 |         return;
429 |     }
430 | 
431 |     for (const whitelist_rule_t &wlr : tmpWlr) {
432 |         if (wlr.name == fullname && wlr.zone == zone) {
433 |             DEBUG_CONF_WLRF("found existing 'same' WL : " << wlr.name);
434 |             father_wlr = wlr;
435 |             return;
436 |         }
437 |     }
438 | 
439 |     /*
440 |     * Creates a new whitelist rule in the right place.
441 |     * setup name and zone
442 |     */
443 |     father_wlr.name = fullname;
444 |     father_wlr.zone = zone;
445 |     /* If there is URI and no name idx, specify it,
446 | 	 so that WL system won't get fooled by an argname like an URL */
447 |     if (uriIndex != -1 && nameIndex == -1)
448 |         father_wlr.uriOnly = true;
449 |     if (curr.br.targetName) // If targetName is present in son, report it
450 |         father_wlr.targetName = curr.br.targetName;
451 | }
452 | 
453 | /*
454 | ** This function will take the whitelist basicrules generated during the configuration
455 | ** parsing phase, and aggregate them to build hashtables according to the matchzones.
456 | **
457 | ** As whitelist can be in the form :
458 | ** "mz:$URL:bla|$ARGS_VAR:foo"
459 | ** "mz:$URL:bla|ARGS"
460 | ** "mz:$HEADERS_VAR:Cookie"
461 | ** ...
462 | **
463 | ** So, we will aggregate all the rules that are pointing to the same URL together,
464 | ** as well as rules targetting the same argument name / zone.
465 | */
466 | void RuleParser::generateHashTables() {
467 |     for (http_rule_t &curr_r : whitelistRules) {
468 |         int uriIndex = -1, nameIndex = -1;
469 |         MATCH_ZONE zone = UNKNOWN;
470 | 
471 |         /* no custom location at all means that the rule is disabled */
472 |         if (curr_r.br.customLocations.empty()) {
473 |             disabled_rules.push_back(curr_r);
474 |             continue;
475 |         }
476 |         wlrIdentify(curr_r, zone, uriIndex, nameIndex);
477 |         curr_r.br.zone = zone;
478 | 
479 |         /*
480 |         ** Handle regular-expression-matchzone rules :
481 |         ** Store them in a separate linked list, parsed
482 |         ** at runtime.
483 |         */
484 |         if (curr_r.br.rxMz) {
485 |             /*
486 |              * Naxsi converts custom location string target to regex target here,
487 |              * because it does not handle whitelist that mix _X elements with _VAR or $URL items.
488 |              * Not necessary ! Mod Defender supports it ;) (enhancement)
489 |              */
490 | 
491 |             rxMzWlr.push_back(curr_r);
492 |             continue;
493 |         }
494 | 
495 |         /*
496 |         ** Handle static match-zones for hashtables
497 |         */
498 |         whitelist_rule_t father_wl;
499 |         wlrFind(curr_r, father_wl, zone, uriIndex, nameIndex);
500 |         /* merge the two rules into father_wl, meaning ids. Not locations, as we are getting rid of it */
501 |         father_wl.ids.insert(father_wl.ids.end(), curr_r.wlIds.begin(), curr_r.wlIds.end());
502 | 
503 |         tmpWlr.push_back(father_wl);
504 |     }
505 | 
506 |     for (const whitelist_rule_t &wlr : tmpWlr) {
507 |         switch (wlr.zone) {
508 |             case FILE_EXT:
509 |             case BODY:
510 |                 wlBodyHash[wlr.name] = wlr;
511 |                 DEBUG_CONF_HT("body hash: " << wlr.name);
512 |                 break;
513 |             case HEADERS:
514 |                 wlHeadersHash[wlr.name] = wlr;
515 |                 DEBUG_CONF_HT("header hash: " << wlr.name);
516 |                 break;
517 |             case URL:
518 |                 wlUrlHash[wlr.name] = wlr;
519 |                 DEBUG_CONF_HT("url hash: " << wlr.name);
520 |                 break;
521 |             case ARGS:
522 |                 wlArgsHash[wlr.name] = wlr;
523 |                 DEBUG_CONF_HT("args hash: " << wlr.name);
524 |                 break;
525 |             default:
526 |                 DEBUG_CONF_HT("Unknown zone" << endl);
527 |                 return;
528 |         }
529 |         DEBUG_CONF_HT(endl);
530 |     }
531 | }
532 | 
533 | bool RuleParser::checkIds(unsigned long matchId, const vector<int> &wlIds) {
534 |     bool negative = false;
535 | 
536 |     for (auto &wlId : wlIds) {
537 |         if (wlId == matchId)
538 |             return true;
539 |         if (wlId == 0) // WHY ??
540 |             return true;
541 |         if (wlId < 0 && matchId >= 1000) { // manage negative whitelists, except for internal rules
542 |             negative = true;
543 |             if (matchId == -wlId) // negative wl excludes this one
544 |                 return false;
545 |         }
546 |     }
547 |     return negative;
548 | }
549 | 
550 | bool RuleParser::isWhitelistAdapted(whitelist_rule_t &wlrule, MATCH_ZONE zone, const http_rule_t &rule, MATCH_TYPE type,
551 |                                     bool targetName) {
552 |     if (zone == FILE_EXT)
553 |         zone = BODY; // FILE_EXT zone is just a hack, as it indeed targets BODY
554 | 
555 |     if (wlrule.targetName && !targetName) { // if whitelist targets arg name, but the rules hit content
556 |         DEBUG_CONF_WL("whitelist targets name, but rule matched content.");
557 |         return false;
558 |     }
559 |     if (!wlrule.targetName && targetName) { // if if the whitelist target contents, but the rule hit arg name
560 |         DEBUG_CONF_WL("whitelist targets content, but rule matched name.");
561 |         return false;
562 |     }
563 | 
564 | 
565 |     if (type == NAME_ONLY) {
566 |         DEBUG_CONF_WL("Name match in zone " <<
567 |                                             (zone == ARGS ? "ARGS" : zone == BODY ? "BODY" : zone == HEADERS ? "HEADERS"
568 |                                                                                                              : "UNKNOWN!!!!!"));
569 |         //False Positive, there was a whitelist that matches the argument name,
570 |         // But is was actually matching an existing URI name.
571 |         if (zone != wlrule.zone || wlrule.uriOnly) {
572 |             DEBUG_CONF_WL("bad whitelist, name match, but WL was only on URL.");
573 |             return false;
574 |         }
575 |         return (checkIds(rule.id, wlrule.ids));
576 |     }
577 |     if (type == URI_ONLY ||
578 |         type == MIXED) {
579 |         /* zone must match */
580 |         if (wlrule.uriOnly && type != URI_ONLY) {
581 |             DEBUG_CONF_WL("bad whitelist, type is URI_ONLY, but not whitelist");
582 |             return false;
583 |         }
584 | 
585 |         if (zone != wlrule.zone) {
586 |             DEBUG_CONF_WL("bad whitelist, URL match, but not zone");
587 |             return false;
588 |         }
589 | 
590 |         return (checkIds(rule.id, wlrule.ids));
591 |     }
592 |     DEBUG_CONF_WL("finished wl check, failed.");
593 | 
594 |     return false;
595 | }
596 | 
597 | bool RuleParser::isRuleWhitelisted(const http_rule_t &rule, const string &uri, const string &name, MATCH_ZONE zone,
598 |                                    bool targetName) {
599 |     /* Check if the rule is part of disabled rules for this location */
600 |     for (const http_rule_t &disabledRule : disabled_rules) {
601 |         if (checkIds(rule.id, disabledRule.wlIds)) { // Is rule disabled ?
602 |             /* If rule target nothing, it's whitelisted everywhere */
603 |             if (!(disabledRule.br.argsMz || disabledRule.br.headersMz ||
604 |                   disabledRule.br.bodyMz || disabledRule.br.urlMz)) {
605 |                 DEBUG_CONF_WL("rule " << rule.id << " not targeting any zone, whitelisted everywhere");
606 |                 return true;
607 |             }
608 | 
609 |             if (!disabledRule.br.active) { // if it doesn't specify zone, skip zone-check
610 |                 DEBUG_CONF_WL("rule " << rule.id << " not targeting any zone, skipping zone-check");
611 |                 continue;
612 |             }
613 | 
614 |             /* if exc is in name, but rule is not specificaly disabled for name (and targets a zone)  */
615 |             if (targetName != disabledRule.br.targetName)
616 |                 continue;
617 | 
618 |             switch (zone) {
619 |                 case ARGS:
620 |                     if (disabledRule.br.argsMz) {
621 |                         DEBUG_CONF_WL("rule " << rule.id << " is disabled in ARGS");
622 |                         return true;
623 |                     }
624 |                     break;
625 |                 case HEADERS:
626 |                     if (disabledRule.br.headersMz) {
627 |                         DEBUG_CONF_WL("rule " << rule.id << " is disabled in HEADERS");
628 |                         return true;
629 |                     }
630 |                     break;
631 |                 case BODY:
632 |                     if (disabledRule.br.bodyMz) {
633 |                         DEBUG_CONF_WL("rule " << rule.id << " is disabled in BODY");
634 |                         return true;
635 |                     }
636 |                     break;
637 |                 case FILE_EXT:
638 |                     if (disabledRule.br.fileExtMz) {
639 |                         DEBUG_CONF_WL("rule " << rule.id << " is disabled in FILE_EXT");
640 |                         return true;
641 |                     }
642 |                     break;
643 |                 case URL:
644 |                     if (disabledRule.br.urlMz) {
645 |                         DEBUG_CONF_WL("rule " << rule.id << " is disabled in URL zone:" << zone);
646 |                         return true;
647 |                     }
648 |                     break;
649 |                 default:
650 |                     break;
651 |             }
652 |         }
653 |     }
654 | 
655 |     whitelist_rule_t wlRule;
656 | 
657 |     /* check for ARGS_VAR:x(|NAME) whitelists. */
658 |     /* (name) or (#name) */
659 |     if (name.length() > 0) {
660 |         /* try to find in hashtables */
661 |         bool found = findWlInHash(wlRule, name, zone);
662 |         if (found && isWhitelistAdapted(wlRule, zone, rule, NAME_ONLY, targetName))
663 |             return true;
664 | 
665 |         string hashname = "#" + name;
666 |         DEBUG_CONF_WL("hashing varname [" << name << "] (rule:" << rule.id << ") - 'wl:X_VAR:" << name << "%V|NAME'");
667 |         found = findWlInHash(wlRule, hashname, zone);
668 |         if (found && isWhitelistAdapted(wlRule, zone, rule, NAME_ONLY, targetName))
669 |             return true;
670 |     }
671 | 
672 |     /* Plain URI whitelists */
673 |     /* check the URL no matter what zone we're in */
674 |     if (!wlUrlHash.empty()) {
675 |         /* mimic find_wl_in_hash, we are looking in a different hashtable */
676 |         string hashname = string(uri);
677 |         std::transform(hashname.begin(), hashname.end(), hashname.begin(), tolower);
678 |         DEBUG_CONF_WL("hashing uri [" << hashname << "] (rule:" << rule.id << ") 'wl:$URI:" << hashname << "|*'");
679 | 
680 |         unordered_map<string, whitelist_rule_t>::const_iterator it = wlUrlHash.find(hashname);
681 |         bool found = false;
682 |         if (it != wlUrlHash.end()) {
683 |             wlRule = it->second;
684 |             found = true;
685 |         }
686 | 
687 |         if (found && isWhitelistAdapted(wlRule, zone, rule, URI_ONLY, targetName))
688 |             return true;
689 |     }
690 | 
691 |     /* Lookup for $URL|URL (uri)*/
692 |     DEBUG_CONF_WL("hashing uri#1 [" << uri << "] (rule:" << rule.id << ") ($URL:X|URI)");
693 |     bool found = findWlInHash(wlRule, uri, zone);
694 |     if (found && isWhitelistAdapted(wlRule, zone, rule, URI_ONLY, targetName))
695 |         return true;
696 | 
697 |     /* Looking $URL:x|ZONE|NAME */
698 |     string hashname = "#" + uri;
699 |     DEBUG_CONF_WL("hashing uri#3 [" << hashname << "] (rule:" << rule.id << ") ($URL:X|ZONE|NAME)");
700 |     found = findWlInHash(wlRule, hashname, zone);
701 |     if (found && isWhitelistAdapted(wlRule, zone, rule, URI_ONLY, targetName))
702 |         return true;
703 | 
704 |     /* Maybe it was $URL+$VAR (uri#name) or (#uri#name) */
705 |     hashname.clear();
706 |     if (targetName) {
707 |         hashname += "#";
708 |     }
709 |     hashname += uri + "#" + name;
710 |     DEBUG_CONF_WL("hashing MIX [" << hashname << "] ($URL:x|$X_VAR:y) or ($URL:x|$X_VAR:y|NAME)");
711 |     found = findWlInHash(wlRule, hashname, zone);
712 |     if (found && isWhitelistAdapted(wlRule, zone, rule, MIXED, targetName))
713 |         return true;
714 | 
715 |     if (isRuleWhitelistedRx(rule, uri, name, zone, targetName)) {
716 |         DEBUG_CONF_WL("Whitelisted by RX !");
717 |         return true;
718 |     }
719 | 
720 |     return false;
721 | }
722 | 
723 | bool RuleParser::isRuleWhitelistedRx(const http_rule_t &rule, const string uri, const string &name,
724 |                                      MATCH_ZONE zone, bool targetName) {
725 |     /* Look it up in regexed whitelists for matchzones */
726 |     if (rxMzWlr.empty()) {
727 |         DEBUG_CONF_WL("No rx matchzone rules");
728 |         return false;
729 |     }
730 | 
731 |     for (const http_rule_t &rxMzRule : rxMzWlr) {
732 |         if (!rxMzRule.br.active || rxMzRule.br.customLocations.empty()) {
733 |             DEBUG_CONF_WL("Rule pushed to RXMZ, but has no custom_location.");
734 |             continue;
735 |         }
736 | 
737 |         /*
738 |         ** once we have pointer to the rxMzRule :
739 |         ** - go through each custom location (ie. ARGS_VAR_X:foobar*)
740 |         ** - verify that regular expressions match. If not, it means whitelist does not apply.
741 |         */
742 |         if (rxMzRule.br.zone != zone) {
743 |             DEBUG_CONF_WL("Not targeting same zone: custom rule loc zone: " << match_zones[rxMzRule.br.zone] <<
744 |                                                                             " current zone: " << match_zones[zone]);
745 |             continue;
746 |         }
747 | 
748 |         if (targetName != rxMzRule.br.targetName) {
749 |             DEBUG_CONF_WL("Only one target name");
750 |             continue;
751 |         }
752 | 
753 |         bool violation = false;
754 |         for (const custom_rule_location_t &loc : rxMzRule.br.customLocations) {
755 |             if (loc.bodyVar) {
756 |                 if (!loc.target.empty()) {
757 |                     if (name != loc.target) {
758 |                         violation = true;
759 |                         DEBUG_CONF_WL("[BODY] FAIL (str:" << name << ")");
760 |                         break;
761 |                     }
762 |                     DEBUG_CONF_WL("[BODY] Match (str:" << name << ")");
763 |                 } else {
764 |                     if (!regex_search(name, loc.targetRx)) {
765 |                         violation = true;
766 |                         DEBUG_CONF_WL("[BODY] RX FAIL (str:" << name << ")");
767 |                         break;
768 |                     }
769 |                     DEBUG_CONF_WL("[BODY] RX Match (str:" << name << ")");
770 |                 }
771 |             }
772 |             if (loc.argsVar) {
773 |                 if (!loc.target.empty()) {
774 |                     if (name != loc.target) {
775 |                         violation = true;
776 |                         DEBUG_CONF_WL("[ARGS] FAIL (str:" << name << ")");
777 |                         break;
778 |                     }
779 |                     DEBUG_CONF_WL("[ARGS] Match (str:" << name << ")");
780 |                 } else {
781 |                     if (!regex_search(name, loc.targetRx)) {
782 |                         violation = true;
783 |                         DEBUG_CONF_WL("[ARGS] RX FAIL (str:" << name << ")");
784 |                         break;
785 |                     }
786 |                     DEBUG_CONF_WL("[ARGS] RX Match (str:" << name << ")");
787 |                 }
788 |             }
789 |             if (loc.specificUrl) {
790 |                 if (!loc.target.empty()) {
791 |                     if (uri != loc.target) {
792 |                         violation = true;
793 |                         DEBUG_CONF_WL("[URI] FAIL (str:" << uri << ")");
794 |                         break;
795 |                     }
796 |                     DEBUG_CONF_WL("[URI] Match (str:" << uri << ")");
797 |                 } else {
798 |                     if (!regex_search(uri, loc.targetRx)) {
799 |                         violation = true;
800 |                         DEBUG_CONF_WL("[URI] RX FAIL (str:" << uri << ")");
801 |                         break;
802 |                     }
803 |                     DEBUG_CONF_WL("[URI] RX Match (str:" << uri << ")");
804 |                 }
805 |             }
806 |         }
807 | 
808 |         if (!violation) {
809 |             DEBUG_CONF_WL("rxMzRule whitelisted by rx");
810 |             if (checkIds(rule.id, rxMzRule.wlIds))
811 |                 return true;
812 |         }
813 |     }
814 |     return false;
815 | }
816 | 
817 | bool RuleParser::findWlInHash(whitelist_rule_t &wlRule, const string &key, MATCH_ZONE zone) {
818 |     if (zone == BODY || zone == FILE_EXT) {
819 |         unordered_map<string, whitelist_rule_t>::const_iterator it = wlBodyHash.find(key);
820 |         if (it != wlBodyHash.end()) {
821 |             wlRule = it->second;
822 |             return true;
823 |         }
824 |     } else if (zone == HEADERS) {
825 |         unordered_map<string, whitelist_rule_t>::const_iterator it = wlHeadersHash.find(key);
826 |         if (it != wlHeadersHash.end()) {
827 |             wlRule = it->second;
828 |             return true;
829 |         }
830 |     } else if (zone == URL) {
831 |         unordered_map<string, whitelist_rule_t>::const_iterator it = wlUrlHash.find(key);
832 |         if (it != wlUrlHash.end()) {
833 |             wlRule = it->second;
834 |             return true;
835 |         }
836 |     } else if (zone == ARGS) {
837 |         unordered_map<string, whitelist_rule_t>::const_iterator it = wlArgsHash.find(key);
838 |         if (it != wlArgsHash.end()) {
839 |             wlRule = it->second;
840 |             return true;
841 |         }
842 |     }
843 |     return false;
844 | }
845 | 
846 | string RuleParser::parseCode(std::regex_constants::error_type etype) {
847 |     switch (etype) {
848 |         case std::regex_constants::error_collate:
849 |             return "error_collate: invalid collating element request";
850 |         case std::regex_constants::error_ctype:
851 |             return "error_ctype: invalid character class";
852 |         case std::regex_constants::error_escape:
853 |             return "error_escape: invalid escape character or trailing escape";
854 |         case std::regex_constants::error_backref:
855 |             return "error_backref: invalid back reference";
856 |         case std::regex_constants::error_brack:
857 |             return "error_brack: mismatched bracket([ or ])";
858 |         case std::regex_constants::error_paren:
859 |             return "error_paren: mismatched parentheses(( or ))";
860 |         case std::regex_constants::error_brace:
861 |             return "error_brace: mismatched brace({ or })";
862 |         case std::regex_constants::error_badbrace:
863 |             return "error_badbrace: invalid range inside a { }";
864 |         case std::regex_constants::error_range:
865 |             return "erro_range: invalid character range(e.g., [z-a])";
866 |         case std::regex_constants::error_space:
867 |             return "error_space: insufficient memory to handle this regular expression";
868 |         case std::regex_constants::error_badrepeat:
869 |             return "error_badrepeat: a repetition character (*, ?, +, or {) was not preceded by a valid regular expression";
870 |         case std::regex_constants::error_complexity:
871 |             return "error_complexity: the requested match is too complex";
872 |         case std::regex_constants::error_stack:
873 |             return "error_stack: insufficient memory to evaluate a match";
874 |         default:
875 |             return "";
876 |     }
877 | }


--------------------------------------------------------------------------------