├── .gitignore ├── AUTHORS ├── COPYING ├── ChangeLog ├── KERN_WARN_MSG.txt ├── LICENSE ├── Makefile ├── README.md ├── dkms.conf ├── libc ├── Makefile ├── libc.c └── libc.h ├── pcre2 ├── AUTHORS ├── LICENCE ├── Makefile ├── config.h ├── main.c ├── pcre2.h ├── pcre2_auto_possess.c ├── pcre2_chartables.c ├── pcre2_compile.c ├── pcre2_config.c ├── pcre2_context.c ├── pcre2_dfa_match.c ├── pcre2_error.c ├── pcre2_find_bracket.c ├── pcre2_internal.h ├── pcre2_intmodedep.h ├── pcre2_jit_compile.c ├── pcre2_jit_match.c ├── pcre2_jit_misc.c ├── pcre2_maketables.c ├── pcre2_match.c ├── pcre2_match_data.c ├── pcre2_newline.c ├── pcre2_ord2utf.c ├── pcre2_pattern_info.c ├── pcre2_serialize.c ├── pcre2_string_utils.c ├── pcre2_study.c ├── pcre2_substitute.c ├── pcre2_substring.c ├── pcre2_tables.c ├── pcre2_ucd.c ├── pcre2_ucp.h ├── pcre2_valid_utf.c ├── pcre2_xclass.c ├── pcre2posix.c ├── pcre2posix.h └── sljit │ ├── sljitConfig.h │ ├── sljitConfigInternal.h │ ├── sljitExecAllocator.c │ ├── sljitLir.c │ ├── sljitLir.h │ ├── sljitNativeARM_32.c │ ├── sljitNativeARM_64.c │ ├── sljitNativeARM_T2_32.c │ ├── sljitNativeMIPS_32.c │ ├── sljitNativeMIPS_64.c │ ├── sljitNativeMIPS_common.c │ ├── sljitNativePPC_32.c │ ├── sljitNativePPC_64.c │ ├── sljitNativePPC_common.c │ ├── sljitNativeSPARC_32.c │ ├── sljitNativeSPARC_common.c │ ├── sljitNativeTILEGX-encoder.c │ ├── sljitNativeTILEGX_64.c │ ├── sljitNativeX86_32.c │ ├── sljitNativeX86_64.c │ ├── sljitNativeX86_common.c │ └── sljitUtils.c ├── ts_pcre ├── Makefile └── ts_pcre.c └── ts_regex ├── Makefile └── ts_regex.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | 34 | # Linux Kernel Module 35 | *.tmp_versions 36 | *.cmd 37 | *.mod.c 38 | modules.order 39 | Module.symvers 40 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Seongmyun Cho 2 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | Change Log for KPCRE 2 | -------------------- 3 | 4 | Version 1.0 23-November-2016 5 | ----------------------------- 6 | 7 | 1. Fixed an issue about executing x86 SSE2 instructions under kernel mode 8 | -------------------------------------------------------------------------------- /KERN_WARN_MSG.txt: -------------------------------------------------------------------------------- 1 | When you use the PCRE text search module with iptables on an x86 Linux platform, the following kernel warning message may appear. 2 | 3 | The warning message can be ignored because it is the result of the intended operation. 4 | 5 | Because the x86 JIT code uses SSE2 instructions inside the kernel, kernel_fpu_begin()/kernel_fpu_end() must be called, which is unusual for kernel modules. 6 | 7 | The kernel prints the message when "kernel_fpu_begin()" happens to be called in softirq context. 
8 | 9 | [62016.403553] ------------[ cut here ]------------ 10 | [62016.403568] WARNING: at /usr/src/kernels/3.10.0-327.36.3.el7.x86_64/arch/x86/include/asm/i387.h:43 pcre2_jit_match_8+0x248/0x260 [libpcre2_8]() 11 | [62016.403570] Modules linked in: ts_pcre(OE) libpcre2_8(OE) libc(OE) xt_string xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter coretemp iTCO_wdt ppdev iTCO_vendor_support kvm_intel lpc_ich pcspkr i2c_i801 kvm mfd_core sg parport_pc parport i7core_edac edac_core ioatdma shpchp acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_common ata_generic pata_acpi crc32c_intel ata_piix serio_raw libata igb ptp pps_core i2c_algo_bit i2c_core dca dm_mirror dm_region_hash dm_log dm_mod [last unloaded: libc] 12 | [62016.403624] CPU: 0 PID: 1307 Comm: redis-server Tainted: G W OE ------------ 3.10.0-327.36.3.el7.x86_64 #1 13 | [62016.403626] Hardware name: Intel Thurley/Greencity, BIOS 080015 07/09/2010 14 | [62016.403628] 0000000000000000 000000005b43eac9 ffff8801b9a034b8 ffffffff81636431 15 | [62016.403632] ffff8801b9a034f0 ffffffff8107b260 ffff8800363edd6a ffff8800b633d080 16 | [62016.403635] 0000000000000001 ffff880335e387e0 ffff88032c4778c0 ffff8801b9a03500 17 | [62016.403638] Call Trace: 18 | [62016.403639] [] dump_stack+0x19/0x1b 19 | [62016.403652] [] warn_slowpath_common+0x70/0xb0 20 | [62016.403655] [] warn_slowpath_null+0x1a/0x20 21 | [62016.403660] [] pcre2_jit_match_8+0x248/0x260 [libpcre2_8] 22 | [62016.403667] [] pcre2_match_8+0x45f/0x12b0 [libpcre2_8] 23 | [62016.403672] [] ? pcre2_match_8+0x45f/0x12b0 [libpcre2_8] 24 | [62016.403677] [] ? list_del+0xd/0x30 25 | [62016.403682] [] ? zone_statistics+0x89/0xa0 26 | [62016.403687] [] ? 
add_interrupt_randomness+0x50/0x1b0 27 | [62016.403693] [] ? __alloc_pages_nodemask+0x197/0xba0 28 | [62016.403697] [] ? tick_broadcast_set_event+0x3f/0xc0 29 | [62016.403700] [] ? add_interrupt_randomness+0x50/0x1b0 30 | [62016.403704] [] ? 0xffffffffa002ffff 31 | [62016.403708] [] ? list_del+0xd/0x30 32 | [62016.403711] [] pcre_find+0x7c/0x110 [ts_pcre] 33 | [62016.403716] [] skb_find_text+0x67/0xa0 34 | [62016.403720] [] string_mt+0x6e/0xa8 [xt_string] 35 | [62016.403725] [] ipt_do_table+0x262/0x710 [ip_tables] 36 | [62016.403729] [] ? nf_nat_setup_info+0x92/0x350 [nf_nat] 37 | [62016.403734] [] iptable_filter_hook+0x36/0x80 [iptable_filter] 38 | [62016.403739] [] nf_iterate+0x70/0xb0 39 | [62016.403742] [] nf_hook_slow+0xa8/0x110 40 | [62016.403745] [] ip_local_deliver+0xb2/0xd0 41 | [62016.403748] [] ? ip_rcv_finish+0x350/0x350 42 | [62016.403750] [] ip_rcv_finish+0x7d/0x350 43 | [62016.403753] [] ip_rcv+0x2b6/0x410 44 | [62016.403755] [] ? inet_del_offload+0x40/0x40 45 | [62016.403760] [] __netif_receive_skb_core+0x582/0x7d0 46 | [62016.403765] [] ? read_tsc+0x9/0x10 47 | [62016.403767] [] __netif_receive_skb+0x18/0x60 48 | [62016.403770] [] netif_receive_skb+0x40/0xc0 49 | [62016.403773] [] napi_gro_receive+0x80/0xb0 50 | [62016.403781] [] igb_clean_rx_irq+0x384/0x7a0 [igb] 51 | [62016.403786] [] ? enqueue_entity+0x237/0x8b0 52 | [62016.403793] [] igb_poll+0x37f/0x770 [igb] 53 | [62016.403797] [] ? sched_clock_cpu+0x85/0xc0 54 | [62016.403800] [] net_rx_action+0x152/0x240 55 | [62016.403804] [] __do_softirq+0xef/0x280 56 | [62016.403807] [] call_softirq+0x1c/0x30 57 | [62016.403811] [] do_softirq+0x65/0xa0 58 | [62016.403813] [] irq_exit+0x115/0x120 59 | [62016.403816] [] do_IRQ+0x58/0xf0 60 | [62016.403819] [] common_interrupt+0x6d/0x6d 61 | [62016.403821] [] ? do_last+0x6d0/0x1270 62 | [62016.403826] [] ? do_last+0x46f/0x1270 63 | [62016.403829] [] path_openat+0xc2/0x490 64 | [62016.403832] [] ? 
schedule_hrtimeout_range_clock+0xbe/0x150 65 | [62016.403836] [] ? hrtimer_get_res+0x50/0x50 66 | [62016.403839] [] do_filp_open+0x4b/0xb0 67 | [62016.403842] [] ? __alloc_fd+0xa7/0x130 68 | [62016.403845] [] do_sys_open+0xf3/0x1f0 69 | [62016.403848] [] SyS_open+0x1e/0x20 70 | [62016.403851] [] system_call_fastpath+0x16/0x1b 71 | [62016.403853] ---[ end trace af4b90c5524e6519 ]--- 72 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(strip $(CODE_WIDTH)),) 2 | export CODE_WIDTH=8 3 | endif 4 | 5 | $(info CODE_WIDTH is $(CODE_WIDTH)) 6 | 7 | SUBDIRS = \ 8 | libc \ 9 | pcre2 \ 10 | ts_pcre \ 11 | ts_regex 12 | 13 | all: 14 | @for d in $(SUBDIRS) ; do \ 15 | $(MAKE) -C $$d ; \ 16 | done 17 | 18 | modules: 19 | @for d in $(SUBDIRS) ; do \ 20 | $(MAKE) -C $$d modules ; \ 21 | done 22 | 23 | modules_install: 24 | @for d in $(SUBDIRS) ; do \ 25 | $(MAKE) -C $$d modules_install ; \ 26 | done 27 | 28 | clean: 29 | @for d in $(SUBDIRS) ; do \ 30 | $(MAKE) -C $$d clean ; \ 31 | done 32 | $(RM) cscope.* 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PCRE linux kernel module & PCRE/REGEX text search engine 2 | 3 | Keywords: Netfilter iptables PCRE REGEX Linux Kernel Module 4 | 5 | ### PCRE library kernel module (libpcre2-X.ko) 6 | 7 | The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5. 8 | PCRE has its own native API, as well as a set of wrapper functions that correspond to the POSIX regular expression API. 9 | 10 | This is a PCRE2 library ported to linux kernel. 11 | It is a full-fledged PCRE kernel module which supports JIT(Just-in-time) compilation. 
12 | 13 | Original sources: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre 14 | 15 | ### PCRE text search engine (ts_pcre.ko) 16 | 17 | This is a new text search engine based on the PCRE kernel module. 18 | 19 | ### Getting the source code 20 | 21 | To get the source code from the git repository 22 | 23 | ``` 24 | git clone https://github.com/smcho-kr/kpcre.git 25 | ``` 26 | 27 | ### Installing 28 | 29 | To install the modules follow these steps, always from the modules package root (i.e. where this file is located) 30 | 31 | To compile the modules first type: 32 | 33 | ``` 34 | make modules 35 | ``` 36 | 37 | Then, as root type: 38 | 39 | ``` 40 | make modules_install 41 | ``` 42 | 43 | That would install the pcre & ts_pcre modules for the given Linux kernel. 44 | 45 | For more detailed instructions on how to build and install the kernel modules, refer to [the installation guide](https://github.com/xnsystems/kpcre/wiki/Step-by-step-installation-guide). 46 | 47 | ### Usage 48 | 49 | Once you have installed both modules ("libpcre2-X.ko" and "ts_pcre.ko") 50 | you should type (as root): 51 | 52 | ``` 53 | modprobe ts_pcre 54 | ``` 55 | 56 | If the module has been successfully loaded you shouldn't see any message. 57 | After loading the kernel module you can use iptables to add a rule. 58 | 59 | An example rule would be (as root): 60 | 61 | ``` 62 | iptables -A INPUT -m string --string "/\x7C\x7C.+[a-z]/i" --algo pcre -j DROP 63 | ``` 64 | 65 | This wouldn't allow any incoming traffic that has the content matching the given PCRE in the payload. 66 | 67 | In case you want to stop using the ts_pcre kernel module, first remove every iptables rule for ts_pcre and then type (as root): 68 | 69 | ``` 70 | modprobe -r ts_pcre 71 | ``` 72 | 73 | There is also a REGEX text search engine. 
(ts_regex.ko) 74 | -------------------------------------------------------------------------------- /dkms.conf: -------------------------------------------------------------------------------- 1 | PACKAGE_NAME="kpcre" 2 | PACKAGE_VERSION="1.0.0" 3 | MAKE[0]="make KERNEL_DIR=/lib/modules/$kernelver/build modules" 4 | CLEAN="make clean" 5 | BUILT_MODULE_NAME[0]="libpcre2-8" 6 | BUILT_MODULE_NAME[1]="ts_pcre" 7 | BUILT_MODULE_NAME[2]="ts_regex" 8 | BUILT_MODULE_NAME[3]="libc" 9 | BUILT_MODULE_LOCATION[0]="pcre2/" 10 | BUILT_MODULE_LOCATION[1]="ts_pcre/" 11 | BUILT_MODULE_LOCATION[2]="ts_regex/" 12 | BUILT_MODULE_LOCATION[3]="libc/" 13 | DEST_MODULE_LOCATION[0]="/kernel/lib/" 14 | DEST_MODULE_LOCATION[1]="/kernel/lib/" 15 | DEST_MODULE_LOCATION[2]="/kernel/lib/" 16 | DEST_MODULE_LOCATION[3]="/kernel/lib/" 17 | AUTOINSTALL="yes" 18 | REMAKE_INITRD="yes" 19 | -------------------------------------------------------------------------------- /libc/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(strip $(DEBUG)),) 2 | ccflags-y += -O2 -s 3 | else 4 | ccflags-y += -ggdb -DDEBUG 5 | endif 6 | 7 | MODULES_DIR := /lib/modules/$(shell uname -r) 8 | KERNEL_DIR ?= ${MODULES_DIR}/build 9 | 10 | obj-m := libc.o 11 | 12 | all: 13 | make -C ${KERNEL_DIR} M=$$PWD; 14 | 15 | modules: 16 | make -C ${KERNEL_DIR} M=$$PWD $@; 17 | 18 | modules_install: 19 | make -C ${KERNEL_DIR} M=$$PWD $@; 20 | depmod -a; 21 | 22 | clean: 23 | make -C ${KERNEL_DIR} M=$$PWD $@; 24 | rm -rf modules.order cscope.* 25 | -------------------------------------------------------------------------------- /libc/libc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * libc.c Kernel C library wrapper 3 | * 4 | * Copyright (C) 2016 Seongmyun Cho 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software 
Foundation; version 2 of the License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | */ 19 | 20 | #include 21 | #include /* Needed for KERN_INFO */ 22 | #include /* Needed by all modules */ 23 | #include 24 | #include 25 | #include 26 | 27 | MODULE_LICENSE("Dual BSD/GPL"); 28 | MODULE_AUTHOR("Seongmyun Cho "); 29 | MODULE_DESCRIPTION("C library"); 30 | 31 | void *malloc(size_t size) 32 | { 33 | return kmalloc(size, GFP_ATOMIC); 34 | } 35 | EXPORT_SYMBOL(malloc); 36 | 37 | void *realloc(void *ptr, size_t size) 38 | { 39 | return krealloc(ptr, size, GFP_ATOMIC); 40 | } 41 | EXPORT_SYMBOL(realloc); 42 | 43 | void *calloc(size_t nmemb, size_t size) 44 | { 45 | return kcalloc(nmemb, size, GFP_ATOMIC); 46 | } 47 | EXPORT_SYMBOL(calloc); 48 | 49 | void free(void *ptr) 50 | { 51 | kfree(ptr); 52 | } 53 | EXPORT_SYMBOL(free); 54 | 55 | long int random(void) 56 | { 57 | long int rand; 58 | 59 | get_random_bytes(&rand, sizeof(rand)); 60 | 61 | return rand; 62 | } 63 | EXPORT_SYMBOL(random); 64 | 65 | void srandom(unsigned int seed) 66 | { 67 | return; 68 | } 69 | EXPORT_SYMBOL(srandom); 70 | 71 | time64_t time(time64_t *t) 72 | { 73 | struct timespec64 ts; 74 | 75 | ktime_get_real_ts64(&ts); 76 | 77 | if (t) 78 | *t = ts.tv_sec; 79 | 80 | return ts.tv_sec; 81 | } 82 | EXPORT_SYMBOL(time); 83 | 84 | static int __init libc_init(void) 85 | { 86 | pr_debug("libc init\n"); 87 | return 0; 88 | } 89 | 90 | static void __exit libc_exit(void) 91 | { 92 | pr_debug("libc exit\n"); 93 | } 94 | 95 | module_init(libc_init); 96 | 
module_exit(libc_exit); 97 | -------------------------------------------------------------------------------- /libc/libc.h: -------------------------------------------------------------------------------- 1 | #ifndef _KERNEL_LIBC_H 2 | #define _KERNEL_LIBC_H 3 | 4 | #include 5 | 6 | 7 | #if defined(__GNUC__) 8 | #define __WORDSIZE (__SIZEOF_POINTER__ * 8) 9 | #endif 10 | 11 | /* We don't have #include_next. 12 | Define ANSI for standard 32-bit words. */ 13 | 14 | /* These assume 8-bit `char's, 16-bit `short int's, 15 | and 32-bit `int's and `long int's. */ 16 | 17 | /* Number of bits in a `char'. */ 18 | #ifndef CHAR_BIT 19 | # define CHAR_BIT 8 20 | #endif 21 | 22 | /* Minimum and maximum values a `signed char' can hold. */ 23 | #ifndef SCHAR_MIN 24 | # define SCHAR_MIN (-128) 25 | #endif 26 | #ifndef SCHAR_MAX 27 | # define SCHAR_MAX 127 28 | #endif 29 | 30 | /* Maximum value an `unsigned char' can hold. (Minimum is 0.) */ 31 | #ifndef UCHAR_MAX 32 | # define UCHAR_MAX 255 33 | #endif 34 | 35 | /* Minimum and maximum values a `char' can hold. 
*/ 36 | # ifdef __CHAR_UNSIGNED__ 37 | # define CHAR_MIN 0 38 | # define CHAR_MAX UCHAR_MAX 39 | # else 40 | # define CHAR_MIN SCHAR_MIN 41 | # define CHAR_MAX SCHAR_MAX 42 | # endif 43 | 44 | #ifndef INT8_MIN 45 | /** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ 46 | # define INT8_MIN ((int8_t)(-128)) 47 | #endif 48 | #ifndef INT16_MIN 49 | /** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ 50 | # define INT16_MIN ((int16_t)(-32767-1)) 51 | #endif 52 | #ifndef INT32_MIN 53 | /** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ 54 | # define INT32_MIN ((int32_t)(-2147483647-1)) 55 | #endif 56 | 57 | #ifndef INT8_MAX 58 | /** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ 59 | # define INT8_MAX ((int8_t)(127)) 60 | #endif 61 | #ifndef INT16_MAX 62 | /** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ 63 | # define INT16_MAX ((int16_t)(32767)) 64 | #endif 65 | #ifndef INT32_MAX 66 | /** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ 67 | # define INT32_MAX ((int32_t)(2147483647)) 68 | #endif 69 | 70 | #ifndef UINT8_MAX 71 | /** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ 72 | # define UINT8_MAX ((uint8_t)(255U)) 73 | #endif 74 | #ifndef UINT16_MAX 75 | /** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ 76 | # define UINT16_MAX ((uint16_t)(65535U)) 77 | #endif 78 | #ifndef UINT32_MAX 79 | /** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ 80 | # define UINT32_MAX ((uint32_t)(4294967295U)) 81 | #endif 82 | 83 | extern void *malloc(size_t size); 84 | extern void *realloc(void *ptr, size_t size); 85 | extern void *calloc(size_t nmemb, size_t size); 86 | extern void free(void *ptr); 87 | extern int snprintf(char *buf, size_t size, const char *fmt, ...); 88 | 89 | extern long int random(void); 90 | extern void srandom(unsigned int seed); 91 | extern time64_t 
time(time64_t *t); 92 | 93 | #endif /* _KERNEL_LIBC_H */ 94 | -------------------------------------------------------------------------------- /pcre2/AUTHORS: -------------------------------------------------------------------------------- 1 | THE MAIN PCRE2 LIBRARY CODE 2 | --------------------------- 3 | 4 | Written by: Philip Hazel 5 | Email local part: ph10 6 | Email domain: cam.ac.uk 7 | 8 | University of Cambridge Computing Service, 9 | Cambridge, England. 10 | 11 | Copyright (c) 1997-2016 University of Cambridge 12 | All rights reserved. 13 | 14 | 15 | PCRE2 JUST-IN-TIME COMPILATION SUPPORT 16 | -------------------------------------- 17 | 18 | Written by: Zoltan Herczeg 19 | Email local part: hzmester 20 | Email domain: freemail.hu 21 | 22 | Copyright(c) 2010-2016 Zoltan Herczeg 23 | All rights reserved. 24 | 25 | 26 | STACK-LESS JUST-IN-TIME COMPILER 27 | -------------------------------- 28 | 29 | Written by: Zoltan Herczeg 30 | Email local part: hzmester 31 | Email domain: freemail.hu 32 | 33 | Copyright(c) 2009-2016 Zoltan Herczeg 34 | All rights reserved. 35 | 36 | 37 | PCRE2 LINUX KERNEL MODULE SUPPORT 38 | --------------------------------- 39 | 40 | Contributed by: Seongmyun Cho 41 | Email local part: highsky 42 | Email domain: gmail.com 43 | 44 | Copyright(c) 2016 Seongmyun Cho 45 | All rights reserved. 46 | 47 | #### 48 | -------------------------------------------------------------------------------- /pcre2/LICENCE: -------------------------------------------------------------------------------- 1 | PCRE2 LICENCE 2 | ------------- 3 | 4 | PCRE2 is a library of functions to support regular expressions whose syntax 5 | and semantics are as close as possible to those of the Perl 5 language. 6 | 7 | Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as 8 | specified below. The documentation for PCRE2, supplied in the "doc" 9 | directory, is distributed under the same terms as the software itself. 
The data 10 | in the testdata directory is not copyrighted and is in the public domain. 11 | 12 | The basic library functions are written in C and are freestanding. Also 13 | included in the distribution is a just-in-time compiler that can be used to 14 | optimize pattern matching. This is an optional feature that can be omitted when 15 | the library is built. 16 | 17 | 18 | THE BASIC LIBRARY FUNCTIONS 19 | --------------------------- 20 | 21 | Written by: Philip Hazel 22 | Email local part: ph10 23 | Email domain: cam.ac.uk 24 | 25 | University of Cambridge Computing Service, 26 | Cambridge, England. 27 | 28 | Copyright (c) 1997-2016 University of Cambridge 29 | All rights reserved. 30 | 31 | 32 | PCRE2 JUST-IN-TIME COMPILATION SUPPORT 33 | -------------------------------------- 34 | 35 | Written by: Zoltan Herczeg 36 | Email local part: hzmester 37 | Emain domain: freemail.hu 38 | 39 | Copyright(c) 2010-2016 Zoltan Herczeg 40 | All rights reserved. 41 | 42 | 43 | STACK-LESS JUST-IN-TIME COMPILER 44 | -------------------------------- 45 | 46 | Written by: Zoltan Herczeg 47 | Email local part: hzmester 48 | Emain domain: freemail.hu 49 | 50 | Copyright(c) 2009-2016 Zoltan Herczeg 51 | All rights reserved. 52 | 53 | 54 | PCRE2 LINUX KERNEL MODULE SUPPORT 55 | -------------------------------- 56 | 57 | Contributed by: Seongmyun Cho 58 | Email local part: highsky 59 | Emain domain: gmail.com 60 | 61 | Copyright(c) 2016 Seongmyun Cho 62 | All rights reserved. 63 | 64 | 65 | THE "BSD" LICENCE 66 | ----------------- 67 | 68 | Redistribution and use in source and binary forms, with or without 69 | modification, are permitted provided that the following conditions are met: 70 | 71 | * Redistributions of source code must retain the above copyright notice, 72 | this list of conditions and the following disclaimer. 
73 | 74 | * Redistributions in binary form must reproduce the above copyright 75 | notice, this list of conditions and the following disclaimer in the 76 | documentation and/or other materials provided with the distribution. 77 | 78 | * Neither the name of the University of Cambridge nor the names of any 79 | contributors may be used to endorse or promote products derived from this 80 | software without specific prior written permission. 81 | 82 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 83 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 85 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 86 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 87 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 88 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 89 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 90 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 91 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 92 | POSSIBILITY OF SUCH DAMAGE. 
93 | 94 | End 95 | -------------------------------------------------------------------------------- /pcre2/Makefile: -------------------------------------------------------------------------------- 1 | CODE_WIDTH ?= 8 2 | LIBC := ${PWD}/../libc/ 3 | 4 | ifeq ($(strip $(DEBUG)),) 5 | ccflags-y += -O3 -s 6 | else 7 | ccflags-y += -ggdb -DDEBUG 8 | endif 9 | ccflags-y += -I${LIBC} \ 10 | -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=${CODE_WIDTH} 11 | 12 | MODULES_DIR := /lib/modules/$(shell uname -r) 13 | KERNEL_DIR ?= ${MODULES_DIR}/build 14 | KBUILD_EXTRA_SYMBOLS = ${LIBC}/Module.symvers 15 | 16 | obj-m := libpcre2-${CODE_WIDTH}.o 17 | libpcre2-${CODE_WIDTH}-y := main.o \ 18 | pcre2_auto_possess.o \ 19 | pcre2_chartables.o \ 20 | pcre2_compile.o \ 21 | pcre2_config.o \ 22 | pcre2_context.o \ 23 | pcre2_dfa_match.o \ 24 | pcre2_error.o \ 25 | pcre2_find_bracket.o \ 26 | pcre2_jit_compile.o \ 27 | pcre2_maketables.o \ 28 | pcre2_match_data.o \ 29 | pcre2_match.o \ 30 | pcre2_newline.o \ 31 | pcre2_ord2utf.o \ 32 | pcre2_pattern_info.o \ 33 | pcre2posix.o \ 34 | pcre2_serialize.o \ 35 | pcre2_string_utils.o \ 36 | pcre2_study.o \ 37 | pcre2_substitute.o \ 38 | pcre2_substring.o \ 39 | pcre2_tables.o \ 40 | pcre2_ucd.o \ 41 | pcre2_valid_utf.o \ 42 | pcre2_xclass.o 43 | 44 | all: 45 | make -C ${KERNEL_DIR} M=$$PWD; 46 | 47 | modules: 48 | make -C ${KERNEL_DIR} M=$$PWD $@; 49 | 50 | modules_install: 51 | make -C ${KERNEL_DIR} M=$$PWD $@; 52 | depmod -a; 53 | 54 | clean: 55 | make -C ${KERNEL_DIR} M=$$PWD $@; 56 | rm -rf modules.order cscope.* 57 | -------------------------------------------------------------------------------- /pcre2/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * main.c PCRE kernel module 3 | * 4 | * Copyright (C) 2016 Seongmyun Cho 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free 
Software Foundation; version 2 of the License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | * 19 | */ 20 | 21 | 22 | #ifdef HAVE_CONFIG_H 23 | #include "config.h" 24 | #endif 25 | 26 | #include "pcre2_internal.h" 27 | #include "pcre2.h" 28 | 29 | MODULE_LICENSE("Dual BSD/GPL"); 30 | MODULE_AUTHOR("Seongmyun Cho "); 31 | MODULE_DESCRIPTION("PCRE2 library"); 32 | 33 | #define MACHINE_STACK_SIZE 32768 34 | #define TEMP_MAX 1000 35 | 36 | #define GROUPINFO_DEFAULT_SIZE 256 37 | #define NAMED_GROUP_LIST_SIZE 20 38 | #define PARSED_PATTERN_DEFAULT_SIZE 1024 39 | #define COMPILE_WORK_SIZE (2048*LINK_SIZE) /* Size in code units */ 40 | #define C32_WORK_SIZE \ 41 | ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint32_t)) 42 | 43 | // sizeof(unsigned char)*MACHINE_STACK_SIZE 44 | struct kmem_cache *local_space_cache = NULL; 45 | char *local_space_cache_str = NULL; 46 | // sizeof(PCRE2_SIZE)*TEMP_MAX 47 | struct kmem_cache *local_offsets_cache = NULL; 48 | char *local_offsets_cache_str = NULL; 49 | // sizeof(int)*TEMP_MAX 50 | struct kmem_cache *local_workspace_cache = NULL; 51 | char *local_workspace_cache_str = NULL; 52 | 53 | // sizeof(uint32_t)*GROUPINFO_DEFAULT_SIZE 54 | struct kmem_cache *stack_groupinfo_cache = NULL; 55 | char *stack_groupinfo_cache_str = NULL; 56 | // sizeof(uint32_t)*PARSED_PATTERN_DEFAULT_SIZE 57 | struct kmem_cache *stack_parsed_pattern_cache = NULL; 58 | char *stack_parsed_pattern_cache_str = NULL; 59 | // sizeof(named_group)*NAMED_GROUP_LIST_SIZE 60 | struct kmem_cache *named_groups_cache = 
NULL; 61 | char *named_groups_cache_str = NULL; 62 | // sizeof(uint32_t)*C32_WORK_SIZE 63 | struct kmem_cache *c16workspace_cache = NULL; 64 | char *c16workspace_cache_str = NULL; 65 | 66 | static int __init pcre2_init(void) 67 | { 68 | pr_debug("libpcre2 init\n"); 69 | 70 | local_space_cache_str = kasprintf(GFP_KERNEL, \ 71 | "local_space_cache_%p", pcre2_init); 72 | if (local_space_cache_str == NULL) 73 | goto out_of_memory; 74 | 75 | local_offsets_cache_str = kasprintf(GFP_KERNEL, \ 76 | "local_offsets_cache_%p", pcre2_init); 77 | if (local_offsets_cache_str == NULL) 78 | goto out_of_memory; 79 | 80 | local_workspace_cache_str = kasprintf(GFP_KERNEL, \ 81 | "local_workspace_cache_%p", pcre2_init); 82 | if (local_workspace_cache_str == NULL) 83 | goto out_of_memory; 84 | 85 | stack_groupinfo_cache_str = kasprintf(GFP_KERNEL, \ 86 | "stack_groupinfo_cache_%p", pcre2_init); 87 | if (stack_groupinfo_cache_str == NULL) 88 | goto out_of_memory; 89 | 90 | stack_parsed_pattern_cache_str = kasprintf(GFP_KERNEL, \ 91 | "stack_parsed_pattern_cache_%p", pcre2_init); 92 | if (stack_parsed_pattern_cache_str == NULL) 93 | goto out_of_memory; 94 | 95 | named_groups_cache_str = kasprintf(GFP_KERNEL, \ 96 | "named_groups_cache_%p", pcre2_init); 97 | if (named_groups_cache_str == NULL) 98 | goto out_of_memory; 99 | 100 | c16workspace_cache_str = kasprintf(GFP_KERNEL, \ 101 | "c16workspace_cache_%p", pcre2_init); 102 | 103 | if (c16workspace_cache_str == NULL) 104 | goto out_of_memory; 105 | 106 | local_space_cache = kmem_cache_create(local_space_cache_str, 107 | MACHINE_STACK_SIZE, 0, 0, NULL); 108 | if (local_space_cache == NULL) 109 | goto out_of_memory; 110 | 111 | local_offsets_cache = kmem_cache_create(local_offsets_cache_str, 112 | sizeof(PCRE2_SIZE) * TEMP_MAX, 0, 0, NULL); 113 | if (local_offsets_cache == NULL) 114 | goto out_of_memory; 115 | 116 | local_workspace_cache = kmem_cache_create(local_workspace_cache_str, 117 | sizeof(int) * TEMP_MAX, 0, 0, NULL); 118 | if 
(local_workspace_cache == NULL) 119 | goto out_of_memory; 120 | 121 | stack_groupinfo_cache = kmem_cache_create(stack_groupinfo_cache_str, 122 | sizeof(uint32_t) * 123 | GROUPINFO_DEFAULT_SIZE, 0, 0, NULL); 124 | if (stack_groupinfo_cache == NULL) 125 | goto out_of_memory; 126 | 127 | stack_parsed_pattern_cache = kmem_cache_create(stack_parsed_pattern_cache_str, 128 | sizeof(uint32_t) * 129 | PARSED_PATTERN_DEFAULT_SIZE, 0, 0, NULL); 130 | if (stack_parsed_pattern_cache == NULL) 131 | goto out_of_memory; 132 | 133 | named_groups_cache = kmem_cache_create(named_groups_cache_str, 134 | sizeof(named_group) * 135 | NAMED_GROUP_LIST_SIZE, 0, 0, NULL); 136 | if (named_groups_cache == NULL) 137 | goto out_of_memory; 138 | 139 | c16workspace_cache = kmem_cache_create(c16workspace_cache_str, 140 | sizeof(uint32_t) * C32_WORK_SIZE, 0, 0, NULL); 141 | if (c16workspace_cache == NULL) 142 | goto out_of_memory; 143 | 144 | return 0; 145 | 146 | out_of_memory: 147 | 148 | if (local_space_cache) 149 | kmem_cache_destroy(local_space_cache); 150 | 151 | if (local_offsets_cache) 152 | kmem_cache_destroy(local_offsets_cache); 153 | 154 | if (local_workspace_cache) 155 | kmem_cache_destroy(local_workspace_cache); 156 | 157 | if (stack_groupinfo_cache) 158 | kmem_cache_destroy(stack_groupinfo_cache); 159 | 160 | if (stack_parsed_pattern_cache) 161 | kmem_cache_destroy(stack_parsed_pattern_cache); 162 | 163 | if (named_groups_cache) 164 | kmem_cache_destroy(named_groups_cache); 165 | 166 | if (c16workspace_cache) 167 | kmem_cache_destroy(c16workspace_cache); 168 | 169 | if (local_space_cache_str) 170 | kfree(local_space_cache_str); 171 | 172 | if (local_offsets_cache_str) 173 | kfree(local_offsets_cache_str); 174 | 175 | if (local_workspace_cache_str) 176 | kfree(local_workspace_cache_str); 177 | 178 | if (stack_groupinfo_cache_str) 179 | kfree(stack_groupinfo_cache_str); 180 | 181 | if (stack_parsed_pattern_cache_str) 182 | kfree(stack_parsed_pattern_cache_str); 183 | 184 | if 
(named_groups_cache_str) 185 | kfree(named_groups_cache_str); 186 | 187 | if (c16workspace_cache_str) 188 | kfree(c16workspace_cache_str); 189 | 190 | return -ENOMEM; 191 | 192 | } 193 | 194 | static void __exit pcre2_exit(void) 195 | { 196 | pr_debug("libpcre2 exit\n"); 197 | 198 | if (local_space_cache) 199 | kmem_cache_destroy(local_space_cache); 200 | 201 | if (local_offsets_cache) 202 | kmem_cache_destroy(local_offsets_cache); 203 | 204 | if (local_workspace_cache) 205 | kmem_cache_destroy(local_workspace_cache); 206 | 207 | if (stack_groupinfo_cache) 208 | kmem_cache_destroy(stack_groupinfo_cache); 209 | 210 | if (stack_parsed_pattern_cache) 211 | kmem_cache_destroy(stack_parsed_pattern_cache); 212 | 213 | if (named_groups_cache) 214 | kmem_cache_destroy(named_groups_cache); 215 | 216 | if (c16workspace_cache) 217 | kmem_cache_destroy(c16workspace_cache); 218 | 219 | if (local_space_cache_str) 220 | kfree(local_space_cache_str); 221 | 222 | if (local_offsets_cache_str) 223 | kfree(local_offsets_cache_str); 224 | 225 | if (local_workspace_cache_str) 226 | kfree(local_workspace_cache_str); 227 | 228 | if (stack_groupinfo_cache_str) 229 | kfree(stack_groupinfo_cache_str); 230 | 231 | if (stack_parsed_pattern_cache_str) 232 | kfree(stack_parsed_pattern_cache_str); 233 | 234 | if (named_groups_cache_str) 235 | kfree(named_groups_cache_str); 236 | 237 | if (c16workspace_cache_str) 238 | kfree(c16workspace_cache_str); 239 | } 240 | 241 | module_init(pcre2_init); 242 | module_exit(pcre2_exit); 243 | -------------------------------------------------------------------------------- /pcre2/pcre2_chartables.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* This file contains character tables that are used when no external tables 6 | are passed to PCRE2 by the application that 
calls it. The tables are used only 7 | for characters whose code values are less than 256. 8 | 9 | This is a default version of the tables that assumes ASCII encoding. A program 10 | called dftables (which is distributed with PCRE2) can be used to build 11 | alternative versions of this file. This is necessary if you are running in an 12 | EBCDIC environment, or if you want to default to a different encoding, for 13 | example ISO-8859-1. When dftables is run, it creates these tables in the 14 | current locale. If PCRE2 is configured with --enable-rebuild-chartables, this 15 | happens automatically. 16 | 17 | The following #includes are present because without them gcc 4.x may remove the 18 | array definition from the final binary if PCRE2 is built into a static library 19 | and dead code stripping is activated. This leads to link errors. Pulling in the 20 | header ensures that the array gets flagged as "someone outside this compilation 21 | unit might reference this" and so it will always be supplied to the linker. */ 22 | 23 | #ifdef HAVE_CONFIG_H 24 | #include "config.h" 25 | #endif 26 | 27 | #include "pcre2_internal.h" 28 | 29 | const uint8_t PRIV(default_tables)[] = { 30 | 31 | /* This table is a lower casing table. 
*/ 32 | 33 | 0, 1, 2, 3, 4, 5, 6, 7, 34 | 8, 9, 10, 11, 12, 13, 14, 15, 35 | 16, 17, 18, 19, 20, 21, 22, 23, 36 | 24, 25, 26, 27, 28, 29, 30, 31, 37 | 32, 33, 34, 35, 36, 37, 38, 39, 38 | 40, 41, 42, 43, 44, 45, 46, 47, 39 | 48, 49, 50, 51, 52, 53, 54, 55, 40 | 56, 57, 58, 59, 60, 61, 62, 63, 41 | 64, 97, 98, 99,100,101,102,103, 42 | 104,105,106,107,108,109,110,111, 43 | 112,113,114,115,116,117,118,119, 44 | 120,121,122, 91, 92, 93, 94, 95, 45 | 96, 97, 98, 99,100,101,102,103, 46 | 104,105,106,107,108,109,110,111, 47 | 112,113,114,115,116,117,118,119, 48 | 120,121,122,123,124,125,126,127, 49 | 128,129,130,131,132,133,134,135, 50 | 136,137,138,139,140,141,142,143, 51 | 144,145,146,147,148,149,150,151, 52 | 152,153,154,155,156,157,158,159, 53 | 160,161,162,163,164,165,166,167, 54 | 168,169,170,171,172,173,174,175, 55 | 176,177,178,179,180,181,182,183, 56 | 184,185,186,187,188,189,190,191, 57 | 192,193,194,195,196,197,198,199, 58 | 200,201,202,203,204,205,206,207, 59 | 208,209,210,211,212,213,214,215, 60 | 216,217,218,219,220,221,222,223, 61 | 224,225,226,227,228,229,230,231, 62 | 232,233,234,235,236,237,238,239, 63 | 240,241,242,243,244,245,246,247, 64 | 248,249,250,251,252,253,254,255, 65 | 66 | /* This table is a case flipping table. 
*/ 67 | 68 | 0, 1, 2, 3, 4, 5, 6, 7, 69 | 8, 9, 10, 11, 12, 13, 14, 15, 70 | 16, 17, 18, 19, 20, 21, 22, 23, 71 | 24, 25, 26, 27, 28, 29, 30, 31, 72 | 32, 33, 34, 35, 36, 37, 38, 39, 73 | 40, 41, 42, 43, 44, 45, 46, 47, 74 | 48, 49, 50, 51, 52, 53, 54, 55, 75 | 56, 57, 58, 59, 60, 61, 62, 63, 76 | 64, 97, 98, 99,100,101,102,103, 77 | 104,105,106,107,108,109,110,111, 78 | 112,113,114,115,116,117,118,119, 79 | 120,121,122, 91, 92, 93, 94, 95, 80 | 96, 65, 66, 67, 68, 69, 70, 71, 81 | 72, 73, 74, 75, 76, 77, 78, 79, 82 | 80, 81, 82, 83, 84, 85, 86, 87, 83 | 88, 89, 90,123,124,125,126,127, 84 | 128,129,130,131,132,133,134,135, 85 | 136,137,138,139,140,141,142,143, 86 | 144,145,146,147,148,149,150,151, 87 | 152,153,154,155,156,157,158,159, 88 | 160,161,162,163,164,165,166,167, 89 | 168,169,170,171,172,173,174,175, 90 | 176,177,178,179,180,181,182,183, 91 | 184,185,186,187,188,189,190,191, 92 | 192,193,194,195,196,197,198,199, 93 | 200,201,202,203,204,205,206,207, 94 | 208,209,210,211,212,213,214,215, 95 | 216,217,218,219,220,221,222,223, 96 | 224,225,226,227,228,229,230,231, 97 | 232,233,234,235,236,237,238,239, 98 | 240,241,242,243,244,245,246,247, 99 | 248,249,250,251,252,253,254,255, 100 | 101 | /* This table contains bit maps for various character classes. Each map is 32 102 | bytes long and the bits run from the least significant end of each byte. The 103 | classes that have their own maps are: space, xdigit, digit, upper, lower, word, 104 | graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ 105 | 106 | 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 107 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 108 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 109 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 110 | 111 | 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 112 | 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, 113 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 114 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 115 | 116 | 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 117 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 118 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 119 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 120 | 121 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 122 | 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, 123 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 124 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 125 | 126 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 127 | 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, 128 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 129 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 130 | 131 | 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 132 | 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, 133 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 134 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 135 | 136 | 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, 137 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 138 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 139 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 140 | 141 | 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, 142 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 143 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 144 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 145 | 146 | 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, 147 | 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, 148 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 149 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 150 | 151 | 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, 152 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, 153 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 154 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 155 | 156 | /* This table identifies various classes of 
character by individual bits: 157 | 0x01 white space character 158 | 0x02 letter 159 | 0x04 decimal digit 160 | 0x08 hexadecimal digit 161 | 0x10 alphanumeric or '_' 162 | 0x80 regular expression metacharacter or binary zero 163 | */ 164 | 165 | 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 166 | 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ 167 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 168 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 169 | 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 170 | 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 171 | 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 172 | 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 173 | 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 174 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 175 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 176 | 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 177 | 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 178 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 179 | 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 180 | 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 181 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 182 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 183 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 184 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 185 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 186 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 187 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 188 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 189 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 190 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 191 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 192 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 193 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 194 | 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 195 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ 196 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 197 | 198 | /* End of pcre2_chartables.c */ 199 | -------------------------------------------------------------------------------- /pcre2/pcre2_config.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | #ifdef HAVE_CONFIG_H 42 | #include "config.h" 43 | #endif 44 | 45 | /* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes 46 | its value gets changed by pcre2_internal.h to be in code units. */ 47 | 48 | static int configured_link_size = LINK_SIZE; 49 | 50 | #include "pcre2_internal.h" 51 | 52 | /* These macros are the standard way of turning unquoted text into C strings. 53 | They allow macros like PCRE2_MAJOR to be defined without quotes, which is 54 | convenient for user programs that want to test their values. */ 55 | 56 | #define STRING(a) # a 57 | #define XSTRING(s) STRING(s) 58 | 59 | 60 | /************************************************* 61 | * Return info about what features are configured * 62 | *************************************************/ 63 | 64 | /* If where is NULL, the length of memory required is returned. 
65 | 66 | Arguments: 67 | what what information is required 68 | where where to put the information 69 | 70 | Returns: 0 if a numerical value is returned 71 | >= 0 if a string value 72 | PCRE2_ERROR_BADOPTION if "where" not recognized 73 | or JIT target requested when JIT not enabled 74 | */ 75 | 76 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 77 | pcre2_config(uint32_t what, void *where) 78 | { 79 | if (where == NULL) /* Requests a length */ 80 | { 81 | switch(what) 82 | { 83 | default: 84 | return PCRE2_ERROR_BADOPTION; 85 | 86 | case PCRE2_CONFIG_BSR: 87 | case PCRE2_CONFIG_JIT: 88 | case PCRE2_CONFIG_LINKSIZE: 89 | case PCRE2_CONFIG_MATCHLIMIT: 90 | case PCRE2_CONFIG_NEWLINE: 91 | case PCRE2_CONFIG_PARENSLIMIT: 92 | case PCRE2_CONFIG_RECURSIONLIMIT: 93 | case PCRE2_CONFIG_STACKRECURSE: 94 | case PCRE2_CONFIG_UNICODE: 95 | return sizeof(uint32_t); 96 | 97 | /* These are handled below */ 98 | 99 | case PCRE2_CONFIG_JITTARGET: 100 | case PCRE2_CONFIG_UNICODE_VERSION: 101 | case PCRE2_CONFIG_VERSION: 102 | break; 103 | } 104 | } 105 | 106 | switch (what) 107 | { 108 | default: 109 | return PCRE2_ERROR_BADOPTION; 110 | 111 | case PCRE2_CONFIG_BSR: 112 | #ifdef BSR_ANYCRLF 113 | *((uint32_t *)where) = PCRE2_BSR_ANYCRLF; 114 | #else 115 | *((uint32_t *)where) = PCRE2_BSR_UNICODE; 116 | #endif 117 | break; 118 | 119 | case PCRE2_CONFIG_JIT: 120 | #ifdef SUPPORT_JIT 121 | *((uint32_t *)where) = 1; 122 | #else 123 | *((uint32_t *)where) = 0; 124 | #endif 125 | break; 126 | 127 | case PCRE2_CONFIG_JITTARGET: 128 | #ifdef SUPPORT_JIT 129 | { 130 | const char *v = PRIV(jit_get_target)(); 131 | return (int)(1 + ((where == NULL)? 
132 | strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v))); 133 | } 134 | #else 135 | return PCRE2_ERROR_BADOPTION; 136 | #endif 137 | 138 | case PCRE2_CONFIG_LINKSIZE: 139 | *((uint32_t *)where) = (uint32_t)configured_link_size; 140 | break; 141 | 142 | case PCRE2_CONFIG_MATCHLIMIT: 143 | *((uint32_t *)where) = MATCH_LIMIT; 144 | break; 145 | 146 | case PCRE2_CONFIG_NEWLINE: 147 | *((uint32_t *)where) = NEWLINE_DEFAULT; 148 | break; 149 | 150 | case PCRE2_CONFIG_PARENSLIMIT: 151 | *((uint32_t *)where) = PARENS_NEST_LIMIT; 152 | break; 153 | 154 | case PCRE2_CONFIG_RECURSIONLIMIT: 155 | *((uint32_t *)where) = MATCH_LIMIT_RECURSION; 156 | break; 157 | 158 | case PCRE2_CONFIG_STACKRECURSE: 159 | #ifdef HEAP_MATCH_RECURSE 160 | *((uint32_t *)where) = 0; 161 | #else 162 | *((uint32_t *)where) = 1; 163 | #endif 164 | break; 165 | 166 | case PCRE2_CONFIG_UNICODE_VERSION: 167 | { 168 | #if defined SUPPORT_UNICODE 169 | const char *v = PRIV(unicode_version); 170 | #else 171 | const char *v = "Unicode not supported"; 172 | #endif 173 | return (int)(1 + ((where == NULL)? 174 | strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v))); 175 | } 176 | break; 177 | 178 | case PCRE2_CONFIG_UNICODE: 179 | #if defined SUPPORT_UNICODE 180 | *((uint32_t *)where) = 1; 181 | #else 182 | *((uint32_t *)where) = 0; 183 | #endif 184 | break; 185 | 186 | /* The hackery in setting "v" below is to cope with the case when 187 | PCRE2_PRERELEASE is set to an empty string (which it is for real releases). 188 | If the second alternative is used in this case, it does not leave a space 189 | before the date. On the other hand, if all four macros are put into a single 190 | XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted. 191 | There are problems using an "obvious" approach like this: 192 | 193 | XSTRING(PCRE2_MAJOR) "." 
XSTRING(PCRE_MINOR) 194 | XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE) 195 | 196 | because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion 197 | of STRING(). The C standard states: "If (before argument substitution) any 198 | argument consists of no preprocessing tokens, the behavior is undefined." It 199 | turns out the gcc treats this case as a single empty string - which is what 200 | we really want - but Visual C grumbles about the lack of an argument for the 201 | macro. Unfortunately, both are within their rights. As there seems to be no 202 | way to test for a macro's value being empty at compile time, we have to 203 | resort to a runtime test. */ 204 | 205 | case PCRE2_CONFIG_VERSION: 206 | { 207 | const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)? 208 | XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : 209 | XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE); 210 | return (int)(1 + ((where == NULL)? 211 | strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v))); 212 | } 213 | } 214 | 215 | return 0; 216 | } 217 | #ifdef __KERNEL__ 218 | EXPORT_SYMBOL(pcre2_config); 219 | #endif 220 | 221 | /* End of pcre2_config.c */ 222 | -------------------------------------------------------------------------------- /pcre2/pcre2_find_bracket.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This module contains a single function that scans through a compiled pattern 43 | until it finds a capturing bracket with the given number, or, if the number is 44 | negative, an instance of OP_REVERSE for a lookbehind. The function is called 45 | from pcre2_compile.c and also from pcre2_study.c when finding the minimum 46 | matching length. */ 47 | 48 | 49 | #ifdef HAVE_CONFIG_H 50 | #include "config.h" 51 | #endif 52 | 53 | #include "pcre2_internal.h" 54 | 55 | 56 | /************************************************* 57 | * Scan compiled regex for specific bracket * 58 | *************************************************/ 59 | 60 | /* 61 | Arguments: 62 | code points to start of expression 63 | utf TRUE in UTF mode 64 | number the required bracket number or negative to find a lookbehind 65 | 66 | Returns: pointer to the opcode for the bracket, or NULL if not found 67 | */ 68 | 69 | PCRE2_SPTR 70 | PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number) 71 | { 72 | for (;;) 73 | { 74 | PCRE2_UCHAR c = *code; 75 | 76 | if (c == OP_END) return NULL; 77 | 78 | /* XCLASS is used for classes that cannot be represented just by a bit map. 79 | This includes negated single high-valued characters. CALLOUT_STR is used for 80 | callouts with string arguments. In both cases the length in the table is 81 | zero; the actual length is stored in the compiled code. 
*/ 82 | 83 | if (c == OP_XCLASS) code += GET(code, 1); 84 | else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); 85 | 86 | /* Handle lookbehind */ 87 | 88 | else if (c == OP_REVERSE) 89 | { 90 | if (number < 0) return (PCRE2_UCHAR *)code; 91 | code += PRIV(OP_lengths)[c]; 92 | } 93 | 94 | /* Handle capturing bracket */ 95 | 96 | else if (c == OP_CBRA || c == OP_SCBRA || 97 | c == OP_CBRAPOS || c == OP_SCBRAPOS) 98 | { 99 | int n = (int)GET2(code, 1+LINK_SIZE); 100 | if (n == number) return (PCRE2_UCHAR *)code; 101 | code += PRIV(OP_lengths)[c]; 102 | } 103 | 104 | /* Otherwise, we can get the item's length from the table, except that for 105 | repeated character types, we have to test for \p and \P, which have an extra 106 | two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we 107 | must add in its length. */ 108 | 109 | else 110 | { 111 | switch(c) 112 | { 113 | case OP_TYPESTAR: 114 | case OP_TYPEMINSTAR: 115 | case OP_TYPEPLUS: 116 | case OP_TYPEMINPLUS: 117 | case OP_TYPEQUERY: 118 | case OP_TYPEMINQUERY: 119 | case OP_TYPEPOSSTAR: 120 | case OP_TYPEPOSPLUS: 121 | case OP_TYPEPOSQUERY: 122 | if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 123 | break; 124 | 125 | case OP_TYPEUPTO: 126 | case OP_TYPEMINUPTO: 127 | case OP_TYPEEXACT: 128 | case OP_TYPEPOSUPTO: 129 | if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 130 | code += 2; 131 | break; 132 | 133 | case OP_MARK: 134 | case OP_PRUNE_ARG: 135 | case OP_SKIP_ARG: 136 | case OP_THEN_ARG: 137 | code += code[1]; 138 | break; 139 | } 140 | 141 | /* Add in the fixed length from the table */ 142 | 143 | code += PRIV(OP_lengths)[c]; 144 | 145 | /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be 146 | followed by a multi-byte character. The length in the table is a minimum, so 147 | we have to arrange to skip the extra bytes. 
*/ 148 | 149 | #ifdef MAYBE_UTF_MULTI 150 | if (utf) switch(c) 151 | { 152 | case OP_CHAR: 153 | case OP_CHARI: 154 | case OP_NOT: 155 | case OP_NOTI: 156 | case OP_EXACT: 157 | case OP_EXACTI: 158 | case OP_NOTEXACT: 159 | case OP_NOTEXACTI: 160 | case OP_UPTO: 161 | case OP_UPTOI: 162 | case OP_NOTUPTO: 163 | case OP_NOTUPTOI: 164 | case OP_MINUPTO: 165 | case OP_MINUPTOI: 166 | case OP_NOTMINUPTO: 167 | case OP_NOTMINUPTOI: 168 | case OP_POSUPTO: 169 | case OP_POSUPTOI: 170 | case OP_NOTPOSUPTO: 171 | case OP_NOTPOSUPTOI: 172 | case OP_STAR: 173 | case OP_STARI: 174 | case OP_NOTSTAR: 175 | case OP_NOTSTARI: 176 | case OP_MINSTAR: 177 | case OP_MINSTARI: 178 | case OP_NOTMINSTAR: 179 | case OP_NOTMINSTARI: 180 | case OP_POSSTAR: 181 | case OP_POSSTARI: 182 | case OP_NOTPOSSTAR: 183 | case OP_NOTPOSSTARI: 184 | case OP_PLUS: 185 | case OP_PLUSI: 186 | case OP_NOTPLUS: 187 | case OP_NOTPLUSI: 188 | case OP_MINPLUS: 189 | case OP_MINPLUSI: 190 | case OP_NOTMINPLUS: 191 | case OP_NOTMINPLUSI: 192 | case OP_POSPLUS: 193 | case OP_POSPLUSI: 194 | case OP_NOTPOSPLUS: 195 | case OP_NOTPOSPLUSI: 196 | case OP_QUERY: 197 | case OP_QUERYI: 198 | case OP_NOTQUERY: 199 | case OP_NOTQUERYI: 200 | case OP_MINQUERY: 201 | case OP_MINQUERYI: 202 | case OP_NOTMINQUERY: 203 | case OP_NOTMINQUERYI: 204 | case OP_POSQUERY: 205 | case OP_POSQUERYI: 206 | case OP_NOTPOSQUERY: 207 | case OP_NOTPOSQUERYI: 208 | if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 209 | break; 210 | } 211 | #else 212 | (void)(utf); /* Keep compiler happy by referencing function argument */ 213 | #endif /* MAYBE_UTF_MULTI */ 214 | } 215 | } 216 | } 217 | 218 | /* End of pcre2_find_bracket.c */ 219 | -------------------------------------------------------------------------------- /pcre2/pcre2_jit_match.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | 
*************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | #ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE 42 | #error This file must be included from pcre2_jit_compile.c. 43 | #endif 44 | 45 | #ifdef SUPPORT_JIT 46 | 47 | #ifdef __KERNEL__ 48 | #include 49 | #include 50 | 51 | #if defined(SLJIT_CONFIG_X86_32) || defined(SLJIT_CONFIG_X86_64) 52 | 53 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) 54 | #include 55 | #else 56 | #include 57 | #endif /* LINUX_VERSION_CODE */ 58 | 59 | #endif 60 | extern struct kmem_cache *local_space_cache; 61 | static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func) 62 | { 63 | struct sljit_stack local_stack; 64 | int r; 65 | sljit_u8 *local_space = kmem_cache_alloc(local_space_cache, GFP_ATOMIC); 66 | 67 | if (unlikely(local_space == NULL)) 68 | panic("%s: Out of memory", __func__); 69 | 70 | local_stack.top = (sljit_sw)local_space; 71 | local_stack.base = local_stack.top; 72 | local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; 73 | local_stack.max_limit = local_stack.limit; 74 | arguments->stack = &local_stack; 75 | r = executable_func(arguments); 76 | kmem_cache_free(local_space_cache, local_space); 77 | return r; 78 | } 79 | #else 80 | static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func) 81 | { 82 | sljit_u8 local_space[MACHINE_STACK_SIZE]; 83 | 
struct sljit_stack local_stack; 84 | 85 | local_stack.top = (sljit_sw)&local_space; 86 | local_stack.base = local_stack.top; 87 | local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; 88 | local_stack.max_limit = local_stack.limit; 89 | arguments->stack = &local_stack; 90 | return executable_func(arguments); 91 | } 92 | #endif /* __KERNEL__ */ 93 | 94 | #endif 95 | 96 | 97 | /************************************************* 98 | * Do a JIT pattern match * 99 | *************************************************/ 100 | 101 | /* This function runs a JIT pattern match. 102 | 103 | Arguments: 104 | code points to the compiled expression 105 | subject points to the subject string 106 | length length of subject string (may contain binary zeros) 107 | start_offset where to start in the subject string 108 | options option bits 109 | match_data points to a match_data block 110 | mcontext points to a match context 111 | jit_stack points to a JIT stack 112 | 113 | Returns: > 0 => success; value is the number of ovector pairs filled 114 | = 0 => success, but ovector is not big enough 115 | -1 => failed to match (PCRE_ERROR_NOMATCH) 116 | < -1 => some kind of unexpected problem 117 | */ 118 | 119 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 120 | pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, 121 | PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, 122 | pcre2_match_context *mcontext) 123 | { 124 | #ifndef SUPPORT_JIT 125 | 126 | (void)code; 127 | (void)subject; 128 | (void)length; 129 | (void)start_offset; 130 | (void)options; 131 | (void)match_data; 132 | (void)mcontext; 133 | return PCRE2_ERROR_JIT_BADOPTION; 134 | 135 | #else /* SUPPORT_JIT */ 136 | 137 | pcre2_real_code *re = (pcre2_real_code *)code; 138 | executable_functions *functions = (executable_functions *)re->executable_jit; 139 | pcre2_jit_stack *jit_stack; 140 | uint32_t oveccount = match_data->oveccount; 141 | uint32_t max_oveccount; 142 | union { 143 | 
void *executable_func; 144 | jit_function call_executable_func; 145 | } convert_executable_func; 146 | jit_arguments arguments; 147 | int rc; 148 | int index = 0; 149 | 150 | if ((options & PCRE2_PARTIAL_HARD) != 0) 151 | index = 2; 152 | else if ((options & PCRE2_PARTIAL_SOFT) != 0) 153 | index = 1; 154 | 155 | if (functions->executable_funcs[index] == NULL) 156 | return PCRE2_ERROR_JIT_BADOPTION; 157 | 158 | /* Sanity checks should be handled by pcre_exec. */ 159 | arguments.str = subject + start_offset; 160 | arguments.begin = subject; 161 | arguments.end = subject + length; 162 | arguments.match_data = match_data; 163 | arguments.startchar_ptr = subject; 164 | arguments.mark_ptr = NULL; 165 | arguments.options = options; 166 | 167 | if (mcontext != NULL) 168 | { 169 | arguments.callout = mcontext->callout; 170 | arguments.callout_data = mcontext->callout_data; 171 | arguments.offset_limit = mcontext->offset_limit; 172 | arguments.limit_match = (mcontext->match_limit < re->limit_match)? 173 | mcontext->match_limit : re->limit_match; 174 | if (mcontext->jit_callback != NULL) 175 | jit_stack = mcontext->jit_callback(mcontext->jit_callback_data); 176 | else 177 | jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data; 178 | } 179 | else 180 | { 181 | arguments.callout = NULL; 182 | arguments.callout_data = NULL; 183 | arguments.offset_limit = PCRE2_UNSET; 184 | arguments.limit_match = (MATCH_LIMIT < re->limit_match)? 185 | MATCH_LIMIT : re->limit_match; 186 | jit_stack = NULL; 187 | } 188 | 189 | /* JIT only need two offsets for each ovector entry. Hence 190 | the last 1/3 of the ovector will never be touched. 
*/ 191 | 192 | max_oveccount = functions->top_bracket; 193 | if (oveccount > max_oveccount) 194 | oveccount = max_oveccount; 195 | arguments.oveccount = oveccount << 1; 196 | 197 | 198 | convert_executable_func.executable_func = functions->executable_funcs[index]; 199 | 200 | #ifdef __KERNEL__ 201 | #if defined(SLJIT_CONFIG_X86_32) || defined(SLJIT_CONFIG_X86_64) 202 | kernel_fpu_begin(); 203 | #endif 204 | #endif 205 | 206 | if (jit_stack != NULL) 207 | { 208 | arguments.stack = (struct sljit_stack *)(jit_stack->stack); 209 | rc = convert_executable_func.call_executable_func(&arguments); 210 | } 211 | else 212 | rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func); 213 | 214 | #ifdef __KERNEL__ 215 | #if defined(SLJIT_CONFIG_X86_32) || defined(SLJIT_CONFIG_X86_64) 216 | kernel_fpu_end(); 217 | #endif 218 | #endif 219 | 220 | if (rc > (int)oveccount) 221 | rc = 0; 222 | match_data->code = re; 223 | match_data->subject = subject; 224 | match_data->rc = rc; 225 | match_data->startchar = arguments.startchar_ptr - subject; 226 | match_data->leftchar = 0; 227 | match_data->rightchar = 0; 228 | match_data->mark = arguments.mark_ptr; 229 | match_data->matchedby = PCRE2_MATCHEDBY_JIT; 230 | 231 | return match_data->rc; 232 | 233 | #endif /* SUPPORT_JIT */ 234 | } 235 | #ifdef __KERNEL__ 236 | EXPORT_SYMBOL(pcre2_jit_match); 237 | #endif 238 | 239 | /* End of pcre2_jit_match.c */ 240 | -------------------------------------------------------------------------------- /pcre2/pcre2_jit_misc.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | #ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE 43 | #error This file must be included from pcre2_jit_compile.c. 44 | #endif 45 | 46 | 47 | 48 | /************************************************* 49 | * Free JIT read-only data * 50 | *************************************************/ 51 | 52 | void 53 | PRIV(jit_free_rodata)(void *cur, void *allocator_data) 54 | { 55 | #ifndef SUPPORT_JIT 56 | (void)cur; 57 | (void)allocator_data; 58 | #else /* SUPPORT_JIT */ 59 | void *next; 60 | 61 | SLJIT_UNUSED_ARG(allocator_data); 62 | 63 | while (cur != NULL) 64 | { 65 | next = *(void**)cur; 66 | SLJIT_FREE(cur, allocator_data); 67 | cur = next; 68 | } 69 | 70 | #endif /* SUPPORT_JIT */ 71 | } 72 | 73 | /************************************************* 74 | * Free JIT compiled code * 75 | *************************************************/ 76 | 77 | void 78 | PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl) 79 | { 80 | #ifndef SUPPORT_JIT 81 | (void)executable_jit; 82 | (void)memctl; 83 | #else /* SUPPORT_JIT */ 84 | 85 | executable_functions *functions = (executable_functions *)executable_jit; 86 | void *allocator_data = memctl; 87 | int i; 88 | 89 | for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) 90 | { 91 | if (functions->executable_funcs[i] != NULL) 92 | sljit_free_code(functions->executable_funcs[i]); 93 | PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data); 94 | } 95 | 96 | SLJIT_FREE(functions, allocator_data); 97 | 98 | #endif /* SUPPORT_JIT */ 99 | } 100 | 101 | 102 | /************************************************* 103 | * Free unused JIT memory * 104 | *************************************************/ 105 | 106 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 107 | pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) 108 | { 109 | #ifndef SUPPORT_JIT 110 | (void)gcontext; /* Suppress warning */ 111 | #else /* SUPPORT_JIT */ 112 
| SLJIT_UNUSED_ARG(gcontext); 113 | sljit_free_unused_memory_exec(); 114 | #endif /* SUPPORT_JIT */ 115 | } 116 | #ifdef __KERNEL__ 117 | EXPORT_SYMBOL(pcre2_jit_free_unused_memory); 118 | #endif 119 | 120 | 121 | 122 | /************************************************* 123 | * Allocate a JIT stack * 124 | *************************************************/ 125 | 126 | PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION 127 | pcre2_jit_stack_create(size_t startsize, size_t maxsize, 128 | pcre2_general_context *gcontext) 129 | { 130 | #ifndef SUPPORT_JIT 131 | 132 | (void)gcontext; 133 | (void)startsize; 134 | (void)maxsize; 135 | return NULL; 136 | 137 | #else /* SUPPORT_JIT */ 138 | 139 | pcre2_jit_stack *jit_stack; 140 | 141 | if (startsize < 1 || maxsize < 1) 142 | return NULL; 143 | if (startsize > maxsize) 144 | startsize = maxsize; 145 | startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 146 | maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 147 | 148 | jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext); 149 | if (jit_stack == NULL) return NULL; 150 | jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl); 151 | return jit_stack; 152 | 153 | #endif 154 | } 155 | #ifdef __KERNEL__ 156 | EXPORT_SYMBOL(pcre2_jit_stack_create); 157 | #endif 158 | 159 | 160 | /************************************************* 161 | * Assign a JIT stack to a pattern * 162 | *************************************************/ 163 | 164 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 165 | pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback, 166 | void *callback_data) 167 | { 168 | #ifndef SUPPORT_JIT 169 | (void)mcontext; 170 | (void)callback; 171 | (void)callback_data; 172 | #else /* SUPPORT_JIT */ 173 | 174 | if (mcontext == NULL) return; 175 | mcontext->jit_callback = callback; 176 | mcontext->jit_callback_data = callback_data; 177 | 178 | 
#endif /* SUPPORT_JIT */ 179 | } 180 | #ifdef __KERNEL__ 181 | EXPORT_SYMBOL(pcre2_jit_stack_assign); 182 | #endif 183 | 184 | 185 | /************************************************* 186 | * Free a JIT stack * 187 | *************************************************/ 188 | 189 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 190 | pcre2_jit_stack_free(pcre2_jit_stack *jit_stack) 191 | { 192 | #ifndef SUPPORT_JIT 193 | (void)jit_stack; 194 | #else /* SUPPORT_JIT */ 195 | if (jit_stack != NULL) 196 | { 197 | sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl); 198 | jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data); 199 | } 200 | #endif /* SUPPORT_JIT */ 201 | } 202 | #ifdef __KERNEL__ 203 | EXPORT_SYMBOL(pcre2_jit_stack_free); 204 | #endif 205 | 206 | /************************************************* 207 | * Get target CPU type * 208 | *************************************************/ 209 | 210 | const char* 211 | PRIV(jit_get_target)(void) 212 | { 213 | #ifndef SUPPORT_JIT 214 | return "JIT is not supported"; 215 | #else /* SUPPORT_JIT */ 216 | return sljit_get_platform_name(); 217 | #endif /* SUPPORT_JIT */ 218 | } 219 | 220 | 221 | /************************************************* 222 | * Get size of JIT code * 223 | *************************************************/ 224 | 225 | size_t 226 | PRIV(jit_get_size)(void *executable_jit) 227 | { 228 | #ifndef SUPPORT_JIT 229 | (void)executable_jit; 230 | return 0; 231 | #else /* SUPPORT_JIT */ 232 | sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes; 233 | SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed); 234 | return executable_sizes[0] + executable_sizes[1] + executable_sizes[2]; 235 | #endif 236 | } 237 | 238 | /* End of pcre2_jit_misc.c */ 239 | -------------------------------------------------------------------------------- /pcre2/pcre2_maketables.c: 
-------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This module contains the external function pcre2_maketables(), which builds 43 | character tables for PCRE2 in the current locale. The file is compiled on its 44 | own as part of the PCRE2 library. However, it is also included in the 45 | compilation of dftables.c, in which case the macro DFTABLES is defined. */ 46 | 47 | #ifndef DFTABLES 48 | # ifdef HAVE_CONFIG_H 49 | # include "config.h" 50 | # endif 51 | # include "pcre2_internal.h" 52 | #endif 53 | 54 | 55 | 56 | /************************************************* 57 | * Create PCRE2 character tables * 58 | *************************************************/ 59 | 60 | /* This function builds a set of character tables for use by PCRE2 and returns 61 | a pointer to them. They are build using the ctype functions, and consequently 62 | their contents will depend upon the current locale setting. When compiled as 63 | part of the library, the store is obtained via a general context malloc, if 64 | supplied, but when DFTABLES is defined (when compiling the dftables auxiliary 65 | program) malloc() is used, and the function has a different name so as not to 66 | clash with the prototype in pcre2.h. 
67 | 68 | Arguments: none when DFTABLES is defined 69 | else a PCRE2 general context or NULL 70 | Returns: pointer to the contiguous block of data 71 | */ 72 | 73 | #ifdef DFTABLES /* Included in freestanding dftables.c program */ 74 | static const uint8_t *maketables(void) 75 | { 76 | uint8_t *yield = (uint8_t *)malloc(tables_length); 77 | 78 | #else /* Not DFTABLES, compiling the library */ 79 | PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION 80 | pcre2_maketables(pcre2_general_context *gcontext) 81 | { 82 | uint8_t *yield = (uint8_t *)((gcontext != NULL)? 83 | gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) : 84 | malloc(tables_length)); 85 | #endif /* DFTABLES */ 86 | 87 | int i; 88 | uint8_t *p; 89 | 90 | if (yield == NULL) return NULL; 91 | p = yield; 92 | 93 | /* First comes the lower casing table */ 94 | 95 | for (i = 0; i < 256; i++) *p++ = tolower(i); 96 | 97 | /* Next the case-flipping table */ 98 | 99 | for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); 100 | 101 | /* Then the character class tables. Don't try to be clever and save effort on 102 | exclusive ones - in some locales things may be different. 103 | 104 | Note that the table for "space" includes everything "isspace" gives, including 105 | VT in the default locale. This makes it work for the POSIX class [:space:]. 106 | From release 8.34 is is also correct for Perl space, because Perl added VT at 107 | release 5.18. 108 | 109 | Note also that it is possible for a character to be alnum or alpha without 110 | being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the 111 | fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must 112 | test for alnum specially. 
*/ 113 | 114 | memset(p, 0, cbit_length); 115 | for (i = 0; i < 256; i++) 116 | { 117 | if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); 118 | if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); 119 | if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); 120 | if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); 121 | if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); 122 | if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); 123 | if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); 124 | if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); 125 | if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); 126 | if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); 127 | if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); 128 | } 129 | p += cbit_length; 130 | 131 | /* Finally, the character type table. In this, we used to exclude VT from the 132 | white space chars, because Perl didn't recognize it as such for \s and for 133 | comments within regexes. However, Perl changed at release 5.18, so PCRE changed 134 | at release 8.34. */ 135 | 136 | for (i = 0; i < 256; i++) 137 | { 138 | int x = 0; 139 | if (isspace(i)) x += ctype_space; 140 | if (isalpha(i)) x += ctype_letter; 141 | if (isdigit(i)) x += ctype_digit; 142 | if (isxdigit(i)) x += ctype_xdigit; 143 | if (isalnum(i) || i == '_') x += ctype_word; 144 | 145 | /* Note: strchr includes the terminating zero in the characters it considers. 146 | In this instance, that is ok because we want binary zero to be flagged as a 147 | meta-character, which in this sense is any character that terminates a run 148 | of data characters. 
*/ 149 | 150 | if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; 151 | *p++ = x; 152 | } 153 | 154 | return yield; 155 | } 156 | #ifdef __KERNEL__ 157 | EXPORT_SYMBOL(pcre2_maketables); 158 | #endif 159 | 160 | /* End of pcre2_maketables.c */ 161 | -------------------------------------------------------------------------------- /pcre2/pcre2_match_data.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 
26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | #ifdef HAVE_CONFIG_H 43 | #include "config.h" 44 | #endif 45 | 46 | #include "pcre2_internal.h" 47 | 48 | 49 | 50 | /************************************************* 51 | * Create a match data block given ovector size * 52 | *************************************************/ 53 | 54 | /* A minimum of 1 is imposed on the number of ovector triplets. 
*/ 55 | 56 | PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION 57 | pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext) 58 | { 59 | pcre2_match_data *yield; 60 | if (oveccount < 1) oveccount = 1; 61 | yield = PRIV(memctl_malloc)( 62 | sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE), 63 | (pcre2_memctl *)gcontext); 64 | if (yield == NULL) return NULL; 65 | yield->oveccount = oveccount; 66 | return yield; 67 | } 68 | #ifdef __KERNEL__ 69 | EXPORT_SYMBOL(pcre2_match_data_create); 70 | #endif 71 | 72 | 73 | 74 | /************************************************* 75 | * Create a match data block using pattern data * 76 | *************************************************/ 77 | 78 | /* If no context is supplied, use the memory allocator from the code. */ 79 | 80 | PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION 81 | pcre2_match_data_create_from_pattern(const pcre2_code *code, 82 | pcre2_general_context *gcontext) 83 | { 84 | if (gcontext == NULL) gcontext = (pcre2_general_context *)code; 85 | return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, 86 | gcontext); 87 | } 88 | #ifdef __KERNEL__ 89 | EXPORT_SYMBOL(pcre2_match_data_create_from_pattern); 90 | #endif 91 | 92 | 93 | 94 | /************************************************* 95 | * Free a match data block * 96 | *************************************************/ 97 | 98 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 99 | pcre2_match_data_free(pcre2_match_data *match_data) 100 | { 101 | if (match_data != NULL) 102 | match_data->memctl.free(match_data, match_data->memctl.memory_data); 103 | } 104 | #ifdef __KERNEL__ 105 | EXPORT_SYMBOL(pcre2_match_data_free); 106 | #endif 107 | 108 | 109 | 110 | /************************************************* 111 | * Get last mark in match * 112 | *************************************************/ 113 | 114 | PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION 115 | pcre2_get_mark(pcre2_match_data *match_data) 116 | { 117 
| return match_data->mark; 118 | } 119 | #ifdef __KERNEL__ 120 | EXPORT_SYMBOL(pcre2_get_mark); 121 | #endif 122 | 123 | 124 | 125 | /************************************************* 126 | * Get pointer to ovector * 127 | *************************************************/ 128 | 129 | PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION 130 | pcre2_get_ovector_pointer(pcre2_match_data *match_data) 131 | { 132 | return match_data->ovector; 133 | } 134 | #ifdef __KERNEL__ 135 | EXPORT_SYMBOL(pcre2_get_ovector_pointer); 136 | #endif 137 | 138 | 139 | 140 | /************************************************* 141 | * Get number of ovector slots * 142 | *************************************************/ 143 | 144 | PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION 145 | pcre2_get_ovector_count(pcre2_match_data *match_data) 146 | { 147 | return match_data->oveccount; 148 | } 149 | #ifdef __KERNEL__ 150 | EXPORT_SYMBOL(pcre2_get_ovector_count); 151 | #endif 152 | 153 | 154 | 155 | /************************************************* 156 | * Get starting code unit in match * 157 | *************************************************/ 158 | 159 | PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION 160 | pcre2_get_startchar(pcre2_match_data *match_data) 161 | { 162 | return match_data->startchar; 163 | } 164 | #ifdef __KERNEL__ 165 | EXPORT_SYMBOL(pcre2_get_startchar); 166 | #endif 167 | 168 | /* End of pcre2_match_data.c */ 169 | -------------------------------------------------------------------------------- /pcre2/pcre2_newline.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This module contains internal functions for testing newlines when more than 43 | one kind of newline is to be recognized. When a newline is found, its length is 44 | returned. In principle, we could implement several newline "types", each 45 | referring to a different set of newline characters. At present, PCRE2 supports 46 | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, 47 | and NLTYPE_ANY. The full list of Unicode newline characters is taken from 48 | http://unicode.org/unicode/reports/tr18/. */ 49 | 50 | 51 | #ifdef HAVE_CONFIG_H 52 | #include "config.h" 53 | #endif 54 | 55 | #include "pcre2_internal.h" 56 | 57 | 58 | 59 | /************************************************* 60 | * Check for newline at given position * 61 | *************************************************/ 62 | 63 | /* This function is called only via the IS_NEWLINE macro, which does so only 64 | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed 65 | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit 66 | pointed to by ptr is less than the end of the string. 67 | 68 | Arguments: 69 | ptr pointer to possible newline 70 | type the newline type 71 | endptr pointer to the end of the string 72 | lenptr where to return the length 73 | utf TRUE if in utf mode 74 | 75 | Returns: TRUE or FALSE 76 | */ 77 | 78 | BOOL 79 | PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, 80 | uint32_t *lenptr, BOOL utf) 81 | { 82 | uint32_t c; 83 | 84 | #ifdef SUPPORT_UNICODE 85 | if (utf) { GETCHAR(c, ptr); } else c = *ptr; 86 | #else 87 | (void)utf; 88 | c = *ptr; 89 | #endif /* SUPPORT_UNICODE */ 90 | 91 | if (type == NLTYPE_ANYCRLF) switch(c) 92 | { 93 | case CHAR_LF: 94 | *lenptr = 1; 95 | return TRUE; 96 | 97 | case CHAR_CR: 98 | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 
2 : 1; 99 | return TRUE; 100 | 101 | default: 102 | return FALSE; 103 | } 104 | 105 | /* NLTYPE_ANY */ 106 | 107 | else switch(c) 108 | { 109 | #ifdef EBCDIC 110 | case CHAR_NEL: 111 | #endif 112 | case CHAR_LF: 113 | case CHAR_VT: 114 | case CHAR_FF: 115 | *lenptr = 1; 116 | return TRUE; 117 | 118 | case CHAR_CR: 119 | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; 120 | return TRUE; 121 | 122 | #ifndef EBCDIC 123 | #if PCRE2_CODE_UNIT_WIDTH == 8 124 | case CHAR_NEL: 125 | *lenptr = utf? 2 : 1; 126 | return TRUE; 127 | 128 | case 0x2028: /* LS */ 129 | case 0x2029: /* PS */ 130 | *lenptr = 3; 131 | return TRUE; 132 | 133 | #else /* 16-bit or 32-bit code units */ 134 | case CHAR_NEL: 135 | case 0x2028: /* LS */ 136 | case 0x2029: /* PS */ 137 | *lenptr = 1; 138 | return TRUE; 139 | #endif 140 | #endif /* Not EBCDIC */ 141 | 142 | default: 143 | return FALSE; 144 | } 145 | } 146 | 147 | 148 | 149 | /************************************************* 150 | * Check for newline at previous position * 151 | *************************************************/ 152 | 153 | /* This function is called only via the WAS_NEWLINE macro, which does so only 154 | when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed 155 | newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial 156 | value of ptr is greater than the start of the string that is being processed. 
157 | 158 | Arguments: 159 | ptr pointer to possible newline 160 | type the newline type 161 | startptr pointer to the start of the string 162 | lenptr where to return the length 163 | utf TRUE if in utf mode 164 | 165 | Returns: TRUE or FALSE 166 | */ 167 | 168 | BOOL 169 | PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, 170 | uint32_t *lenptr, BOOL utf) 171 | { 172 | uint32_t c; 173 | ptr--; 174 | 175 | #ifdef SUPPORT_UNICODE 176 | if (utf) 177 | { 178 | BACKCHAR(ptr); 179 | GETCHAR(c, ptr); 180 | } 181 | else c = *ptr; 182 | #else 183 | (void)utf; 184 | c = *ptr; 185 | #endif /* SUPPORT_UNICODE */ 186 | 187 | if (type == NLTYPE_ANYCRLF) switch(c) 188 | { 189 | case CHAR_LF: 190 | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; 191 | return TRUE; 192 | 193 | case CHAR_CR: 194 | *lenptr = 1; 195 | return TRUE; 196 | 197 | default: 198 | return FALSE; 199 | } 200 | 201 | /* NLTYPE_ANY */ 202 | 203 | else switch(c) 204 | { 205 | case CHAR_LF: 206 | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; 207 | return TRUE; 208 | 209 | #ifdef EBCDIC 210 | case CHAR_NEL: 211 | #endif 212 | case CHAR_VT: 213 | case CHAR_FF: 214 | case CHAR_CR: 215 | *lenptr = 1; 216 | return TRUE; 217 | 218 | #ifndef EBCDIC 219 | #if PCRE2_CODE_UNIT_WIDTH == 8 220 | case CHAR_NEL: 221 | *lenptr = utf? 
2 : 1; 222 | return TRUE; 223 | 224 | case 0x2028: /* LS */ 225 | case 0x2029: /* PS */ 226 | *lenptr = 3; 227 | return TRUE; 228 | 229 | #else /* 16-bit or 32-bit code units */ 230 | case CHAR_NEL: 231 | case 0x2028: /* LS */ 232 | case 0x2029: /* PS */ 233 | *lenptr = 1; 234 | return TRUE; 235 | #endif 236 | #endif /* Not EBCDIC */ 237 | 238 | default: 239 | return FALSE; 240 | } 241 | } 242 | 243 | /* End of pcre2_newline.c */ 244 | -------------------------------------------------------------------------------- /pcre2/pcre2_ord2utf.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 
26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* This file contains a function that converts a Unicode character code point 43 | into a UTF string. The behaviour is different for each code unit width. */ 44 | 45 | 46 | #ifdef HAVE_CONFIG_H 47 | #include "config.h" 48 | #endif 49 | 50 | #include "pcre2_internal.h" 51 | 52 | 53 | /* If SUPPORT_UNICODE is not defined, this function will never be called. 54 | Supply a dummy function because some compilers do not like empty source 55 | modules. 
*/ 56 | 57 | #ifndef SUPPORT_UNICODE 58 | unsigned int 59 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) 60 | { 61 | (void)(cvalue); 62 | (void)(buffer); 63 | return 0; 64 | } 65 | #else /* SUPPORT_UNICODE */ 66 | 67 | 68 | /************************************************* 69 | * Convert code point to UTF * 70 | *************************************************/ 71 | 72 | /* 73 | Arguments: 74 | cvalue the character value 75 | buffer pointer to buffer for result 76 | 77 | Returns: number of code units placed in the buffer 78 | */ 79 | 80 | unsigned int 81 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) 82 | { 83 | /* Convert to UTF-8 */ 84 | 85 | #if PCRE2_CODE_UNIT_WIDTH == 8 86 | int i, j; 87 | for (i = 0; i < PRIV(utf8_table1_size); i++) 88 | if ((int)cvalue <= PRIV(utf8_table1)[i]) break; 89 | buffer += i; 90 | for (j = i; j > 0; j--) 91 | { 92 | *buffer-- = 0x80 | (cvalue & 0x3f); 93 | cvalue >>= 6; 94 | } 95 | *buffer = PRIV(utf8_table2)[i] | cvalue; 96 | return i + 1; 97 | 98 | /* Convert to UTF-16 */ 99 | 100 | #elif PCRE2_CODE_UNIT_WIDTH == 16 101 | if (cvalue <= 0xffff) 102 | { 103 | *buffer = (PCRE2_UCHAR)cvalue; 104 | return 1; 105 | } 106 | cvalue -= 0x10000; 107 | *buffer++ = 0xd800 | (cvalue >> 10); 108 | *buffer = 0xdc00 | (cvalue & 0x3ff); 109 | return 2; 110 | 111 | /* Convert to UTF-32 */ 112 | 113 | #else 114 | *buffer = (PCRE2_UCHAR)cvalue; 115 | return 1; 116 | #endif 117 | } 118 | #endif /* SUPPORT_UNICODE */ 119 | 120 | /* End of pcre_ord2utf.c */ 121 | -------------------------------------------------------------------------------- /pcre2/pcre2_pattern_info.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those 
of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | #ifdef HAVE_CONFIG_H 43 | #include "config.h" 44 | #endif 45 | 46 | #include "pcre2_internal.h" 47 | 48 | 49 | /************************************************* 50 | * Return info about compiled pattern * 51 | *************************************************/ 52 | 53 | /* 54 | Arguments: 55 | code points to compiled code 56 | what what information is required 57 | where where to put the information; if NULL, return length 58 | 59 | Returns: 0 when data returned 60 | > 0 when length requested 61 | < 0 on error or unset value 62 | */ 63 | 64 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 65 | pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) 66 | { 67 | const pcre2_real_code *re = (pcre2_real_code *)code; 68 | 69 | if (where == NULL) /* Requests field length */ 70 | { 71 | switch(what) 72 | { 73 | case PCRE2_INFO_ALLOPTIONS: 74 | case PCRE2_INFO_ARGOPTIONS: 75 | case PCRE2_INFO_BACKREFMAX: 76 | case PCRE2_INFO_BSR: 77 | case PCRE2_INFO_CAPTURECOUNT: 78 | case PCRE2_INFO_FIRSTCODETYPE: 79 | case PCRE2_INFO_FIRSTCODEUNIT: 80 | case PCRE2_INFO_HASBACKSLASHC: 81 | case PCRE2_INFO_HASCRORLF: 82 | case PCRE2_INFO_JCHANGED: 83 | case PCRE2_INFO_LASTCODETYPE: 84 | case PCRE2_INFO_LASTCODEUNIT: 85 | case PCRE2_INFO_MATCHEMPTY: 86 | case PCRE2_INFO_MATCHLIMIT: 87 | case PCRE2_INFO_MAXLOOKBEHIND: 88 | case PCRE2_INFO_MINLENGTH: 89 | case PCRE2_INFO_NAMEENTRYSIZE: 90 | case PCRE2_INFO_NAMECOUNT: 91 | case PCRE2_INFO_NEWLINE: 92 | case PCRE2_INFO_RECURSIONLIMIT: 93 | return sizeof(uint32_t); 94 | 95 | case PCRE2_INFO_FIRSTBITMAP: 96 | return sizeof(const uint8_t *); 97 | 98 | case PCRE2_INFO_JITSIZE: 99 | case PCRE2_INFO_SIZE: 100 | return sizeof(size_t); 101 | 102 | case PCRE2_INFO_NAMETABLE: 103 | return sizeof(PCRE2_SPTR); 104 | } 105 | } 106 | 107 | if (re == NULL) return PCRE2_ERROR_NULL; 108 | 109 | /* Check that the first field in the block is the 
magic number. If it is not, 110 | return with PCRE2_ERROR_BADMAGIC. */ 111 | 112 | if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 113 | 114 | /* Check that this pattern was compiled in the correct bit mode */ 115 | 116 | if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; 117 | 118 | switch(what) 119 | { 120 | case PCRE2_INFO_ALLOPTIONS: 121 | *((uint32_t *)where) = re->overall_options; 122 | break; 123 | 124 | case PCRE2_INFO_ARGOPTIONS: 125 | *((uint32_t *)where) = re->compile_options; 126 | break; 127 | 128 | case PCRE2_INFO_BACKREFMAX: 129 | *((uint32_t *)where) = re->top_backref; 130 | break; 131 | 132 | case PCRE2_INFO_BSR: 133 | *((uint32_t *)where) = re->bsr_convention; 134 | break; 135 | 136 | case PCRE2_INFO_CAPTURECOUNT: 137 | *((uint32_t *)where) = re->top_bracket; 138 | break; 139 | 140 | case PCRE2_INFO_FIRSTCODETYPE: 141 | *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : 142 | ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; 143 | break; 144 | 145 | case PCRE2_INFO_FIRSTCODEUNIT: 146 | *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 147 | re->first_codeunit : 0; 148 | break; 149 | 150 | case PCRE2_INFO_FIRSTBITMAP: 151 | *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)? 152 | &(re->start_bitmap[0]) : NULL; 153 | break; 154 | 155 | case PCRE2_INFO_HASBACKSLASHC: 156 | *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0; 157 | break; 158 | 159 | case PCRE2_INFO_HASCRORLF: 160 | *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; 161 | break; 162 | 163 | case PCRE2_INFO_JCHANGED: 164 | *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0; 165 | break; 166 | 167 | case PCRE2_INFO_JITSIZE: 168 | #ifdef SUPPORT_JIT 169 | *((size_t *)where) = (re->executable_jit != NULL)? 
170 | PRIV(jit_get_size)(re->executable_jit) : 0; 171 | #else 172 | *((size_t *)where) = 0; 173 | #endif 174 | break; 175 | 176 | case PCRE2_INFO_LASTCODETYPE: 177 | *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0; 178 | break; 179 | 180 | case PCRE2_INFO_LASTCODEUNIT: 181 | *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 182 | re->last_codeunit : 0; 183 | break; 184 | 185 | case PCRE2_INFO_MATCHEMPTY: 186 | *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0; 187 | break; 188 | 189 | case PCRE2_INFO_MATCHLIMIT: 190 | *((uint32_t *)where) = re->limit_match; 191 | if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; 192 | break; 193 | 194 | case PCRE2_INFO_MAXLOOKBEHIND: 195 | *((uint32_t *)where) = re->max_lookbehind; 196 | break; 197 | 198 | case PCRE2_INFO_MINLENGTH: 199 | *((uint32_t *)where) = re->minlength; 200 | break; 201 | 202 | case PCRE2_INFO_NAMEENTRYSIZE: 203 | *((uint32_t *)where) = re->name_entry_size; 204 | break; 205 | 206 | case PCRE2_INFO_NAMECOUNT: 207 | *((uint32_t *)where) = re->name_count; 208 | break; 209 | 210 | case PCRE2_INFO_NAMETABLE: 211 | *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); 212 | break; 213 | 214 | case PCRE2_INFO_NEWLINE: 215 | *((uint32_t *)where) = re->newline_convention; 216 | break; 217 | 218 | case PCRE2_INFO_RECURSIONLIMIT: 219 | *((uint32_t *)where) = re->limit_recursion; 220 | if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET; 221 | break; 222 | 223 | case PCRE2_INFO_SIZE: 224 | *((size_t *)where) = re->blocksize; 225 | break; 226 | 227 | default: return PCRE2_ERROR_BADOPTION; 228 | } 229 | 230 | return 0; 231 | } 232 | #ifdef __KERNEL__ 233 | EXPORT_SYMBOL(pcre2_pattern_info); 234 | #endif 235 | 236 | 237 | 238 | /************************************************* 239 | * Callout enumerator * 240 | *************************************************/ 241 | 242 | /* 243 | Arguments: 244 | code points to compiled code 245 | callback 
function called for each callout block 246 | callout_data user data passed to the callback 247 | 248 | Returns: 0 when successfully completed 249 | < 0 on local error 250 | != 0 for callback error 251 | */ 252 | 253 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 254 | pcre2_callout_enumerate(const pcre2_code *code, 255 | int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data) 256 | { 257 | pcre2_real_code *re = (pcre2_real_code *)code; 258 | pcre2_callout_enumerate_block cb; 259 | PCRE2_SPTR cc; 260 | #ifdef SUPPORT_UNICODE 261 | BOOL utf = (re->overall_options & PCRE2_UTF) != 0; 262 | #endif 263 | 264 | if (re == NULL) return PCRE2_ERROR_NULL; 265 | 266 | /* Check that the first field in the block is the magic number. If it is not, 267 | return with PCRE2_ERROR_BADMAGIC. */ 268 | 269 | if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 270 | 271 | /* Check that this pattern was compiled in the correct bit mode */ 272 | 273 | if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; 274 | 275 | cb.version = 0; 276 | cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) 277 | + re->name_count * re->name_entry_size; 278 | 279 | while (TRUE) 280 | { 281 | int rc; 282 | switch (*cc) 283 | { 284 | case OP_END: 285 | return 0; 286 | 287 | case OP_CHAR: 288 | case OP_CHARI: 289 | case OP_NOT: 290 | case OP_NOTI: 291 | case OP_STAR: 292 | case OP_MINSTAR: 293 | case OP_PLUS: 294 | case OP_MINPLUS: 295 | case OP_QUERY: 296 | case OP_MINQUERY: 297 | case OP_UPTO: 298 | case OP_MINUPTO: 299 | case OP_EXACT: 300 | case OP_POSSTAR: 301 | case OP_POSPLUS: 302 | case OP_POSQUERY: 303 | case OP_POSUPTO: 304 | case OP_STARI: 305 | case OP_MINSTARI: 306 | case OP_PLUSI: 307 | case OP_MINPLUSI: 308 | case OP_QUERYI: 309 | case OP_MINQUERYI: 310 | case OP_UPTOI: 311 | case OP_MINUPTOI: 312 | case OP_EXACTI: 313 | case OP_POSSTARI: 314 | case OP_POSPLUSI: 315 | case OP_POSQUERYI: 316 | case OP_POSUPTOI: 317 | case OP_NOTSTAR: 
318 | case OP_NOTMINSTAR: 319 | case OP_NOTPLUS: 320 | case OP_NOTMINPLUS: 321 | case OP_NOTQUERY: 322 | case OP_NOTMINQUERY: 323 | case OP_NOTUPTO: 324 | case OP_NOTMINUPTO: 325 | case OP_NOTEXACT: 326 | case OP_NOTPOSSTAR: 327 | case OP_NOTPOSPLUS: 328 | case OP_NOTPOSQUERY: 329 | case OP_NOTPOSUPTO: 330 | case OP_NOTSTARI: 331 | case OP_NOTMINSTARI: 332 | case OP_NOTPLUSI: 333 | case OP_NOTMINPLUSI: 334 | case OP_NOTQUERYI: 335 | case OP_NOTMINQUERYI: 336 | case OP_NOTUPTOI: 337 | case OP_NOTMINUPTOI: 338 | case OP_NOTEXACTI: 339 | case OP_NOTPOSSTARI: 340 | case OP_NOTPOSPLUSI: 341 | case OP_NOTPOSQUERYI: 342 | case OP_NOTPOSUPTOI: 343 | cc += PRIV(OP_lengths)[*cc]; 344 | #ifdef SUPPORT_UNICODE 345 | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 346 | #endif 347 | break; 348 | 349 | case OP_TYPESTAR: 350 | case OP_TYPEMINSTAR: 351 | case OP_TYPEPLUS: 352 | case OP_TYPEMINPLUS: 353 | case OP_TYPEQUERY: 354 | case OP_TYPEMINQUERY: 355 | case OP_TYPEUPTO: 356 | case OP_TYPEMINUPTO: 357 | case OP_TYPEEXACT: 358 | case OP_TYPEPOSSTAR: 359 | case OP_TYPEPOSPLUS: 360 | case OP_TYPEPOSQUERY: 361 | case OP_TYPEPOSUPTO: 362 | cc += PRIV(OP_lengths)[*cc]; 363 | #ifdef SUPPORT_UNICODE 364 | if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2; 365 | #endif 366 | break; 367 | 368 | #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 369 | case OP_XCLASS: 370 | cc += GET(cc, 1); 371 | break; 372 | #endif 373 | 374 | case OP_MARK: 375 | case OP_PRUNE_ARG: 376 | case OP_SKIP_ARG: 377 | case OP_THEN_ARG: 378 | cc += PRIV(OP_lengths)[*cc] + cc[1]; 379 | break; 380 | 381 | case OP_CALLOUT: 382 | cb.pattern_position = GET(cc, 1); 383 | cb.next_item_length = GET(cc, 1 + LINK_SIZE); 384 | cb.callout_number = cc[1 + 2*LINK_SIZE]; 385 | cb.callout_string_offset = 0; 386 | cb.callout_string_length = 0; 387 | cb.callout_string = NULL; 388 | rc = callback(&cb, callout_data); 389 | if (rc != 0) return rc; 390 | cc += PRIV(OP_lengths)[*cc]; 391 | break; 392 | 393 | 
case OP_CALLOUT_STR: 394 | cb.pattern_position = GET(cc, 1); 395 | cb.next_item_length = GET(cc, 1 + LINK_SIZE); 396 | cb.callout_number = 0; 397 | cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE); 398 | cb.callout_string_length = 399 | GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2; 400 | cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1; 401 | rc = callback(&cb, callout_data); 402 | if (rc != 0) return rc; 403 | cc += GET(cc, 1 + 2*LINK_SIZE); 404 | break; 405 | 406 | default: 407 | cc += PRIV(OP_lengths)[*cc]; 408 | break; 409 | } 410 | } 411 | } 412 | #ifdef __KERNEL__ 413 | EXPORT_SYMBOL(pcre2_callout_enumerate); 414 | #endif 415 | 416 | /* End of pcre2_pattern_info.c */ 417 | -------------------------------------------------------------------------------- /pcre2/pcre2_serialize.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 
22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | /* This module contains functions for serializing and deserializing 42 | a sequence of compiled codes. */ 43 | 44 | 45 | #ifdef HAVE_CONFIG_H 46 | #include "config.h" 47 | #endif 48 | 49 | 50 | #include "pcre2_internal.h" 51 | 52 | /* Magic number to provide a small check against being handed junk. */ 53 | 54 | #define SERIALIZED_DATA_MAGIC 0x50523253u 55 | 56 | /* Deserialization is limited to the current PCRE version and 57 | character width. 
*/ 58 | 59 | #define SERIALIZED_DATA_VERSION \ 60 | ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16)) 61 | 62 | #define SERIALIZED_DATA_CONFIG \ 63 | (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16)) 64 | 65 | 66 | 67 | /************************************************* 68 | * Serialize compiled patterns * 69 | *************************************************/ 70 | 71 | PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION 72 | pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes, 73 | uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, 74 | pcre2_general_context *gcontext) 75 | { 76 | uint8_t *bytes; 77 | uint8_t *dst_bytes; 78 | int32_t i; 79 | PCRE2_SIZE total_size; 80 | const pcre2_real_code *re; 81 | const uint8_t *tables; 82 | pcre2_serialized_data *data; 83 | 84 | const pcre2_memctl *memctl = (gcontext != NULL) ? 85 | &gcontext->memctl : &PRIV(default_compile_context).memctl; 86 | 87 | if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL) 88 | return PCRE2_ERROR_NULL; 89 | 90 | if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; 91 | 92 | /* Compute total size. */ 93 | total_size = sizeof(pcre2_serialized_data) + tables_length; 94 | tables = NULL; 95 | 96 | for (i = 0; i < number_of_codes; i++) 97 | { 98 | if (codes[i] == NULL) return PCRE2_ERROR_NULL; 99 | re = (const pcre2_real_code *)(codes[i]); 100 | if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 101 | if (tables == NULL) 102 | tables = re->tables; 103 | else if (tables != re->tables) 104 | return PCRE2_ERROR_MIXEDTABLES; 105 | total_size += re->blocksize; 106 | } 107 | 108 | /* Initialize the byte stream. */ 109 | bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data); 110 | if (bytes == NULL) return PCRE2_ERROR_NOMEMORY; 111 | 112 | /* The controller is stored as a hidden parameter. 
*/ 113 | memcpy(bytes, memctl, sizeof(pcre2_memctl)); 114 | bytes += sizeof(pcre2_memctl); 115 | 116 | data = (pcre2_serialized_data *)bytes; 117 | data->magic = SERIALIZED_DATA_MAGIC; 118 | data->version = SERIALIZED_DATA_VERSION; 119 | data->config = SERIALIZED_DATA_CONFIG; 120 | data->number_of_codes = number_of_codes; 121 | 122 | /* Copy all compiled code data. */ 123 | dst_bytes = bytes + sizeof(pcre2_serialized_data); 124 | memcpy(dst_bytes, tables, tables_length); 125 | dst_bytes += tables_length; 126 | 127 | for (i = 0; i < number_of_codes; i++) 128 | { 129 | re = (const pcre2_real_code *)(codes[i]); 130 | memcpy(dst_bytes, (char *)re, re->blocksize); 131 | dst_bytes += re->blocksize; 132 | } 133 | 134 | *serialized_bytes = bytes; 135 | *serialized_size = total_size; 136 | return number_of_codes; 137 | } 138 | #ifdef __KERNEL__ 139 | EXPORT_SYMBOL(pcre2_serialize_encode); 140 | #endif 141 | 142 | 143 | /************************************************* 144 | * Deserialize compiled patterns * 145 | *************************************************/ 146 | 147 | PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION 148 | pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes, 149 | const uint8_t *bytes, pcre2_general_context *gcontext) 150 | { 151 | const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes; 152 | const pcre2_memctl *memctl = (gcontext != NULL) ? 153 | &gcontext->memctl : &PRIV(default_compile_context).memctl; 154 | 155 | const uint8_t *src_bytes; 156 | pcre2_real_code *dst_re; 157 | uint8_t *tables; 158 | int32_t i, j; 159 | 160 | /* Sanity checks. 
*/ 161 | 162 | if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL; 163 | if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; 164 | if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA; 165 | if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; 166 | if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; 167 | if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; 168 | 169 | if (number_of_codes > data->number_of_codes) 170 | number_of_codes = data->number_of_codes; 171 | 172 | src_bytes = bytes + sizeof(pcre2_serialized_data); 173 | 174 | /* Decode tables. The reference count for the tables is stored immediately 175 | following them. */ 176 | 177 | tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data); 178 | if (tables == NULL) return PCRE2_ERROR_NOMEMORY; 179 | 180 | memcpy(tables, src_bytes, tables_length); 181 | *(PCRE2_SIZE *)(tables + tables_length) = number_of_codes; 182 | src_bytes += tables_length; 183 | 184 | /* Decode the byte stream. We must not try to read the size from the compiled 185 | code block in the stream, because it might be unaligned, which causes errors on 186 | hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type 187 | of the blocksize field is given its own name to ensure that it is the same here 188 | as in the block. */ 189 | 190 | for (i = 0; i < number_of_codes; i++) 191 | { 192 | CODE_BLOCKSIZE_TYPE blocksize; 193 | memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize), 194 | sizeof(CODE_BLOCKSIZE_TYPE)); 195 | if (blocksize <= sizeof(pcre2_real_code)) 196 | return PCRE2_ERROR_BADSERIALIZEDDATA; 197 | 198 | /* The allocator provided by gcontext replaces the original one. 
*/ 199 | 200 | dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize, 201 | (pcre2_memctl *)gcontext); 202 | if (dst_re == NULL) 203 | { 204 | memctl->free(tables, memctl->memory_data); 205 | for (j = 0; j < i; j++) 206 | { 207 | memctl->free(codes[j], memctl->memory_data); 208 | codes[j] = NULL; 209 | } 210 | return PCRE2_ERROR_NOMEMORY; 211 | } 212 | 213 | /* The new allocator must be preserved. */ 214 | 215 | memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl), 216 | src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl)); 217 | if (dst_re->magic_number != MAGIC_NUMBER || 218 | dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 || 219 | dst_re->name_count > MAX_NAME_COUNT) 220 | return PCRE2_ERROR_BADSERIALIZEDDATA; 221 | 222 | /* At the moment only one table is supported. */ 223 | 224 | dst_re->tables = tables; 225 | dst_re->executable_jit = NULL; 226 | dst_re->flags |= PCRE2_DEREF_TABLES; 227 | 228 | codes[i] = dst_re; 229 | src_bytes += blocksize; 230 | } 231 | 232 | return number_of_codes; 233 | } 234 | #ifdef __KERNEL__ 235 | EXPORT_SYMBOL(pcre2_serialize_decode); 236 | #endif 237 | 238 | 239 | /************************************************* 240 | * Get the number of serialized patterns * 241 | *************************************************/ 242 | 243 | PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION 244 | pcre2_serialize_get_number_of_codes(const uint8_t *bytes) 245 | { 246 | const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes; 247 | 248 | if (data == NULL) return PCRE2_ERROR_NULL; 249 | if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; 250 | if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; 251 | if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; 252 | 253 | return data->number_of_codes; 254 | } 255 | #ifdef __KERNEL__ 256 | EXPORT_SYMBOL(pcre2_serialize_get_number_of_codes); 257 | #endif 258 | 259 | 260 | 
/************************************************* 261 | * Free the allocated stream * 262 | *************************************************/ 263 | 264 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 265 | pcre2_serialize_free(uint8_t *bytes) 266 | { 267 | if (bytes != NULL) 268 | { 269 | pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl)); 270 | memctl->free(memctl, memctl->memory_data); 271 | } 272 | } 273 | #ifdef __KERNEL__ 274 | EXPORT_SYMBOL(pcre2_serialize_free); 275 | #endif 276 | 277 | /* End of pcre2_serialize.c */ 278 | -------------------------------------------------------------------------------- /pcre2/pcre2_string_utils.c: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 
22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | /* This module contains internal functions for comparing and finding the length 42 | of strings. These are used instead of strcmp() etc because the standard 43 | functions work only on 8-bit data. 
*/ 44 | 45 | 46 | #ifdef HAVE_CONFIG_H 47 | #include "config.h" 48 | #endif 49 | 50 | #include "pcre2_internal.h" 51 | 52 | 53 | /************************************************* 54 | * Compare two zero-terminated PCRE2 strings * 55 | *************************************************/ 56 | 57 | /* 58 | Arguments: 59 | str1 first string 60 | str2 second string 61 | 62 | Returns: 0, 1, or -1 63 | */ 64 | 65 | int 66 | PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2) 67 | { 68 | PCRE2_UCHAR c1, c2; 69 | while (*str1 != '\0' || *str2 != '\0') 70 | { 71 | c1 = *str1++; 72 | c2 = *str2++; 73 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 74 | } 75 | return 0; 76 | } 77 | 78 | 79 | /************************************************* 80 | * Compare zero-terminated PCRE2 & 8-bit strings * 81 | *************************************************/ 82 | 83 | /* As the 8-bit string is almost always a literal, its type is specified as 84 | const char *. 85 | 86 | Arguments: 87 | str1 first string 88 | str2 second string 89 | 90 | Returns: 0, 1, or -1 91 | */ 92 | 93 | int 94 | PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2) 95 | { 96 | PCRE2_UCHAR c1, c2; 97 | while (*str1 != '\0' || *str2 != '\0') 98 | { 99 | c1 = *str1++; 100 | c2 = *str2++; 101 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 102 | } 103 | return 0; 104 | } 105 | 106 | 107 | /************************************************* 108 | * Compare two PCRE2 strings, given a length * 109 | *************************************************/ 110 | 111 | /* 112 | Arguments: 113 | str1 first string 114 | str2 second string 115 | len the length 116 | 117 | Returns: 0, 1, or -1 118 | */ 119 | 120 | int 121 | PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len) 122 | { 123 | PCRE2_UCHAR c1, c2; 124 | for (; len > 0; len--) 125 | { 126 | c1 = *str1++; 127 | c2 = *str2++; 128 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 129 | } 130 | return 0; 131 | } 132 | 133 | 134 | /************************************************* 135 | 
* Compare PCRE2 string to 8-bit string by length * 136 | *************************************************/ 137 | 138 | /* As the 8-bit string is almost always a literal, its type is specified as 139 | const char *. 140 | 141 | Arguments: 142 | str1 first string 143 | str2 second string 144 | len the length 145 | 146 | Returns: 0, 1, or -1 147 | */ 148 | 149 | int 150 | PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len) 151 | { 152 | PCRE2_UCHAR c1, c2; 153 | for (; len > 0; len--) 154 | { 155 | c1 = *str1++; 156 | c2 = *str2++; 157 | if (c1 != c2) return ((c1 > c2) << 1) - 1; 158 | } 159 | return 0; 160 | } 161 | 162 | 163 | /************************************************* 164 | * Find the length of a PCRE2 string * 165 | *************************************************/ 166 | 167 | /* 168 | Argument: the string 169 | Returns: the length 170 | */ 171 | 172 | PCRE2_SIZE 173 | PRIV(strlen)(PCRE2_SPTR str) 174 | { 175 | PCRE2_SIZE c = 0; 176 | while (*str++ != 0) c++; 177 | return c; 178 | } 179 | 180 | 181 | /************************************************* 182 | * Copy 8-bit 0-terminated string to PCRE2 string * 183 | *************************************************/ 184 | 185 | /* Arguments: 186 | str1 buffer to receive the string 187 | str2 8-bit string to be copied 188 | 189 | Returns: the number of code units used (excluding trailing zero) 190 | */ 191 | 192 | PCRE2_SIZE 193 | PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2) 194 | { 195 | PCRE2_UCHAR *t = str1; 196 | while (*str2 != 0) *t++ = *str2++; 197 | *t = 0; 198 | return t - str1; 199 | } 200 | 201 | /* End of pcre2_string_utils.c */ 202 | -------------------------------------------------------------------------------- /pcre2/pcre2_ucp.h: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* 
PCRE is a library of functions to support regular expressions whose syntax 6 | and semantics are as close as possible to those of the Perl 5 language. 7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 
-----------------------------------------------------------------------------
*/


#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */

/* These are the general character categories. No explicit values are given,
so the enumerators count up from zero in the order listed (ucp_C is 0 and
ucp_Z is 6). */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories. They are grouped by their
leading letter, which names the general category above; the values run from
0 (ucp_Cc) to 29 (ucp_Zs). */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The list below
currently ends at 12, as the per-line value comments show. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther              /* 12 */
};

/* These are the script identifications.
*/

/* No explicit values are given, so the script codes count up from zero
(ucp_Arabic is 0) in the order listed. The first group has no release marker;
every later group is introduced by a comment naming the Unicode release that
added it, and each such group follows all the earlier ones. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0: */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0: */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting
};

#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

/* End of pcre2_ucp.h */

--------------------------------------------------------------------------------
/pcre2/pcre2_xclass.c:
--------------------------------------------------------------------------------
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | /* This module contains an internal function that is used to match an extended 42 | class. 
It is used by pcre2_auto_possessify() and by both pcre2_match() and
pcre2_dfa_match(). */


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif


#include "pcre2_internal.h"

/*************************************************
*       Match character against an XCLASS        *
*************************************************/

/* This function is called to match a character against an extended class that
might contain codepoints above 255 and/or Unicode properties.

As read by the code below, the class data is laid out as follows: one flag
code unit (XCL_NOT, XCL_MAP and XCL_HASPROP bits), then a 32-byte bitmap if
XCL_MAP is set, then a list of items ended by XCL_END. An item is XCL_SINGLE
followed by one character, XCL_RANGE followed by two characters, or
XCL_PROP/XCL_NOTPROP followed by two code units (property type and value).

Arguments:
  c           the character
  data        points to the flag code unit of the XCLASS data
  utf         TRUE if in UTF mode

Returns:      TRUE if character matches, else FALSE
*/

BOOL
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
{
PCRE2_UCHAR t;
BOOL negated = (*data & XCL_NOT) != 0;

#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif

/* Code points < 256 are matched against a bitmap, if one is present. If not,
we still carry on, because there may be ranges that start below 256 in the
additional data. */

if (c < 256)
  {
  /* Without property items the answer for a small code point is final: with
  no map the result is just "negated", and with a map the bit is returned
  as-is (negated is not applied on this path). */
  if ((*data & XCL_HASPROP) == 0)
    {
    if ((*data & XCL_MAP) == 0) return negated;
    return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
    }
  /* With property items present, a set bit is a hit; a clear bit falls
  through to the item list below. */
  if ((*data & XCL_MAP) != 0 &&
    (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
    return !negated; /* char found */
  }

/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */

/* The map is 32 bytes, which is 32 / sizeof(PCRE2_UCHAR) code units. */
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);

while ((t = *data++) != XCL_END)
  {
  uint32_t x, y;
  if (t == XCL_SINGLE)
    {
#ifdef SUPPORT_UNICODE
    if (utf)
      {
      GETCHARINC(x, data); /* macro generates multiple statements */
      }
    else
#endif
    x = *data++;
    if (c == x) return !negated;
    }
  else if (t == XCL_RANGE)
    {
#ifdef SUPPORT_UNICODE
    if (utf)
      {
      GETCHARINC(x, data); /* macro generates multiple statements */
      GETCHARINC(y, data); /* macro generates multiple statements */
      }
    else
#endif
      {
      x = *data++;
      y = *data++;
      }
    if (c >= x && c <= y) return !negated;   /* inclusive at both ends */
    }

#ifdef SUPPORT_UNICODE
  else  /* XCL_PROP & XCL_NOTPROP */
    {
    const ucd_record *prop = GET_UCD(c);
    /* Every test below has the form (condition) == isprop, so XCL_PROP hits
    when the condition holds and XCL_NOTPROP hits when it does not. */
    BOOL isprop = t == XCL_PROP;

    /* data[0] is the property type; data[1], where used, is its value. */
    switch(*data)
      {
      case PT_ANY:
      if (isprop) return !negated;
      break;

      case PT_LAMP:
      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
           prop->chartype == ucp_Lt) == isprop) return !negated;
      break;

      case PT_GC:
      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
        return !negated;
      break;

      case PT_PC:
      if ((data[1] == prop->chartype) == isprop) return !negated;
      break;

      case PT_SC:
      if ((data[1] == prop->script) == isprop) return !negated;
      break;

      case PT_ALNUM:
      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
        return !negated;
      break;

      /* Perl space used to exclude VT, but from Perl 5.18 it is included,
      which means that Perl space and POSIX space are now identical. PCRE
      was changed at release 8.34. */

      case PT_SPACE:    /* Perl space */
      case PT_PXSPACE:  /* POSIX space */
      switch(c)
        {
        HSPACE_CASES:
        VSPACE_CASES:
        if (isprop) return !negated;
        break;

        default:
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
          return !negated;
        break;
        }
      break;

      case PT_WORD:
      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
             == isprop)
        return !negated;
      break;

      case PT_UCNC:
      if (c < 0xa0)
        {
        if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
             c == CHAR_GRAVE_ACCENT) == isprop)
          return !negated;
        }
      else
        {
        /* At or above 0xa0, everything outside 0xd800-0xdfff qualifies. */
        if ((c < 0xd800 || c > 0xdfff) == isprop)
          return !negated;
        }
      break;

      /* The following three properties can occur only in an XCLASS, as there
      is no \p or \P coding for them. */

      /* Graphic character. Implement this as not Z (space or separator) and
      not C (other), except for Cf (format) with a few exceptions. This seems
      to be what Perl does. The exceptional characters are:

      U+061C           Arabic Letter Mark
      U+180E           Mongolian Vowel Separator
      U+2066 - U+2069  Various "isolate"s
      */

      case PT_PXGRAPH:
      if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
            (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
              (prop->chartype == ucp_Cf &&
                c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
         )) == isprop)
        return !negated;
      break;

      /* Printable character: same as graphic, with the addition of Zs, i.e.
      not Zl and not Zp, and U+180E. */

      case PT_PXPRINT:
      if ((prop->chartype != ucp_Zl &&
           prop->chartype != ucp_Zp &&
            (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
              (prop->chartype == ucp_Cf &&
                c != 0x061c && (c < 0x2066 || c > 0x2069))
         )) == isprop)
        return !negated;
      break;

      /* Punctuation: all Unicode punctuation, plus ASCII characters that
      Unicode treats as symbols rather than punctuation, for Perl
      compatibility (these are $+<=>^`|~). */

      case PT_PXPUNCT:
      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
        return !negated;
      break;

      /* This should never occur, but compilers may mutter if there is no
      default. */

      default:
      return FALSE;
      }

    /* No decision from this item: step over its type and value units. */
    data += 2;
    }
#else
  (void)utf;  /* Avoid compiler warning */
#endif  /* SUPPORT_UNICODE */
  }

return negated;   /* char did not match */
}

/* End of pcre2_xclass.c */

--------------------------------------------------------------------------------
/pcre2/pcre2posix.h:
--------------------------------------------------------------------------------
/*************************************************
*       Perl-Compatible Regular Expressions      *
*************************************************/

/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
7 | 8 | Written by Philip Hazel 9 | Original API code Copyright (c) 1997-2012 University of Cambridge 10 | New API code Copyright (c) 2016 University of Cambridge 11 | 12 | ----------------------------------------------------------------------------- 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, 17 | this list of conditions and the following disclaimer. 18 | 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | 23 | * Neither the name of the University of Cambridge nor the names of its 24 | contributors may be used to endorse or promote products derived from 25 | this software without specific prior written permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | POSSIBILITY OF SUCH DAMAGE. 38 | ----------------------------------------------------------------------------- 39 | */ 40 | 41 | 42 | /* Have to include stdlib.h in order to ensure that size_t is defined. 
*/ 43 | 44 | #ifdef __KERNEL__ 45 | #include 46 | #else 47 | #include 48 | #endif 49 | 50 | /* Allow for C++ users */ 51 | 52 | #ifdef __cplusplus 53 | extern "C" { 54 | #endif 55 | 56 | /* Options, mostly defined by POSIX, but with some extras. */ 57 | 58 | #define REG_ICASE 0x0001 /* Maps to PCRE2_CASELESS */ 59 | #define REG_NEWLINE 0x0002 /* Maps to PCRE2_MULTILINE */ 60 | #define REG_NOTBOL 0x0004 /* Maps to PCRE2_NOTBOL */ 61 | #define REG_NOTEOL 0x0008 /* Maps to PCRE2_NOTEOL */ 62 | #define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE2_DOTALL */ 63 | #define REG_NOSUB 0x0020 /* Do not report what was matched */ 64 | #define REG_UTF 0x0040 /* NOT defined by POSIX; maps to PCRE2_UTF */ 65 | #define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */ 66 | #define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */ 67 | #define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */ 68 | #define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE2_UCP */ 69 | 70 | /* This is not used by PCRE2, but by defining it we make it easier 71 | to slot PCRE2 into existing programs that make POSIX calls. */ 72 | 73 | #define REG_EXTENDED 0 74 | 75 | /* Error values. Not all these are relevant or used by the wrapper. */ 76 | 77 | enum { 78 | REG_ASSERT = 1, /* internal error ? */ 79 | REG_BADBR, /* invalid repeat counts in {} */ 80 | REG_BADPAT, /* pattern error */ 81 | REG_BADRPT, /* ? 
* + invalid */ 82 | REG_EBRACE, /* unbalanced {} */ 83 | REG_EBRACK, /* unbalanced [] */ 84 | REG_ECOLLATE, /* collation error - not relevant */ 85 | REG_ECTYPE, /* bad class */ 86 | REG_EESCAPE, /* bad escape sequence */ 87 | REG_EMPTY, /* empty expression */ 88 | REG_EPAREN, /* unbalanced () */ 89 | REG_ERANGE, /* bad range inside [] */ 90 | REG_ESIZE, /* expression too big */ 91 | REG_ESPACE, /* failed to get memory */ 92 | REG_ESUBREG, /* bad back reference */ 93 | REG_INVARG, /* bad argument */ 94 | REG_NOMATCH /* match failed */ 95 | }; 96 | 97 | 98 | /* The structure representing a compiled regular expression. */ 99 | 100 | typedef struct { 101 | void *re_pcre2_code; 102 | void *re_match_data; 103 | size_t re_nsub; 104 | size_t re_erroffset; 105 | int re_cflags; 106 | } regex_t; 107 | 108 | /* The structure in which a captured offset is returned. */ 109 | 110 | typedef int regoff_t; 111 | 112 | typedef struct { 113 | regoff_t rm_so; 114 | regoff_t rm_eo; 115 | } regmatch_t; 116 | 117 | /* When an application links to a PCRE2 DLL in Windows, the symbols that are 118 | imported have to be identified as such. When building PCRE2, the appropriate 119 | export settings are needed, and are set in pcre2posix.c before including this 120 | file. */ 121 | 122 | #if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL) 123 | # define PCRE2POSIX_EXP_DECL extern __declspec(dllimport) 124 | # define PCRE2POSIX_EXP_DEFN __declspec(dllimport) 125 | #endif 126 | 127 | /* By default, we use the standard "extern" declarations. 
*/ 128 | 129 | #ifndef PCRE2POSIX_EXP_DECL 130 | # ifdef __cplusplus 131 | # define PCRE2POSIX_EXP_DECL extern "C" 132 | # define PCRE2POSIX_EXP_DEFN extern "C" 133 | # else 134 | # define PCRE2POSIX_EXP_DECL extern 135 | # define PCRE2POSIX_EXP_DEFN extern 136 | # endif 137 | #endif 138 | 139 | /* The functions */ 140 | 141 | PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int); 142 | PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t, 143 | regmatch_t *, int); 144 | PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t); 145 | PCRE2POSIX_EXP_DECL void regfree(regex_t *); 146 | 147 | #ifdef __cplusplus 148 | } /* extern "C" */ 149 | #endif 150 | 151 | /* End of pcre2posix.h */ 152 | -------------------------------------------------------------------------------- /pcre2/sljit/sljitConfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Stack-less Just-In-Time compiler 3 | * 4 | * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, are 7 | * permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this list of 10 | * conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 | * of conditions and the following disclaimer in the documentation and/or other materials 14 | * provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef _SLJIT_CONFIG_H_ 28 | #define _SLJIT_CONFIG_H_ 29 | 30 | /* --------------------------------------------------------------------- */ 31 | /* Custom defines */ 32 | /* --------------------------------------------------------------------- */ 33 | 34 | /* Put your custom defines here. This empty section will never change 35 | which helps maintaining patches (with diff / patch utilities). */ 36 | 37 | /* --------------------------------------------------------------------- */ 38 | /* Architecture */ 39 | /* --------------------------------------------------------------------- */ 40 | 41 | /* Architecture selection. 
*/
/* All of the selections below are commented out in this file, so none of
   them is defined here. */
/* #define SLJIT_CONFIG_X86_32 1 */
/* #define SLJIT_CONFIG_X86_64 1 */
/* #define SLJIT_CONFIG_ARM_V5 1 */
/* #define SLJIT_CONFIG_ARM_V7 1 */
/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
/* #define SLJIT_CONFIG_ARM_64 1 */
/* #define SLJIT_CONFIG_PPC_32 1 */
/* #define SLJIT_CONFIG_PPC_64 1 */
/* #define SLJIT_CONFIG_MIPS_32 1 */
/* #define SLJIT_CONFIG_MIPS_64 1 */
/* #define SLJIT_CONFIG_SPARC_32 1 */
/* #define SLJIT_CONFIG_TILEGX 1 */

/* #define SLJIT_CONFIG_AUTO 1 */
/* #define SLJIT_CONFIG_UNSUPPORTED 1 */

/* --------------------------------------------------------------------- */
/*  Utilities                                                            */
/* --------------------------------------------------------------------- */

/* Every option below is wrapped in #ifndef, so a value defined before this
   header is included takes precedence over the default given here. */

/* Useful for thread-safe compiling of global functions. */
#ifndef SLJIT_UTIL_GLOBAL_LOCK
/* Enabled by default */
#define SLJIT_UTIL_GLOBAL_LOCK 1
#endif

/* Implements a stack like data structure (by using mmap / VirtualAlloc). */
#ifndef SLJIT_UTIL_STACK
/* Enabled by default */
#define SLJIT_UTIL_STACK 1
#endif

/* Single threaded application. Does not require any locks. */
#ifndef SLJIT_SINGLE_THREADED
/* Disabled by default. */
#define SLJIT_SINGLE_THREADED 0
#endif

/* --------------------------------------------------------------------- */
/*  Configuration                                                        */
/* --------------------------------------------------------------------- */

/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should
   define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. */
#ifndef SLJIT_STD_MACROS_DEFINED
/* Disabled by default. */
#define SLJIT_STD_MACROS_DEFINED 0
#endif

/* Executable code allocation:
   If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
   define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */
#ifndef SLJIT_EXECUTABLE_ALLOCATOR
/* Enabled by default. */
#define SLJIT_EXECUTABLE_ALLOCATOR 1

/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses
   an allocator which does not set writable and executable
   permission flags at the same time. The trade-off is increased
   memory consumption and disabled dynamic code modifications.
   This default is nested inside the block above, so it is only
   supplied when SLJIT_EXECUTABLE_ALLOCATOR was not predefined. */
#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR
/* Disabled by default. */
#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0
#endif

#endif

/* Force cdecl calling convention even if a better calling
   convention (e.g. fastcall) is supported by the C compiler.
   If this option is enabled, C functions without
   SLJIT_CALL can also be called from JIT code. */
#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION
/* Disabled by default */
#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
#endif

/* Return with error when an invalid argument is passed. */
#ifndef SLJIT_ARGUMENT_CHECKS
/* Disabled by default */
#define SLJIT_ARGUMENT_CHECKS 0
#endif

/* Debug checks (assertions, etc.). */
#ifndef SLJIT_DEBUG
/* Enabled by default */
#define SLJIT_DEBUG 1
#endif

/* Verbose operations. */
#ifndef SLJIT_VERBOSE
/* Enabled by default */
#define SLJIT_VERBOSE 1
#endif

/*
  SLJIT_IS_FPU_AVAILABLE
    The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE.
      zero value - FPU is NOT present.
      nonzero value - FPU is present.
141 | */ 142 | 143 | /* For further configurations, see the beginning of sljitConfigInternal.h */ 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /pcre2/sljit/sljitExecAllocator.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Stack-less Just-In-Time compiler 3 | * 4 | * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, are 7 | * permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this list of 10 | * conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 | * of conditions and the following disclaimer in the documentation and/or other materials 14 | * provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | /* 28 | This file contains a simple executable memory allocator 29 | 30 | It is assumed that executable code blocks are usually medium (or sometimes 31 | large) memory blocks, and the allocator is not too frequently called (less 32 | optimized than other allocators). Thus, using it as a generic allocator is 33 | not suggested. 34 | 35 | How does it work: 36 | Memory is allocated in continuous memory areas called chunks by alloc_chunk() 37 | Chunk format: 38 | [ block ][ block ] ... [ block ][ block terminator ] 39 | 40 | All blocks and the block terminator start with a block_header. The block 41 | header contains the size of the previous and the next block. These sizes 42 | can also contain special values. 43 | Block size: 44 | 0 - The block is a free_block, with a different size member. 45 | 1 - The block is a block terminator. 46 | n - The block is used at the moment, and the value contains its size. 47 | Previous block size: 48 | 0 - This is the first block of the memory chunk. 49 | n - The size of the previous block. 50 | 51 | Using these size values we can go forward or backward on the block chain. 52 | The unused blocks are stored in a chain list pointed to by free_blocks. This 53 | list is useful if we need to find a suitable memory area when the allocator 54 | is called. 55 | 56 | When a block is freed, the new free block is connected to its adjacent free 57 | blocks if possible. 58 | 59 | [ free block ][ used block ][ free block ] 60 | and "used block" is freed, the three blocks are connected together: 61 | [ one big free block ] 62 | */ 63 | 64 | /* --------------------------------------------------------------------- */ 65 | /* System (OS) functions */ 66 | /* --------------------------------------------------------------------- */ 67 | 68 | /* 64 KByte. 
*/ 69 | #define CHUNK_SIZE 0x10000 70 | 71 | /* 72 | alloc_chunk / free_chunk : 73 | * allocate executable system memory chunks 74 | * the size is always divisible by CHUNK_SIZE 75 | allocator_grab_lock / allocator_release_lock : 76 | * make the allocator thread safe 77 | * can be empty if the OS (or the application) does not support threading 78 | * only the allocator requires this lock, sljit is fully thread safe 79 | as it only uses local variables 80 | */ 81 | 82 | #ifdef _WIN32 83 | /* Windows: a chunk is committed and reserved in one VirtualAlloc call with read/write/execute protection. */ 84 | static SLJIT_INLINE void* alloc_chunk(sljit_uw size) 85 | { 86 | return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); 87 | } 88 | /* Releases the whole reservation; size is unused because VirtualFree is called with a zero length and MEM_RELEASE. */ 89 | static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) 90 | { 91 | SLJIT_UNUSED_ARG(size); 92 | VirtualFree(chunk, 0, MEM_RELEASE); 93 | } 94 | 95 | #elif defined(__KERNEL__) 96 | /* NOTE(review): the header names after the three #include directives below are missing in this listing, presumably lost in extraction - restore them before compiling. */ 97 | #include 98 | #include 99 | #include 100 | /* Linux kernel: a chunk is obtained from __vmalloc with GFP_ATOMIC | __GFP_HIGHMEM. */ 101 | static SLJIT_INLINE void* alloc_chunk(sljit_uw size) 102 | { 103 | /* NOTE(review): only the pre-5.8 branch passes PAGE_KERNEL_EXEC; the two-argument call used for 5.8 and later carries no protection argument, so confirm that the returned memory is actually executable there. */ 104 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) 105 | return __vmalloc(size, GFP_ATOMIC | __GFP_HIGHMEM); 106 | #else 107 | return __vmalloc(size, GFP_ATOMIC | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 108 | #endif /* LINUX_VERSION_CODE */ 109 | } 110 | /* size is unused; vfree is given only the chunk address. */ 111 | static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) 112 | { 113 | SLJIT_UNUSED_ARG(size); 114 | vfree(chunk); 115 | } 116 | 117 | #else 118 | /* Other systems: private mmap with read/write/execute protection, anonymous when MAP_ANON is defined and backed by the dev_zero descriptor otherwise. Returns NULL when open_dev_zero reports failure or mmap returns MAP_FAILED. */ 119 | static SLJIT_INLINE void* alloc_chunk(sljit_uw size) 120 | { 121 | void *retval; 122 | 123 | #ifdef MAP_ANON 124 | retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); 125 | #else 126 | if (dev_zero < 0) { 127 | if (open_dev_zero()) 128 | return NULL; 129 | } 130 | retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0); 131 | #endif 132 | 133 | return (retval != MAP_FAILED) ?
retval : NULL; 134 | } 135 | /* Unmaps size bytes starting at chunk. */ 136 | static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) 137 | { 138 | munmap(chunk, size); 139 | } 140 | 141 | #endif 142 | 143 | /* --------------------------------------------------------------------- */ 144 | /* Common functions */ 145 | /* --------------------------------------------------------------------- */ 146 | 147 | #define CHUNK_MASK (~(CHUNK_SIZE - 1)) 148 | /* Header placed in front of every block. size is set to 0 for a free block and to 1 for the chunk terminator; prev_size is set to 0 for the first block of a chunk. */ 149 | struct block_header { 150 | sljit_uw size; 151 | sljit_uw prev_size; 152 | }; 153 | /* Free list node laid over an unused block: the list links plus the real block size, since header.size is 0 while the block is free. */ 154 | struct free_block { 155 | struct block_header header; 156 | struct free_block *next; 157 | struct free_block *prev; 158 | sljit_uw size; 159 | }; 160 | /* Pointer helpers: offset is a byte offset from base; MEM_START skips the header; ALIGN_SIZE adds the header size and rounds up to a multiple of 8. */ 161 | #define AS_BLOCK_HEADER(base, offset) \ 162 | ((struct block_header*)(((sljit_u8*)base) + offset)) 163 | #define AS_FREE_BLOCK(base, offset) \ 164 | ((struct free_block*)(((sljit_u8*)base) + offset)) 165 | #define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) 166 | #define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) 167 | /* Head of the doubly linked free list, the number of bytes currently handed out, and the number of bytes obtained from alloc_chunk (one header per chunk excluded). */ 168 | static struct free_block* free_blocks; 169 | static sljit_uw allocated_size; 170 | static sljit_uw total_size; 171 | /* Marks free_block as free with the given size and pushes it onto the head of the free list. */ 172 | static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) 173 | { 174 | free_block->header.size = 0; 175 | free_block->size = size; 176 | 177 | free_block->next = free_blocks; 178 | free_block->prev = NULL; 179 | if (free_blocks) 180 | free_blocks->prev = free_block; 181 | free_blocks = free_block; 182 | } 183 | /* Unlinks free_block from the free list, moving the list head when it was the first node. */ 184 | static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) 185 | { 186 | if (free_block->next) 187 | free_block->next->prev = free_block->prev; 188 | 189 | if (free_block->prev) 190 | free_block->prev->next = free_block->next; 191 | else { 192 | SLJIT_ASSERT(free_blocks == free_block); 193 | free_blocks = free_block->next; 194 | } 195 | } 196 | /* Allocates at least size bytes from the chunk pool under the allocator lock. Returns a pointer just past the block header, or NULL when no free block fits and alloc_chunk fails. */ 197 | SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) 198 | { 199 | struct
block_header *header; 200 | struct block_header *next_header; 201 | struct free_block *free_block; 202 | sljit_uw chunk_size; 203 | /* Every block is at least 64 bytes including its header, and its size is rounded up to a multiple of 8. */ 204 | allocator_grab_lock(); 205 | if (size < (64 - sizeof(struct block_header))) 206 | size = (64 - sizeof(struct block_header)); 207 | size = ALIGN_SIZE(size); 208 | /* First fit: take the first free block that is large enough. */ 209 | free_block = free_blocks; 210 | while (free_block) { 211 | if (free_block->size >= size) { 212 | chunk_size = free_block->size; 213 | if (chunk_size > size + 64) { 214 | /* We just cut a block from the end of the free block. */ 215 | chunk_size -= size; 216 | free_block->size = chunk_size; 217 | header = AS_BLOCK_HEADER(free_block, chunk_size); 218 | header->prev_size = chunk_size; 219 | AS_BLOCK_HEADER(header, size)->prev_size = size; 220 | } 221 | else { 222 | sljit_remove_free_block(free_block); 223 | header = (struct block_header*)free_block; 224 | size = chunk_size; 225 | } 226 | allocated_size += size; 227 | header->size = size; 228 | allocator_release_lock(); 229 | return MEM_START(header); 230 | } 231 | free_block = free_block->next; 232 | } 233 | /* Nothing fits: request a new chunk whose size is a multiple of CHUNK_SIZE and leaves room for the terminator header. */ 234 | chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK; 235 | header = (struct block_header*)alloc_chunk(chunk_size); 236 | if (!header) { 237 | allocator_release_lock(); 238 | return NULL; 239 | } 240 | /* The last header-sized slot of the chunk is kept for the block terminator. */ 241 | chunk_size -= sizeof(struct block_header); 242 | total_size += chunk_size; 243 | 244 | header->prev_size = 0; 245 | if (chunk_size > size + 64) { 246 | /* Cut the allocated space into a free and a used block. */ 247 | allocated_size += size; 248 | header->size = size; 249 | chunk_size -= size; 250 | 251 | free_block = AS_FREE_BLOCK(header, size); 252 | free_block->header.prev_size = size; 253 | sljit_insert_free_block(free_block, chunk_size); 254 | next_header = AS_BLOCK_HEADER(free_block, chunk_size); 255 | } 256 | else { 257 | /* All space belongs to this allocation.
*/ 258 | allocated_size += chunk_size; 259 | header->size = chunk_size; 260 | next_header = AS_BLOCK_HEADER(header, chunk_size); 261 | } 262 | next_header->size = 1; 263 | next_header->prev_size = chunk_size; 264 | allocator_release_lock(); 265 | return MEM_START(header); 266 | } 267 | /* Returns a block to the pool, merging it with free neighbours and releasing the chunk when it becomes entirely free and enough spare space remains. ptr is used without a NULL check - NOTE(review): callers presumably pass only pointers returned by sljit_malloc_exec; confirm. */ 268 | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) 269 | { 270 | struct block_header *header; 271 | struct free_block* free_block; 272 | 273 | allocator_grab_lock(); 274 | header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); 275 | allocated_size -= header->size; 276 | 277 | /* Connecting free blocks together if possible. */ 278 | 279 | /* If header->prev_size == 0, free_block will be equal to header. 280 | In this case, free_block->header.size will be > 0. */ 281 | free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); 282 | if (SLJIT_UNLIKELY(!free_block->header.size)) { 283 | free_block->size += header->size; 284 | header = AS_BLOCK_HEADER(free_block, free_block->size); 285 | header->prev_size = free_block->size; 286 | } 287 | else { 288 | free_block = (struct free_block*)header; 289 | sljit_insert_free_block(free_block, header->size); 290 | } 291 | /* Merge with the following block when that block is free as well. */ 292 | header = AS_BLOCK_HEADER(free_block, free_block->size); 293 | if (SLJIT_UNLIKELY(!header->size)) { 294 | free_block->size += ((struct free_block*)header)->size; 295 | sljit_remove_free_block((struct free_block*)header); 296 | header = AS_BLOCK_HEADER(free_block, free_block->size); 297 | header->prev_size = free_block->size; 298 | } 299 | 300 | /* The whole chunk is free. */ 301 | if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { 302 | /* If this block is freed, we still have (allocated_size / 2) free space.
*/ 303 | if (total_size - free_block->size > (allocated_size * 3 / 2)) { 304 | total_size -= free_block->size; 305 | sljit_remove_free_block(free_block); 306 | free_chunk(free_block, free_block->size + sizeof(struct block_header)); 307 | } 308 | } 309 | 310 | allocator_release_lock(); 311 | } 312 | /* Releases every chunk that consists of one free block only, that is a free block which is the first block of its chunk and is directly followed by the terminator. */ 313 | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) 314 | { 315 | struct free_block* free_block; 316 | struct free_block* next_free_block; 317 | 318 | allocator_grab_lock(); 319 | /* The successor is saved first because the current node may be unlinked and unmapped inside the loop. */ 320 | free_block = free_blocks; 321 | while (free_block) { 322 | next_free_block = free_block->next; 323 | if (!free_block->header.prev_size && 324 | AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { 325 | total_size -= free_block->size; 326 | sljit_remove_free_block(free_block); 327 | free_chunk(free_block, free_block->size + sizeof(struct block_header)); 328 | } 329 | free_block = next_free_block; 330 | } 331 | 332 | SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); 333 | allocator_release_lock(); 334 | } 335 | -------------------------------------------------------------------------------- /pcre2/sljit/sljitNativePPC_32.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Stack-less Just-In-Time compiler 3 | * 4 | * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, are 7 | * permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this list of 10 | * conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 | * of conditions and the following disclaimer in the documentation and/or other materials 14 | * provided with the distribution.
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | /* ppc 32-bit arch dependent functions. */ 28 | /* Loads the constant imm into reg with one or two instructions: a single ADDI when imm lies within [SIMM_MIN, SIMM_MAX], a single ORI from TMP_ZERO when no bit above the low 16 is set, otherwise ADDIS with the upper half followed by an ORI when the lower half is non-zero. Returns the push_inst status. */ 29 | static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) 30 | { 31 | if (imm <= SIMM_MAX && imm >= SIMM_MIN) 32 | return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); 33 | 34 | if (!(imm & ~0xffff)) 35 | return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); 36 | 37 | FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); 38 | return (imm & 0xffff) ?
push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; 39 | } 40 | /* Builds an RLWINM instruction word from src to dst with from placed at bit 6 and 31 at bit 1 - presumably the mask begin and mask end fields, which would clear the bits to the left of from; confirm against the RLWINM encoding. */ 41 | #define INS_CLEAR_LEFT(dst, src, from) \ 42 | (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1)) 43 | /* Emits the instruction or instructions for one sljit operation op. The ALT_FORM bits in flags select alternative encodings, most of which take their immediate operand from compiler->imm; unary operations read their operand from src2 and assert that src1 is TMP_REG1. Returns the push_inst status, or SLJIT_SUCCESS when nothing has to be emitted. */ 44 | static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, 45 | sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) 46 | { 47 | switch (op) { 48 | case SLJIT_MOV: 49 | case SLJIT_MOV_U32: 50 | case SLJIT_MOV_S32: 51 | case SLJIT_MOV_P: 52 | SLJIT_ASSERT(src1 == TMP_REG1); 53 | if (dst != src2) 54 | return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); 55 | return SLJIT_SUCCESS; 56 | 57 | case SLJIT_MOV_U8: 58 | case SLJIT_MOV_S8: 59 | SLJIT_ASSERT(src1 == TMP_REG1); 60 | if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 61 | if (op == SLJIT_MOV_S8) 62 | return push_inst(compiler, EXTSB | S(src2) | A(dst)); 63 | return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); 64 | } 65 | else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) 66 | return push_inst(compiler, EXTSB | S(src2) | A(dst)); 67 | else { 68 | SLJIT_ASSERT(dst == src2); 69 | } 70 | return SLJIT_SUCCESS; 71 | 72 | case SLJIT_MOV_U16: 73 | case SLJIT_MOV_S16: 74 | SLJIT_ASSERT(src1 == TMP_REG1); 75 | if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 76 | if (op == SLJIT_MOV_S16) 77 | return push_inst(compiler, EXTSH | S(src2) | A(dst)); 78 | return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); 79 | } 80 | else { 81 | SLJIT_ASSERT(dst == src2); 82 | } 83 | return SLJIT_SUCCESS; 84 | 85 | case SLJIT_NOT: 86 | SLJIT_ASSERT(src1 == TMP_REG1); 87 | return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); 88 | 89 | case SLJIT_NEG: 90 | SLJIT_ASSERT(src1 == TMP_REG1); 91 | return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); 92 | 93 | case SLJIT_CLZ: 94 | SLJIT_ASSERT(src1 == TMP_REG1); 95 | return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); 96 | 97 | case SLJIT_ADD: 98 | if
(flags & ALT_FORM1) { 99 | /* Flags are not set: BIN_IMM_EXTS unnecessary. */ 100 | SLJIT_ASSERT(src2 == TMP_REG2); 101 | return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); 102 | } 103 | if (flags & ALT_FORM2) { 104 | /* Flags are not set: BIN_IMM_EXTS unnecessary. */ 105 | SLJIT_ASSERT(src2 == TMP_REG2); 106 | return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); 107 | } 108 | if (flags & ALT_FORM3) { 109 | SLJIT_ASSERT(src2 == TMP_REG2); 110 | return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); 111 | } 112 | if (flags & ALT_FORM4) { 113 | /* Flags are not set: BIN_IMM_EXTS unnecessary. Bit 15 of the constant is added to the upper half, presumably to compensate for ADDI sign-extending its immediate. NOTE(review): that sum is not masked, so it reaches 0x10000 when the upper half is 0xffff and bit 15 is set - confirm callers never select ALT_FORM4 for such constants. */ 114 | FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); 115 | return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); 116 | } 117 | if (!(flags & ALT_SET_FLAGS)) 118 | return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); 119 | return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); 120 | 121 | case SLJIT_ADDC: 122 | if (flags & ALT_FORM1) { 123 | FAIL_IF(push_inst(compiler, MFXER | D(0))); 124 | FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); 125 | return push_inst(compiler, MTXER | S(0)); 126 | } 127 | return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); 128 | 129 | case SLJIT_SUB: 130 | if (flags & ALT_FORM1) { 131 | /* Flags are not set: BIN_IMM_EXTS unnecessary.
*/ 132 | SLJIT_ASSERT(src2 == TMP_REG2); 133 | return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); 134 | } 135 | if (flags & (ALT_FORM2 | ALT_FORM3)) { 136 | SLJIT_ASSERT(src2 == TMP_REG2); 137 | if (flags & ALT_FORM2) 138 | FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); 139 | if (flags & ALT_FORM3) 140 | return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); 141 | return SLJIT_SUCCESS; 142 | } 143 | if (flags & (ALT_FORM4 | ALT_FORM5)) { 144 | if (flags & ALT_FORM4) 145 | FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); 146 | if (flags & ALT_FORM5) 147 | FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); 148 | return SLJIT_SUCCESS; 149 | } 150 | if (!(flags & ALT_SET_FLAGS)) 151 | return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); 152 | if (flags & ALT_FORM6) 153 | FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); 154 | return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); 155 | 156 | case SLJIT_SUBC: 157 | if (flags & ALT_FORM1) { 158 | FAIL_IF(push_inst(compiler, MFXER | D(0))); 159 | FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); 160 | return push_inst(compiler, MTXER | S(0)); 161 | } 162 | return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); 163 | 164 | case SLJIT_MUL: 165 | if (flags & ALT_FORM1) { 166 | SLJIT_ASSERT(src2 == TMP_REG2); 167 | return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); 168 | } 169 | return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); 170 | 171 | case SLJIT_AND: 172 | if (flags & ALT_FORM1) { 173 | SLJIT_ASSERT(src2 == TMP_REG2); 174 | return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); 175 | } 176 | if (flags & ALT_FORM2) { 177 | SLJIT_ASSERT(src2 == TMP_REG2); 178 | return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); 179 | } 180 | return push_inst(compiler, AND | RC(flags) | S(src1) |
A(dst) | B(src2)); 181 | 182 | case SLJIT_OR: 183 | if (flags & ALT_FORM1) { 184 | SLJIT_ASSERT(src2 == TMP_REG2); 185 | return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); 186 | } 187 | if (flags & ALT_FORM2) { 188 | SLJIT_ASSERT(src2 == TMP_REG2); 189 | return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); 190 | } 191 | if (flags & ALT_FORM3) { 192 | SLJIT_ASSERT(src2 == TMP_REG2); 193 | FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); 194 | return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); 195 | } 196 | return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); 197 | 198 | case SLJIT_XOR: 199 | if (flags & ALT_FORM1) { 200 | SLJIT_ASSERT(src2 == TMP_REG2); 201 | return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); 202 | } 203 | if (flags & ALT_FORM2) { 204 | SLJIT_ASSERT(src2 == TMP_REG2); 205 | return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); 206 | } 207 | if (flags & ALT_FORM3) { 208 | SLJIT_ASSERT(src2 == TMP_REG2); 209 | FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); 210 | return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); 211 | } 212 | return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); 213 | 214 | case SLJIT_SHL: 215 | if (flags & ALT_FORM1) { 216 | SLJIT_ASSERT(src2 == TMP_REG2); 217 | compiler->imm &= 0x1f; 218 | return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); 219 | } 220 | return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); 221 | 222 | case SLJIT_LSHR: 223 | if (flags & ALT_FORM1) { 224 | SLJIT_ASSERT(src2 == TMP_REG2); 225 | compiler->imm &= 0x1f; 226 | return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); 227 | } 228 | return push_inst(compiler, SRW |
RC(flags) | S(src1) | A(dst) | B(src2)); 229 | 230 | case SLJIT_ASHR: 231 | if (flags & ALT_FORM3) 232 | FAIL_IF(push_inst(compiler, MFXER | D(0))); 233 | if (flags & ALT_FORM1) { 234 | SLJIT_ASSERT(src2 == TMP_REG2); 235 | compiler->imm &= 0x1f; 236 | FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); 237 | } 238 | else 239 | FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2))); 240 | return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; 241 | } 242 | 243 | SLJIT_ASSERT_STOP(); 244 | return SLJIT_SUCCESS; 245 | } 246 | /* Always emits the two-instruction ADDIS + ORI form for init_value, presumably so that sljit_set_const can later patch both 16-bit halves in place. */ 247 | static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) 248 | { 249 | FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); 250 | return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); 251 | } 252 | /* Overwrites the low 16 bits of the two instruction words at addr with the upper and lower halves of new_target, then flushes the instruction cache for the same two words at their executable address. */ 253 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) 254 | { 255 | sljit_ins *inst = (sljit_ins *)addr; 256 | 257 | inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); 258 | inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); 259 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 260 | SLJIT_CACHE_FLUSH(inst, inst + 2); 261 | } 262 | /* Same patching scheme as sljit_set_jump_addr, applied to a signed constant. */ 263 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) 264 | { 265 | sljit_ins *inst = (sljit_ins *)addr; 266 | 267 | inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); 268 | inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); 269 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 270 | SLJIT_CACHE_FLUSH(inst, inst + 2); 271 | } 272 | -------------------------------------------------------------------------------- /pcre2/sljit/sljitNativeSPARC_32.c: -------------------------------------------------------------------------------- 1 |
/* 2 | * Stack-less Just-In-Time compiler 3 | * 4 | * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, are 7 | * permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this list of 10 | * conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 | * of conditions and the following disclaimer in the documentation and/or other materials 14 | * provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | /* Loads the constant imm into dst: a single OR with an immediate when imm lies within [SIMM_MIN, SIMM_MAX], otherwise SETHI with bits 10 and above followed by an OR with the low 10 bits when those are non-zero. Returns the push_inst status. */ 27 | static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw imm) 28 | { 29 | if (imm <= SIMM_MAX && imm >= SIMM_MIN) 30 | return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); 31 | 32 | FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst))); 33 | return (imm & 0x3ff) ?
push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS; 34 | } 35 | /* Encodes the second operand as an immediate when SRC2_IMM is set in flags and as a register otherwise. */ 36 | #define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2)) 37 | /* Emits the instruction or instructions for one sljit operation op. Move and unary operations assert that src1 is TMP_REG1 and that src2 is not an immediate; binary operations pass src2 through ARG2. The SET_FLAGS bit of flags is copied into the instruction word and into the second push_inst argument. Returns the push_inst status, or SLJIT_SUCCESS when nothing has to be emitted. */ 38 | static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, 39 | sljit_s32 dst, sljit_s32 src1, sljit_sw src2) 40 | { 41 | SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); 42 | 43 | switch (op) { 44 | case SLJIT_MOV: 45 | case SLJIT_MOV_U32: 46 | case SLJIT_MOV_S32: 47 | case SLJIT_MOV_P: 48 | SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 49 | if (dst != src2) 50 | return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); 51 | return SLJIT_SUCCESS; 52 | 53 | case SLJIT_MOV_U8: 54 | case SLJIT_MOV_S8: 55 | SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 56 | if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 57 | if (op == SLJIT_MOV_U8) 58 | return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); 59 | FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); 60 | return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); 61 | } 62 | else if (dst != src2) 63 | SLJIT_ASSERT_STOP(); 64 | return SLJIT_SUCCESS; 65 | 66 | case SLJIT_MOV_U16: 67 | case SLJIT_MOV_S16: 68 | SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 69 | if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 70 | FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); 71 | return push_inst(compiler, (op == SLJIT_MOV_S16 ?
SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); 72 | } 73 | else if (dst != src2) 74 | SLJIT_ASSERT_STOP(); 75 | return SLJIT_SUCCESS; 76 | 77 | case SLJIT_NOT: 78 | SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 79 | return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); 80 | 81 | case SLJIT_CLZ: 82 | SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 83 | /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */ 84 | FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); 85 | FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); 86 | FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); 87 | FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS))); 88 | FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); 89 | 90 | /* Loop. */ 91 | FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); 92 | FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); 93 | FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); 94 | return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS)); 95 | 96 | case SLJIT_ADD: 97 | return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); 98 | 99 | case SLJIT_ADDC: 100 | return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); 101 | 102 | case SLJIT_SUB: 103 | return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); 104 | 105 | case SLJIT_SUBC: 106 | return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2),
DR(dst) | (flags & SET_FLAGS)); 107 | 108 | case SLJIT_MUL: 109 | FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); 110 | if (!(flags & SET_FLAGS)) 111 | return SLJIT_SUCCESS; 112 | FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); 113 | FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); 114 | return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); 115 | 116 | case SLJIT_AND: 117 | return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); 118 | 119 | case SLJIT_OR: 120 | return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); 121 | 122 | case SLJIT_XOR: 123 | return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); 124 | 125 | case SLJIT_SHL: 126 | FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); 127 | return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); 128 | 129 | case SLJIT_LSHR: 130 | FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); 131 | return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); 132 | 133 | case SLJIT_ASHR: 134 | FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); 135 | return !(flags & SET_FLAGS) ?
SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); 136 | } 137 | 138 | SLJIT_ASSERT_STOP(); 139 | return SLJIT_SUCCESS; 140 | } 141 | /* Always emits the two-instruction SETHI + OR form for init_value, presumably so that sljit_set_const can later patch both fields in place. */ 142 | static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) 143 | { 144 | FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); 145 | return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); 146 | } 147 | /* Overwrites the low 22 bits of the first instruction word at addr with bits 10 and above of new_target and the low 10 bits of the second word with the low 10 bits of new_target, then flushes the instruction cache for the same two words at their executable address. */ 148 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) 149 | { 150 | sljit_ins *inst = (sljit_ins *)addr; 151 | 152 | inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff); 153 | inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff); 154 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 155 | SLJIT_CACHE_FLUSH(inst, inst + 2); 156 | } 157 | /* Same patching scheme as sljit_set_jump_addr, applied to a signed constant. */ 158 | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) 159 | { 160 | sljit_ins *inst = (sljit_ins *)addr; 161 | 162 | inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff); 163 | inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff); 164 | inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 165 | SLJIT_CACHE_FLUSH(inst, inst + 2); 166 | } 167 | -------------------------------------------------------------------------------- /pcre2/sljit/sljitUtils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Stack-less Just-In-Time compiler 3 | * 4 | * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, are 7 | * permitted provided that the following conditions are met: 8 | * 9 | * 1.
Redistributions of source code must retain the above copyright notice, this list of 10 | * conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 | * of conditions and the following disclaimer in the documentation and/or other materials 14 | * provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | /* ------------------------------------------------------------------------ */ 28 | /* Locks */ 29 | /* ------------------------------------------------------------------------ */ 30 | /* Lock primitives for the executable allocator (allocator_grab_lock / allocator_release_lock) and for the optional global lock (sljit_grab_lock / sljit_release_lock). One implementation is selected per build: single-threaded, Windows, or kernel / pthread. */ 31 | #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) 32 | 33 | #if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) 34 | /* Single-threaded build: every lock function has an empty body. */ 35 | #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) 36 | 37 | static SLJIT_INLINE void allocator_grab_lock(void) 38 | { 39 | /* Always successful. */ 40 | } 41 | 42 | static SLJIT_INLINE void allocator_release_lock(void) 43 | { 44 | /* Always successful.
*/ 45 | } 46 | 47 | #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ 48 | 49 | #if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) 50 | 51 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) 52 | { 53 | /* Always successful. */ 54 | } 55 | 56 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) 57 | { 58 | /* Always successful. */ 59 | } 60 | 61 | #endif /* SLJIT_UTIL_GLOBAL_LOCK */ 62 | 63 | #elif defined(_WIN32) /* SLJIT_SINGLE_THREADED */ 64 | 65 | #include "windows.h" 66 | 67 | #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) 68 | /* Windows: the mutex handle starts as 0 and is created on the first grab, already owned by the caller (second CreateMutex argument is TRUE); later grabs wait on it. */ 69 | static HANDLE allocator_mutex = 0; 70 | 71 | static SLJIT_INLINE void allocator_grab_lock(void) 72 | { 73 | /* No idea what to do if an error occurs. Static mutexes should never fail... NOTE(review): the check of allocator_mutex is not synchronized, so two threads making the very first call at the same time could each create a mutex - confirm the first call happens before other threads start. */ 74 | if (!allocator_mutex) 75 | allocator_mutex = CreateMutex(NULL, TRUE, NULL); 76 | else 77 | WaitForSingleObject(allocator_mutex, INFINITE); 78 | } 79 | 80 | static SLJIT_INLINE void allocator_release_lock(void) 81 | { 82 | ReleaseMutex(allocator_mutex); 83 | } 84 | 85 | #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ 86 | 87 | #if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) 88 | /* Same lazy creation scheme as allocator_mutex, for the public global lock. */ 89 | static HANDLE global_mutex = 0; 90 | 91 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) 92 | { 93 | /* No idea what to do if an error occurs. Static mutexes should never fail...
*/ 94 | if (!global_mutex) 95 | global_mutex = CreateMutex(NULL, TRUE, NULL); 96 | else 97 | WaitForSingleObject(global_mutex, INFINITE); 98 | } 99 | 100 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) 101 | { 102 | ReleaseMutex(global_mutex); 103 | } 104 | 105 | #endif /* SLJIT_UTIL_GLOBAL_LOCK */ 106 | 107 | #else /* _WIN32 */ 108 | 109 | #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) 110 | /* Kernel builds guard the allocator with a spinlock, all other builds with a statically initialized pthread mutex. NOTE(review): the header names after the #include directives in this section are missing in this listing, presumably lost in extraction - restore them before compiling. */ 111 | #ifdef __KERNEL__ 112 | #include 113 | #else 114 | #include 115 | #endif 116 | /* NOTE(review): DEFINE_SPINLOCK is used without a static qualifier, unlike the pthread mutex below - confirm the external linkage is intended. */ 117 | #ifdef __KERNEL__ 118 | DEFINE_SPINLOCK(allocator_mutex); 119 | #else 120 | static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER; 121 | #endif 122 | /* NOTE(review): the kernel path takes the plain spin_lock variant; confirm that no caller runs in a context that would need the bh or irqsave variants. */ 123 | static SLJIT_INLINE void allocator_grab_lock(void) 124 | { 125 | #ifdef __KERNEL__ 126 | spin_lock(&allocator_mutex); 127 | #else 128 | pthread_mutex_lock(&allocator_mutex); 129 | #endif 130 | } 131 | 132 | static SLJIT_INLINE void allocator_release_lock(void) 133 | { 134 | #ifdef __KERNEL__ 135 | spin_unlock(&allocator_mutex); 136 | #else 137 | pthread_mutex_unlock(&allocator_mutex); 138 | #endif 139 | } 140 | 141 | #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ 142 | 143 | #if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) 144 | /* Global lock: same kernel spinlock / pthread mutex split as the allocator lock. */ 145 | #ifdef __KERNEL__ 146 | #include 147 | #else 148 | #include 149 | #endif 150 | 151 | #ifdef __KERNEL__ 152 | DEFINE_SPINLOCK(global_mutex); 153 | #else 154 | static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; 155 | #endif 156 | 157 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) 158 | { 159 | #ifdef __KERNEL__ 160 | spin_lock(&global_mutex); 161 | #else 162 | pthread_mutex_lock(&global_mutex); 163 | #endif 164 | } 165 | 166 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) 167 | { 168 | #ifdef __KERNEL__ 169 | spin_unlock(&global_mutex); 170 | #else 171 | pthread_mutex_unlock(&global_mutex); 172 | #endif 173 | } 174 | 175 | #endif /* SLJIT_UTIL_GLOBAL_LOCK */ 176 | 177 | #endif /* _WIN32 */
178 | 179 | /* ------------------------------------------------------------------------ */ 180 | /* Stack */ 181 | /* ------------------------------------------------------------------------ */ 182 | 183 | #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) 184 | 185 | #ifdef _WIN32 186 | #include "windows.h" 187 | #else 188 | #ifdef __KERNEL__ 189 | #include "libc.h" 190 | #else 191 | /* Provides mmap function. */ 192 | #include 193 | /* For detecting the page size. */ 194 | #include 195 | #endif 196 | 197 | #ifndef MAP_ANON 198 | 199 | #ifndef __KERNEL__ 200 | #include 201 | 202 | /* Some old systems does not have MAP_ANON. */ 203 | static sljit_s32 dev_zero = -1; 204 | #endif 205 | 206 | #if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) 207 | 208 | static SLJIT_INLINE sljit_s32 open_dev_zero(void) 209 | { 210 | dev_zero = open("/dev/zero", O_RDWR); 211 | return dev_zero < 0; 212 | } 213 | 214 | #else /* SLJIT_SINGLE_THREADED */ 215 | 216 | #ifdef __KERNEL__ 217 | #include 218 | #else 219 | #include 220 | 221 | static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; 222 | 223 | static SLJIT_INLINE sljit_s32 open_dev_zero(void) 224 | { 225 | pthread_mutex_lock(&dev_zero_mutex); 226 | /* The dev_zero might be initialized by another thread during the waiting. */ 227 | if (dev_zero < 0) { 228 | dev_zero = open("/dev/zero", O_RDWR); 229 | } 230 | pthread_mutex_unlock(&dev_zero_mutex); 231 | return dev_zero < 0; 232 | } 233 | #endif 234 | 235 | #endif /* SLJIT_SINGLE_THREADED */ 236 | 237 | #endif 238 | 239 | #endif 240 | 241 | #endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */ 242 | 243 | #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) 244 | 245 | /* Planning to make it even more clever in the future. 
*/ 246 | static sljit_sw sljit_page_align = 0; 247 | 248 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data) 249 | { 250 | struct sljit_stack *stack; 251 | union { 252 | void *ptr; 253 | sljit_uw uw; 254 | } base; 255 | #ifdef _WIN32 256 | SYSTEM_INFO si; 257 | #endif 258 | 259 | SLJIT_UNUSED_ARG(allocator_data); 260 | if (limit > max_limit || limit < 1) 261 | return NULL; 262 | 263 | #ifdef _WIN32 264 | if (!sljit_page_align) { 265 | GetSystemInfo(&si); 266 | sljit_page_align = si.dwPageSize - 1; 267 | } 268 | #else 269 | if (!sljit_page_align) { 270 | #ifdef __KERNEL__ 271 | sljit_page_align = PAGE_SIZE; 272 | #else 273 | sljit_page_align = sysconf(_SC_PAGESIZE); 274 | #endif 275 | /* Should never happen. */ 276 | if (sljit_page_align < 0) 277 | sljit_page_align = 4096; 278 | sljit_page_align--; 279 | } 280 | #endif 281 | 282 | /* Align limit and max_limit. */ 283 | max_limit = (max_limit + sljit_page_align) & ~sljit_page_align; 284 | 285 | stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); 286 | if (!stack) 287 | return NULL; 288 | 289 | #ifdef _WIN32 290 | base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE); 291 | if (!base.ptr) { 292 | SLJIT_FREE(stack, allocator_data); 293 | return NULL; 294 | } 295 | stack->base = base.uw; 296 | stack->limit = stack->base; 297 | stack->max_limit = stack->base + max_limit; 298 | if (sljit_stack_resize(stack, stack->base + limit)) { 299 | sljit_free_stack(stack, allocator_data); 300 | return NULL; 301 | } 302 | #elif defined(__KERNEL__) 303 | base.ptr = malloc(max_limit); 304 | 305 | if (base.ptr == NULL) { 306 | SLJIT_FREE(stack, allocator_data); 307 | return NULL; 308 | } 309 | stack->base = base.uw; 310 | stack->limit = stack->base + limit; 311 | stack->max_limit = stack->base + max_limit; 312 | #else 313 | #ifdef MAP_ANON 314 | base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, 
MAP_PRIVATE | MAP_ANON, -1, 0); 315 | #else 316 | if (dev_zero < 0) { 317 | if (open_dev_zero()) { 318 | SLJIT_FREE(stack, allocator_data); 319 | return NULL; 320 | } 321 | } 322 | base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); 323 | #endif 324 | if (base.ptr == MAP_FAILED) { 325 | SLJIT_FREE(stack, allocator_data); 326 | return NULL; 327 | } 328 | stack->base = base.uw; 329 | stack->limit = stack->base + limit; 330 | stack->max_limit = stack->base + max_limit; 331 | #endif 332 | stack->top = stack->base; 333 | return stack; 334 | } 335 | 336 | #undef PAGE_ALIGN 337 | 338 | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data) 339 | { 340 | SLJIT_UNUSED_ARG(allocator_data); 341 | #ifdef _WIN32 342 | VirtualFree((void*)stack->base, 0, MEM_RELEASE); 343 | #elif defined(__KERNEL__) 344 | free((void*)stack->base); 345 | #else 346 | munmap((void*)stack->base, stack->max_limit - stack->base); 347 | #endif 348 | SLJIT_FREE(stack, allocator_data); 349 | } 350 | 351 | SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit) 352 | { 353 | sljit_uw aligned_old_limit; 354 | sljit_uw aligned_new_limit; 355 | 356 | if ((new_limit > stack->max_limit) || (new_limit < stack->base)) 357 | return -1; 358 | #ifdef _WIN32 359 | aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; 360 | aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; 361 | if (aligned_new_limit != aligned_old_limit) { 362 | if (aligned_new_limit > aligned_old_limit) { 363 | if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE)) 364 | return -1; 365 | } 366 | else { 367 | if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT)) 368 | return -1; 369 | } 370 | } 371 | stack->limit = new_limit; 372 | return 0; 373 | #else 374 | if (new_limit 
>= stack->limit) { 375 | stack->limit = new_limit; 376 | return 0; 377 | } 378 | aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; 379 | aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; 380 | /* If madvise is available, we release the unnecessary space. */ 381 | #if defined(MADV_DONTNEED) 382 | if (aligned_new_limit < aligned_old_limit) 383 | madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED); 384 | #elif defined(POSIX_MADV_DONTNEED) 385 | if (aligned_new_limit < aligned_old_limit) 386 | posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED); 387 | #endif 388 | stack->limit = new_limit; 389 | return 0; 390 | #endif 391 | } 392 | 393 | #endif /* SLJIT_UTIL_STACK */ 394 | 395 | #endif 396 | -------------------------------------------------------------------------------- /ts_pcre/Makefile: -------------------------------------------------------------------------------- 1 | CODE_WIDTH ?= 8 2 | LIBC := ${PWD}/../libc 3 | PCRE2 := ${PWD}/../pcre2 4 | 5 | ifeq ($(strip $(DEBUG)),) 6 | ccflags-y += -O2 -s 7 | else 8 | ccflags-y += -ggdb -DDEBUG 9 | endif 10 | ccflags-y += -I$(LIBC) -I$(PCRE2) \ 11 | -DPCRE2_CODE_UNIT_WIDTH=${CODE_WIDTH} 12 | 13 | MODULES_DIR := /lib/modules/$(shell uname -r) 14 | KERNEL_DIR ?= ${MODULES_DIR}/build 15 | KBUILD_EXTRA_SYMBOLS := ${LIBC}/Module.symvers ${PCRE2}/Module.symvers 16 | 17 | obj-m := ts_pcre.o 18 | 19 | all: 20 | make -C ${KERNEL_DIR} M=$$PWD; 21 | 22 | modules: 23 | make -C ${KERNEL_DIR} M=$$PWD $@; 24 | 25 | modules_install: 26 | make -C ${KERNEL_DIR} M=$$PWD $@; 27 | depmod -a; 28 | 29 | clean: 30 | make -C ${KERNEL_DIR} M=$$PWD $@; 31 | rm -rf modules.order cscope.* 32 | -------------------------------------------------------------------------------- /ts_pcre/ts_pcre.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ts_pcre.c PCRE search implementation 3 
| * 4 | * Copyright (C) 2016 Seongmyun Cho 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; version 2 of the License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | * 19 | * ===================================================================== 20 | * 21 | * Implements PCRE matching algorithm: 22 | * 23 | * Note: Obviously, it's possible that a matching could be spread over 24 | * multiple blocks, in that case this code won't find any coincidence. 
25 | * 26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include "libc.h" 38 | #include "pcre2.h" 39 | 40 | MODULE_LICENSE("Dual BSD/GPL"); 41 | MODULE_AUTHOR("Seongmyun Cho "); 42 | MODULE_DESCRIPTION("PCRE text search engine"); 43 | 44 | #define PARSE_REGEX "(?offset; 90 | 91 | preempt_disable(); 92 | 93 | _match_data = __get_cpu_var(match_data); 94 | _match_context = __get_cpu_var(match_context); 95 | 96 | for (;;) { 97 | text_len = conf->get_next_block(consumed, &text, conf, state); 98 | 99 | if (unlikely(text_len == 0)) 100 | break; 101 | 102 | rc = pcre2_match(pcre->re, text, text_len, 0, 0, 103 | _match_data, _match_context); 104 | 105 | if (unlikely(rc >= 0)) { 106 | #ifdef DEBUG 107 | PCRE2_UCHAR *str; 108 | PCRE2_SIZE slen; 109 | int i; 110 | 111 | rc = pcre2_substring_get_bynumber(_match_data, 0, \ 112 | &str, &slen); 113 | 114 | if (rc < 0) { 115 | pr_debug("%s: pcre2_substring_get_bynumber(pcre) failed", 116 | __func__); 117 | break; 118 | } else { 119 | printk("\n"); 120 | for (i = 0; i < slen; i++) { 121 | if (isprint(str[i])) 122 | printk("%c", str[i]); 123 | else 124 | printk("|%02X|", str[i]); 125 | } 126 | printk("\n"); 127 | 128 | pcre2_substring_free(str); 129 | } 130 | #endif 131 | ovector = pcre2_get_ovector_pointer(_match_data); 132 | match = consumed + ovector[0]; 133 | // state->offset = consumed + ovector[1]; 134 | pr_debug("%s: matched |%s| at offset %u", __func__, pcre->pcre_str, match); 135 | goto found; 136 | } 137 | 138 | consumed += text_len; 139 | // state->offset = consumed; 140 | } 141 | 142 | match = UINT_MAX; 143 | 144 | found: 145 | preempt_enable(); 146 | return match; 147 | } 148 | 149 | static inline int 150 | pattern_parse(const char *pattern, PCRE2_UCHAR ** pcre, PCRE2_UCHAR ** op_str) 151 | { 152 | PCRE2_SIZE relen, oplen; 153 | pcre2_match_data *match_data; 154 | int res, rc; 155 | 156 | match_data = pcre2_match_data_create(4, NULL); 
157 | if (!match_data) { 158 | return -ENOMEM; 159 | } 160 | 161 | res = pcre2_match(parse_regex, pattern, -1, 0, 0, match_data, NULL); 162 | if (res <= 0) { 163 | pr_debug("%s: pcre2_match failed", __func__); 164 | pcre2_match_data_free(match_data); 165 | return -EINVAL; 166 | } 167 | 168 | relen = 0; 169 | oplen = 0; 170 | 171 | rc = pcre2_substring_get_bynumber(match_data, 1, pcre, &relen); 172 | if (rc < 0) { 173 | pr_debug("%s: pcre2_substring_get_bynumber(pcre) failed", 174 | __func__); 175 | return -EINVAL; 176 | } 177 | 178 | if (res > 2) { 179 | rc = pcre2_substring_get_bynumber(match_data, 2, op_str, 180 | &oplen); 181 | if (rc < 0) { 182 | pr_debug 183 | ("%s: pcre2_substring_get_bynumber(opts) failed", 184 | __func__); 185 | return -EINVAL; 186 | } 187 | } 188 | #ifdef DEBUG 189 | if (relen > 0) { 190 | pr_debug("pcre: %lu|%s|", relen, *pcre); 191 | } 192 | 193 | if (oplen > 0) { 194 | pr_debug("opts: %lu|%s|", oplen, *op_str); 195 | } 196 | #endif 197 | 198 | pcre2_match_data_free(match_data); 199 | 200 | return 0; 201 | } 202 | 203 | static inline void opts_parse(char *op_str, int *_opts) 204 | { 205 | char *op = NULL; 206 | int opts = 0; 207 | 208 | op = op_str; 209 | *_opts = 0; 210 | 211 | if (op != NULL) { 212 | while (*op) { 213 | switch (*op) { 214 | case 'A': 215 | opts |= PCRE2_ANCHORED; 216 | break; 217 | case 'E': 218 | opts |= PCRE2_DOLLAR_ENDONLY; 219 | break; 220 | case 'G': 221 | opts |= PCRE2_UNGREEDY; 222 | break; 223 | 224 | case 'i': 225 | opts |= PCRE2_CASELESS; 226 | break; 227 | case 'm': 228 | opts |= PCRE2_MULTILINE; 229 | break; 230 | case 's': 231 | opts |= PCRE2_DOTALL; 232 | break; 233 | case 'x': 234 | opts |= PCRE2_EXTENDED; 235 | break; 236 | 237 | default: 238 | pr_info("%s: unknown regex modifier '%c'", 239 | __func__, *op); 240 | break; 241 | } 242 | op++; 243 | } 244 | } 245 | 246 | *_opts = opts; 247 | } 248 | 249 | static struct ts_config *pcre_init(const void *pattern, unsigned int len, 250 | gfp_t gfp_mask, int 
flags) 251 | { 252 | struct ts_config *conf = ERR_PTR(-EINVAL); 253 | struct ts_pcre pcre; 254 | PCRE2_SIZE erroffset; 255 | int errorcode, rc; 256 | size_t priv_size = sizeof(struct ts_pcre); 257 | 258 | pr_debug("%s: |%s|", __func__, (char *)pattern); 259 | 260 | pcre.patlen = len; 261 | pcre.pattern = calloc(len + 1, sizeof(u8)); 262 | 263 | if (!pcre.pattern) { 264 | pr_debug("%s: %s", __func__, "err_pattern"); 265 | goto err_pattern; 266 | } 267 | 268 | memcpy(pcre.pattern, pattern, len); 269 | 270 | rc = pattern_parse((char *)pattern, &pcre.pcre_str, &pcre.op_str); 271 | if (rc < 0) { 272 | pr_debug("%s: %s", __func__, "err_pattern_parse"); 273 | goto err_pattern_parse; 274 | } 275 | pr_debug("%s: |%s|%s|", __func__, pcre.pcre_str, pcre.op_str); 276 | 277 | opts_parse(pcre.op_str, &pcre.opts); 278 | 279 | pcre.re = pcre2_compile(pcre.pcre_str, PCRE2_ZERO_TERMINATED, pcre.opts, 280 | &errorcode, &erroffset, NULL); 281 | if (!pcre.re) { 282 | pr_debug("%s: %s", __func__, "err_pcre_compile"); 283 | goto err_pcre_compile; 284 | } 285 | 286 | if (jit_enable) { 287 | 288 | rc = pcre2_jit_compile(pcre.re, PCRE2_JIT_COMPLETE); 289 | if (rc < 0) { 290 | pr_debug("%s: %s", __func__, "err_jit_compile"); 291 | goto err_jit_compile; 292 | } 293 | 294 | } 295 | 296 | conf = alloc_ts_config(priv_size, gfp_mask); 297 | if (IS_ERR(conf)) { 298 | pr_debug("%s: %s", __func__, "err_alloc_conf"); 299 | goto err_alloc_conf; 300 | } 301 | 302 | conf->flags = flags; 303 | memcpy(ts_config_priv(conf), &pcre, priv_size); 304 | 305 | return conf; 306 | 307 | err_alloc_conf: 308 | err_jit_compile: 309 | pcre2_code_free(pcre.re); 310 | 311 | err_pcre_compile: 312 | err_pattern_parse: 313 | err_pattern: 314 | free(pcre.pattern); 315 | 316 | pr_info("%s failed: it's probably a regex pattern error", __func__); 317 | return conf; 318 | } 319 | 320 | static void pcre_destroy(struct ts_config *conf) 321 | { 322 | struct ts_pcre *pcre; 323 | 324 | pcre = ts_config_priv(conf); 325 | 326 | 
pr_debug("%s: |%s|", __func__, pcre->pattern); 327 | 328 | if (pcre->pattern) 329 | free(pcre->pattern); 330 | 331 | if (pcre->re) 332 | pcre2_code_free(pcre->re); 333 | 334 | if (pcre->pcre_str) 335 | pcre2_substring_free(pcre->pcre_str); 336 | 337 | if (pcre->op_str) 338 | pcre2_substring_free(pcre->op_str); 339 | 340 | } 341 | 342 | static void *pcre_get_pattern(struct ts_config *conf) 343 | { 344 | struct ts_pcre *pcre = ts_config_priv(conf); 345 | return pcre->pattern; 346 | } 347 | 348 | static unsigned int pcre_get_pattern_len(struct ts_config *conf) 349 | { 350 | struct ts_pcre *pcre = ts_config_priv(conf); 351 | return pcre->patlen; 352 | } 353 | 354 | static struct ts_ops pcre_ops = { 355 | .name = "pcre", 356 | .find = pcre_find, 357 | .init = pcre_init, 358 | .destroy = pcre_destroy, 359 | .get_pattern = pcre_get_pattern, 360 | .get_pattern_len = pcre_get_pattern_len, 361 | .owner = THIS_MODULE, 362 | .list = LIST_HEAD_INIT(pcre_ops.list) 363 | }; 364 | 365 | static int __init ts_pcre_init(void) 366 | { 367 | PCRE2_SIZE erroffset; 368 | int errorcode; 369 | int i; 370 | 371 | if (jit_stack_start > jit_stack_max) 372 | jit_stack_max = jit_stack_start; 373 | 374 | pr_debug("%s j: %u s: %u m: %u", \ 375 | __func__, jit_enable, jit_stack_start, jit_stack_max); 376 | 377 | parse_regex = pcre2_compile(PARSE_REGEX, 378 | PCRE2_ZERO_TERMINATED, 0, &errorcode, 379 | &erroffset, NULL); 380 | 381 | if (!parse_regex) 382 | goto err_compile; 383 | 384 | for_each_online_cpu(i) { 385 | 386 | pcre2_match_data *_match_data = pcre2_match_data_create(OVECTOR_SIZE, NULL); 387 | pcre2_match_context *_match_context = pcre2_match_context_create(NULL); 388 | 389 | pcre2_jit_stack *_jit_stack = pcre2_jit_stack_create(jit_stack_start, jit_stack_max, NULL); 390 | 391 | pcre2_jit_stack_assign(_match_context, NULL, _jit_stack); 392 | 393 | per_cpu(match_data, i) = _match_data; 394 | per_cpu(match_context, i) = _match_context; 395 | per_cpu(jit_stack, i) = _jit_stack; 396 | 397 | } 
398 | 399 | return textsearch_register(&pcre_ops); 400 | 401 | err_compile: 402 | return -ENOMEM; 403 | } 404 | 405 | static void __exit ts_pcre_exit(void) 406 | { 407 | int i; 408 | pr_debug("%s", __func__); 409 | 410 | for_each_online_cpu(i) { 411 | pcre2_match_data *_match_data = per_cpu(match_data, i); 412 | pcre2_match_context *_match_context = per_cpu(match_context, i); 413 | pcre2_jit_stack *_jit_stack = per_cpu(jit_stack, i); 414 | 415 | pcre2_match_data_free(_match_data); 416 | pcre2_match_context_free(_match_context); 417 | pcre2_jit_stack_free(_jit_stack); 418 | } 419 | 420 | if (parse_regex) 421 | pcre2_code_free(parse_regex); 422 | 423 | pcre2_jit_free_unused_memory(NULL); 424 | 425 | textsearch_unregister(&pcre_ops); 426 | } 427 | 428 | module_init(ts_pcre_init); 429 | module_exit(ts_pcre_exit); 430 | -------------------------------------------------------------------------------- /ts_regex/Makefile: -------------------------------------------------------------------------------- 1 | CODE_WIDTH ?= 8 2 | LIBC := ${PWD}/../libc 3 | PCRE2 := ${PWD}/../pcre2 4 | 5 | ifeq ($(strip $(DEBUG)),) 6 | ccflags-y += -O2 -s 7 | else 8 | ccflags-y += -ggdb -DDEBUG 9 | endif 10 | ccflags-y += -I$(LIBC) -I$(PCRE2) \ 11 | -DPCRE2_CODE_UNIT_WIDTH=${CODE_WIDTH} 12 | 13 | MODULES_DIR := /lib/modules/$(shell uname -r) 14 | KERNEL_DIR ?= ${MODULES_DIR}/build 15 | KBUILD_EXTRA_SYMBOLS += $(LIBC)/Module.symvers $(PCRE2)/Module.symvers 16 | 17 | obj-m := ts_regex.o 18 | 19 | all: 20 | make -C ${KERNEL_DIR} M=$$PWD; 21 | 22 | modules: 23 | make -C ${KERNEL_DIR} M=$$PWD $@; 24 | 25 | modules_install: 26 | make -C ${KERNEL_DIR} M=$$PWD $@; 27 | depmod -a; 28 | 29 | clean: 30 | make -C ${KERNEL_DIR} M=$$PWD $@; 31 | rm -rf modules.order cscope.* 32 | -------------------------------------------------------------------------------- /ts_regex/ts_regex.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ts_regex.c REGEX search 
implementation 3 | * 4 | * Copyright (C) 2016 Seongmyun Cho 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; version 2 of the License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | * 19 | * ===================================================================== 20 | * 21 | * Implements REGEX matching algorithm: 22 | * 23 | * Note: Obviously, it's possible that a matching could be spread over 24 | * multiple blocks, in that case this code won't find any coincidence. 25 | * 26 | */ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "libc.h" 37 | #include "pcre2.h" 38 | #include "pcre2posix.h" 39 | 40 | MODULE_LICENSE("Dual BSD/GPL"); 41 | MODULE_AUTHOR("Seongmyun Cho "); 42 | MODULE_DESCRIPTION("REGEX text search engine"); 43 | 44 | #define PARSE_REGEX "(?offset; 63 | int rc; 64 | const u8 *text; 65 | unsigned int slen; 66 | 67 | pr_debug("%s: finding |%s| at offset %u", __func__, regex->pattern, consumed); 68 | 69 | /* POSIX regex functions deal with only null-terminated strings. */ 70 | /* They can't properly handle patterns with null character inside. 
*/ 71 | for (;;) { 72 | 73 | text_len = conf->get_next_block(consumed, &text, conf, state); 74 | slen = strlen(text); 75 | 76 | pr_debug("next block size: %u(%u)", text_len, slen); 77 | 78 | if (unlikely(text_len == 0)) 79 | break; 80 | 81 | rc = regexec(®ex->re, (char *) text, NS, regex->subs, regex->eopts); 82 | 83 | if (unlikely(rc == 0)) { 84 | consumed += regex->subs[0].rm_so; 85 | pr_debug("%s: matched |%s| at offset %u", __func__, regex->pattern, consumed); 86 | return consumed; 87 | } 88 | 89 | while (!text[slen]) 90 | slen++; 91 | 92 | consumed += min(slen, text_len) ; 93 | } 94 | 95 | return UINT_MAX; 96 | } 97 | 98 | static inline int 99 | pattern_parse(const char *pattern, PCRE2_UCHAR ** pcre, PCRE2_UCHAR ** op_str) 100 | { 101 | PCRE2_SIZE relen, oplen; 102 | pcre2_match_data *match_data; 103 | int res, rc; 104 | 105 | match_data = pcre2_match_data_create(4, NULL); 106 | if (!match_data) { 107 | return -ENOMEM; 108 | } 109 | 110 | res = pcre2_match(parse_regex, pattern, -1, 0, 0, match_data, NULL); 111 | if (res <= 0) { 112 | pr_debug("%s: pcre2_match failed", __func__); 113 | pcre2_match_data_free(match_data); 114 | return -EINVAL; 115 | } 116 | 117 | relen = 0; 118 | oplen = 0; 119 | 120 | rc = pcre2_substring_get_bynumber(match_data, 1, pcre, &relen); 121 | if (rc < 0) { 122 | pr_debug("%s: pcre2_substring_get_bynumber(pcre) failed", 123 | __func__); 124 | return -EINVAL; 125 | } 126 | 127 | if (res > 2) { 128 | rc = pcre2_substring_get_bynumber(match_data, 2, op_str, 129 | &oplen); 130 | if (rc < 0) { 131 | pr_debug 132 | ("%s: pcre2_substring_get_bynumber(opts) failed", 133 | __func__); 134 | return -EINVAL; 135 | } 136 | } 137 | #ifdef DEBUG 138 | if (relen > 0) { 139 | pr_debug("pcre: %lu|%s|", relen, *pcre); 140 | } 141 | 142 | if (oplen > 0) { 143 | pr_debug("opts: %lu|%s|", oplen, *op_str); 144 | } 145 | #endif 146 | 147 | pcre2_match_data_free(match_data); 148 | 149 | return 0; 150 | } 151 | 152 | static inline void opts_parse(char *op_str, 
int *copts, int *eopts) 153 | { 154 | char *op = NULL; 155 | int _copts = 0; 156 | int _eopts = 0; 157 | 158 | op = op_str; 159 | 160 | if (op != NULL) { 161 | while (*op) { 162 | switch (*op) { 163 | case 'N': 164 | _copts |= REG_NOSUB; 165 | break; 166 | case 'G': 167 | _copts |= REG_UNGREEDY; 168 | break; 169 | 170 | case 'f': 171 | _copts |= REG_UTF; 172 | break; 173 | case 'p': 174 | _copts |= REG_UCP; 175 | break; 176 | case 'i': 177 | _copts |= REG_ICASE; 178 | break; 179 | case 'm': 180 | _copts |= REG_NEWLINE; 181 | break; 182 | case 's': 183 | _copts |= REG_DOTALL; 184 | break; 185 | case 'x': 186 | _copts |= REG_EXTENDED; 187 | break; 188 | 189 | case '1': 190 | _eopts |= REG_NOTBOL; 191 | break; 192 | case '2': 193 | _eopts |= REG_NOTEOL; 194 | break; 195 | case '3': 196 | _eopts |= REG_NOTEMPTY; 197 | break; 198 | 199 | default: 200 | pr_info("%s: unknown regex modifier '%c'", 201 | __func__, *op); 202 | break; 203 | } 204 | op++; 205 | } 206 | } 207 | 208 | *copts = _copts; 209 | *eopts = _eopts; 210 | } 211 | 212 | 213 | static struct ts_config *regex_init(const void *pattern, unsigned int len, 214 | gfp_t gfp_mask, int flags) 215 | { 216 | struct ts_config *conf = ERR_PTR(-EINVAL); 217 | struct ts_regex regex; 218 | size_t priv_size = sizeof(struct ts_regex); 219 | int rc; 220 | 221 | pr_debug("%s: |%s|", __func__, (char *)pattern); 222 | 223 | regex.copts = REG_EXTENDED; 224 | regex.eopts = 0; 225 | regex.patlen = len; 226 | regex.pattern = calloc(len + 1, sizeof(u8)); 227 | 228 | if (!regex.pattern) { 229 | pr_debug("%s: %s", __func__, "err_pattern"); 230 | goto err_pattern; 231 | } 232 | 233 | memcpy(regex.pattern, pattern, len); 234 | 235 | rc = pattern_parse((char *)pattern, ®ex.regex_str, ®ex.op_str); 236 | if (rc < 0) { 237 | pr_debug("%s: %s", __func__, "err_pattern_parse"); 238 | goto err_pattern_parse; 239 | } 240 | pr_debug("%s: |%s|%s|", __func__, regex.regex_str, regex.op_str); 241 | 242 | opts_parse(regex.op_str, ®ex.copts, ®ex.eopts); 
243 | 244 | rc = regcomp(®ex.re, regex.regex_str, regex.copts); 245 | if (rc) { 246 | pr_debug("%s: %s", __func__, "err_regcomp"); 247 | goto err_regcomp; 248 | } 249 | 250 | conf = alloc_ts_config(priv_size, gfp_mask); 251 | if (IS_ERR(conf)) { 252 | goto err_alloc_conf; 253 | } 254 | 255 | conf->flags = flags; 256 | memcpy(ts_config_priv(conf), ®ex, priv_size); 257 | 258 | return conf; 259 | 260 | err_alloc_conf: 261 | err_regcomp: 262 | err_pattern_parse: 263 | err_pattern: 264 | free(regex.pattern); 265 | 266 | pr_info("%s failed: it's probably a regex pattern error", __func__); 267 | return conf; 268 | } 269 | 270 | static void regex_destroy(struct ts_config *conf) 271 | { 272 | struct ts_regex *regex; 273 | 274 | regex = ts_config_priv(conf); 275 | 276 | pr_debug("%s: %s", __func__, regex->pattern); 277 | 278 | if (regex->pattern) 279 | free(regex->pattern); 280 | 281 | if (regex->regex_str) 282 | pcre2_substring_free(regex->regex_str); 283 | 284 | if (regex->op_str) 285 | pcre2_substring_free(regex->op_str); 286 | 287 | regfree(®ex->re); 288 | } 289 | 290 | static void *regex_get_pattern(struct ts_config *conf) 291 | { 292 | struct ts_regex *regex = ts_config_priv(conf); 293 | return regex->pattern; 294 | } 295 | 296 | static unsigned int regex_get_pattern_len(struct ts_config *conf) 297 | { 298 | struct ts_regex *regex = ts_config_priv(conf); 299 | return regex->patlen; 300 | } 301 | 302 | static struct ts_ops regex_ops = { 303 | .name = "regex", 304 | .find = regex_find, 305 | .init = regex_init, 306 | .destroy = regex_destroy, 307 | .get_pattern = regex_get_pattern, 308 | .get_pattern_len = regex_get_pattern_len, 309 | .owner = THIS_MODULE, 310 | .list = LIST_HEAD_INIT(regex_ops.list) 311 | }; 312 | 313 | static int __init ts_regex_init(void) 314 | { 315 | PCRE2_SIZE erroffset; 316 | int errorcode; 317 | 318 | pr_debug("%s", __func__); 319 | 320 | parse_regex = pcre2_compile(PARSE_REGEX, 321 | PCRE2_ZERO_TERMINATED, 0, &errorcode, 322 | &erroffset, NULL); 
323 | 324 | if (!parse_regex) { 325 | #ifdef DEBUG 326 | PCRE2_UCHAR8 buffer[120]; 327 | (void)pcre2_get_error_message(errorcode, buffer, 120); 328 | pr_debug("%s: %s", __func__, buffer); 329 | #endif 330 | return -ENOMEM; 331 | } 332 | 333 | return textsearch_register(®ex_ops); 334 | } 335 | 336 | static void __exit ts_regex_exit(void) 337 | { 338 | pr_debug("%s", __func__); 339 | 340 | if (parse_regex) 341 | pcre2_code_free(parse_regex); 342 | 343 | textsearch_unregister(®ex_ops); 344 | } 345 | 346 | module_init(ts_regex_init); 347 | module_exit(ts_regex_exit); 348 | --------------------------------------------------------------------------------