├── oniguruma ├── AUTHORS ├── testc.c ├── README.ja ├── win32 │ ├── testc.c │ ├── config.h │ └── Makefile ├── sample │ ├── Makefile.am │ ├── simple.c │ ├── syntax.c │ ├── names.c │ ├── sql.c │ ├── posix.c │ ├── listcap.c │ └── crnl.c ├── config.h.in ├── COPYING ├── onig-config.in ├── st.h ├── CMakeLists.txt ├── regversion.c ├── enc │ ├── ascii.c │ ├── euc_tw.c │ ├── iso8859_6.c │ ├── iso8859_8.c │ ├── iso8859_11.c │ ├── euc_kr.c │ ├── utf32_be.c │ ├── utf32_le.c │ ├── big5.c │ ├── utf16_be.c │ ├── utf16_le.c │ ├── euc_jp.c │ ├── cp1251.c │ ├── koi8_r.c │ └── iso8859_7.c ├── regtrav.c ├── Makefile.am ├── oniggnu.h ├── regposerr.c ├── reggnu.c ├── onigposix.h ├── README └── regext.c ├── contrib ├── ab_wsesscache.README ├── make_release.sh ├── perftest.sh ├── release_prep.sh ├── ab_wsesscache.diff └── git_changelog.py ├── .gitignore ├── htparse ├── Makefile ├── htparse.h └── test.c ├── evthr ├── Makefile ├── test.c ├── evthr.h └── README ├── test_basic.c ├── CMakeModules ├── BaseConfig.cmake └── FindLibEvent.cmake ├── LICENSE ├── README.markdown └── CMakeLists.txt /oniguruma/AUTHORS: -------------------------------------------------------------------------------- 1 | sndgk393 AT ybb DOT ne DOT jp (K.Kosako) 2 | -------------------------------------------------------------------------------- /oniguruma/testc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/libevhtp/master/oniguruma/testc.c -------------------------------------------------------------------------------- /oniguruma/README.ja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/libevhtp/master/oniguruma/README.ja -------------------------------------------------------------------------------- /oniguruma/win32/testc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/libevhtp/master/oniguruma/win32/testc.c 
-------------------------------------------------------------------------------- /contrib/ab_wsesscache.README: -------------------------------------------------------------------------------- 1 | Patches Apache Bench to support the ability to read in a client session-id file. 2 | 3 | Usage: 4 | openssl s_client -connect your_server:your_port -sess_out SESSION_FILE -ssl3 5 | ab [opts] -I SESSION_FILE 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # cmake manages these; they shouldn't go in version control 2 | 3 | /CMakeCache.txt 4 | /CMakeFiles/** 5 | /Makefile 6 | /cmake_install.cmake 7 | 8 | # generated .h files 9 | 10 | /compat/sys/tree.h 11 | /oniguruma/config.h 12 | 13 | # compiled files 14 | 15 | /libevhtp.a 16 | /test 17 | /test_basic 18 | 19 | -------------------------------------------------------------------------------- /contrib/make_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mv ChangeLog bak.ChangeLog 4 | 5 | rev_list=`git rev-list --tags --max-count=1..HEAD` 6 | tag_desc=`git describe --tags $rev_list` 7 | cur_desc=`git flow release | awk '{print $2}'` 8 | 9 | echo v$cur_desc > ChangeLog 10 | git log --no-merges --reverse --pretty='format: o %s (%h %an)' $tag_desc..HEAD >> ChangeLog 11 | echo "" >> ChangeLog 12 | echo "" >> ChangeLog 13 | 14 | cat bak.ChangeLog >> ChangeLog 15 | -------------------------------------------------------------------------------- /htparse/Makefile: -------------------------------------------------------------------------------- 1 | SRC = htparse.c 2 | OUT = libhtparse.a 3 | OBJ = $(SRC:.c=.o) 4 | INCLUDES = -I. 
5 | CFLAGS += -ggdb -Wall -Wextra 6 | LDFLAGS += 7 | CC = gcc 8 | 9 | .SUFFIXES: .c 10 | 11 | default: $(OUT) 12 | 13 | .c.o: 14 | $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@ 15 | 16 | $(OUT): $(OBJ) 17 | ar rcs $(OUT) $(OBJ) 18 | 19 | test: $(OUT) test.c 20 | $(CC) $(INCLUDES) $(CFLAGS) test.c -o test $(OUT) 21 | 22 | clean: 23 | rm -f $(OBJ) $(OUT) test 24 | -------------------------------------------------------------------------------- /evthr/Makefile: -------------------------------------------------------------------------------- 1 | SRC = evthr.c 2 | OUT = libevthr.a 3 | OBJ = $(SRC:.c=.o) 4 | INCLUDES = -I. 5 | CFLAGS += -Wall -Wextra -ggdb 6 | LDFLAGS += -ggdb 7 | CC = gcc 8 | 9 | .SUFFIXES: .c 10 | 11 | default: $(OUT) 12 | 13 | .c.o: 14 | $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@ 15 | 16 | $(OUT): $(OBJ) 17 | ar rcs $(OUT) $(OBJ) 18 | 19 | test: $(OUT) test.c 20 | $(CC) $(INCLUDES) $(CFLAGS) test.c -o test $(OUT) -levent -levent_pthreads -lpthread 21 | 22 | clean: 23 | rm -f $(OBJ) $(OUT) test 24 | 25 | -------------------------------------------------------------------------------- /oniguruma/sample/Makefile.am: -------------------------------------------------------------------------------- 1 | noinst_PROGRAMS = encode listcap names posix simple sql syntax crnl 2 | 3 | libname = $(top_builddir)/libonig.la 4 | LDADD = $(libname) 5 | INCLUDES = -I$(top_srcdir) -I$(includedir) 6 | 7 | encode_SOURCES = encode.c 8 | listcap_SOURCES = listcap.c 9 | names_SOURCES = names.c 10 | posix_SOURCES = posix.c 11 | simple_SOURCES = simple.c 12 | sql_SOURCES = sql.c 13 | syntax_SOURCES = syntax.c 14 | 15 | 16 | sampledir = $(top_builddir)/sample 17 | 18 | test: encode listcap names posix simple sql syntax 19 | @$(sampledir)/encode 20 | @$(sampledir)/listcap 21 | @$(sampledir)/names 22 | @$(sampledir)/posix 23 | @$(sampledir)/simple 24 | @$(sampledir)/sql 25 | @$(sampledir)/syntax 26 | -------------------------------------------------------------------------------- 
/contrib/perftest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 5 ] 4 | then 5 | echo "usage " 6 | exit 1 7 | fi 8 | 9 | host=$1 10 | port=$2 11 | uri=$3 12 | conns=$4 13 | calls=$5 14 | 15 | echo "Running ($conns connections | $calls requests each)" 16 | 17 | #httperf --ssl --server=$host --port=$port --uri=$uri --rate=1e+06 --send-buffer=4096 --recv-buffer=16384 --wsess=$conns,$calls,0 18 | #httperf --print-reply header --add-header "Accept-Encoding: gzip,deflate,sdch\n" --server=$host --port=$port --uri=$uri --rate=1e+06 --send-buffer=4096 --recv-buffer=16384 --wsess=$conns,$calls,0 19 | httperf --server=$host --port=$port --uri=$uri --rate=1e+06 --send-buffer=4096 --recv-buffer=16384 --wsess=$conns,$calls,0 20 | -------------------------------------------------------------------------------- /test_basic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | void 9 | testcb(evhtp_request_t * req, void * a) { 10 | evbuffer_add_reference(req->buffer_out, "foobar", 6, NULL, NULL); 11 | evhtp_send_reply(req, EVHTP_RES_OK); 12 | } 13 | 14 | int 15 | main(int argc, char ** argv) { 16 | evbase_t * evbase = event_base_new(); 17 | evhtp_t * htp = evhtp_new(evbase, NULL); 18 | 19 | evhtp_set_cb(htp, "/test", testcb, NULL); 20 | #ifndef EVHTP_DISABLE_EVTHR 21 | evhtp_use_threads(htp, NULL, 4, NULL); 22 | #endif 23 | evhtp_bind_socket(htp, "0.0.0.0", 8081, 1024); 24 | event_base_loop(evbase, 0); 25 | return 0; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /contrib/release_prep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This shouldn't be used by anyone but me...kthx 3 | 4 | rm -rf libhtparse/ 5 | rm -rf evthr/ 6 | rm -rf oniguruma/ 7 | 8 | rm -rf build/libhtparse-latest* 9 
| rm -rf build/libevthr-latest* 10 | rm -rf build/oniguruma-latest* 11 | 12 | cd build && wget http://ackers.net/packages/libhtparse-latest.tar http://ackers.net/packages/libevthr-latest.tar http://ackers.net/packages/oniguruma-latest.tar 13 | cd .. 14 | 15 | httparser_dirname=`tar --to-stdout -tf build/libhtparse-latest.tar 2>&1 | head -n 1` 16 | libevthr_dirname=`tar --to-stdout -tf build/libevthr-latest.tar 2>&1 | head -n 1` 17 | oniguruma_dirname=`tar --to-stdout -tf build/oniguruma-latest.tar 2>&1 | head -n 1` 18 | 19 | tar -xf build/libhtparse-latest.tar 20 | tar -xf build/libevthr-latest.tar 21 | tar -xf build/oniguruma-latest.tar 22 | 23 | mv $httparser_dirname libhtparse 24 | mv $libevthr_dirname evthr 25 | mv $oniguruma_dirname oniguruma 26 | 27 | rm -rf build/libhtparse-latest* 28 | rm -rf build/libevthr-latest* 29 | rm -rf build/oniguruma-latest* 30 | -------------------------------------------------------------------------------- /oniguruma/config.h.in: -------------------------------------------------------------------------------- 1 | #cmakedefine CRAY_STACKSEG_END 1 2 | #cmakedefine C_ALLOCA 1 3 | #cmakedefine HAVE_ALLOCA 1 4 | #cmakedefine HAVE_ALLOCA_H 1 5 | #cmakedefine HAVE_DLFCN_H 1 6 | #cmakedefine HAVE_INTTYPES_H 1 7 | #cmakedefine HAVE_MEMORY_H 1 8 | #cmakedefine HAVE_PROTOTYPES 1 9 | #cmakedefine HAVE_STDARG_PROTOTYPES 1 10 | #cmakedefine HAVE_STDINT_H 1 11 | #cmakedefine HAVE_STDLIB_H 1 12 | #cmakedefine HAVE_STRINGS_H 1 13 | #cmakedefine HAVE_STRING_H 1 14 | #cmakedefine HAVE_SYS_STAT_H 1 15 | #cmakedefine HAVE_SYS_TIMES_H 1 16 | #cmakedefine HAVE_SYS_TIME_H 1 17 | #cmakedefine HAVE_SYS_TYPES_H 1 18 | #cmakedefine HAVE_UNISTD_H 1 19 | #cmakedefine LT_OBJDIR 1 20 | #cmakedefine PACKAGE 1 21 | #cmakedefine PACKAGE_BUGREPORT 1 22 | #cmakedefine PACKAGE_NAME 1 23 | #cmakedefine PACKAGE_STRING 1 24 | #cmakedefine PACKAGE_TARNAME 1 25 | #cmakedefine PACKAGE_VERSION 1 26 | #cmakedefine SIZEOF_INT 1 27 | #cmakedefine SIZEOF_LONG 1 28 | 
#cmakedefine SIZEOF_SHORT 1 29 | #cmakedefine STACK_DIRECTION 1 30 | #cmakedefine STDC_HEADERS 1 31 | #cmakedefine TIME_WITH_SYS_TIME 1 32 | #cmakedefine USE_COMBINATION_EXPLOSION_CHECK 1 33 | #cmakedefine USE_CRNL_AS_LINE_TERMINATOR 1 34 | #cmakedefine VERSION 1 35 | -------------------------------------------------------------------------------- /CMakeModules/BaseConfig.cmake: -------------------------------------------------------------------------------- 1 | if (CMAKE_COMPILER_IS_GNUCC) 2 | 3 | set(RSN_BASE_C_FLAGS "-Wall -fno-strict-aliasing") 4 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${RSN_BASE_C_FLAGS} -DPROJECT_VERSION=\"${PROJECT_VERSION}\"") 5 | set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${RSN_BASE_C_FLAGS} -ggdb") 6 | set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${RSN_BASE_C_FLAGS}") 7 | 8 | if(APPLE) 9 | # Newer versions of OSX will spew a bunch of warnings about deprecated ssl functions, 10 | # this should be addressed at some point in time, but for now, just ignore them. 
11 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_BSD_SOURCE -Wno-deprecated-declarations") 12 | elseif(UNIX) 13 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_BSD_SOURCE -D_POSIX_C_SOURCE=199309L") 14 | endif(APPLE) 15 | 16 | endif(CMAKE_COMPILER_IS_GNUCC) 17 | 18 | if (EVHTP_DISABLE_EVTHR) 19 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DEVHTP_DISABLE_EVTHR") 20 | endif(EVHTP_DISABLE_EVTHR) 21 | 22 | if (EVHTP_DISABLE_SSL) 23 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DEVHTP_DISABLE_SSL") 24 | endif(EVHTP_DISABLE_SSL) 25 | 26 | if (NOT CMAKE_BUILD_TYPE) 27 | set(CMAKE_BUILD_TYPE Release) 28 | endif(NOT CMAKE_BUILD_TYPE) 29 | -------------------------------------------------------------------------------- /CMakeModules/FindLibEvent.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find the LibEvent event notification library 2 | # Once done this will define 3 | # 4 | # LIBEVENT_FOUND - System has LibEvent 5 | # LIBEVENT_INCLUDE_DIR - the LibEvent include directory 6 | # LIBEVENT_LIBRARY - The library needed to use LibEvent 7 | 8 | FIND_PATH(LIBEVENT_INCLUDE_DIR NAMES event.h) 9 | FIND_LIBRARY(LIBEVENT_LIBRARY NAMES event) 10 | FIND_LIBRARY(LIBEVENT_CORE_LIBRARY NAMES event_core) 11 | FIND_LIBRARY(LIBEVENT_PTHREADS_LIBRARY NAMES event_pthreads) 12 | FIND_LIBRARY(LIBEVENT_EXTRA_LIBRARY NAMES event_extra) 13 | FIND_LIBRARY(LIBEVENT_OPENSSL_LIBRARY NAMES event_openssl) 14 | 15 | 16 | INCLUDE(FindPackageHandleStandardArgs) 17 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEvent DEFAULT_MSG LIBEVENT_LIBRARY LIBEVENT_INCLUDE_DIR) 18 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventPthreads DEFAULT_MSG LIBEVENT_PTHREADS_LIBRARY LIBEVENT_INCLUDE_DIR) 19 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventCore DEFAULT_MSG LIBEVENT_CORE_LIBRARY LIBEVENT_INCLUDE_DIR) 20 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventExtra DEFAULT_MSG LIBEVENT_EXTRA_LIBRARY LIBEVENT_INCLUDE_DIR) 21 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventOpenssl DEFAULT_MSG 
LIBEVENT_OPENSSL_LIBRARY LIBEVENT_INCLUDE_DIR) 22 | 23 | MARK_AS_ADVANCED(LIBEVENT_INCLUDE_DIR LIBEVENT_LIBRARY LIBEVENT_PTHREADS_LIBRARY LIBEVENT_OPENSSL_LIBRARY LIBEVENT_CORE_LIBRARY LIBEVENT_EXTRA_LIBRARY) 24 | -------------------------------------------------------------------------------- /evthr/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | static void 11 | _test_cb_1(evthr_t * thr, void * cmdarg, void * shared) { 12 | printf("START _test_cb_1 (%u)\n", (unsigned int)pthread_self()); 13 | sleep(1); 14 | printf("END _test_cb_1 (%u)\n", (unsigned int)pthread_self()); 15 | } 16 | 17 | int 18 | main(int argc, char ** argv) { 19 | evthr_pool_t * pool = NULL; 20 | int i = 0; 21 | 22 | evthread_use_pthreads(); 23 | evthread_enable_lock_debuging(); 24 | pool = evthr_pool_new(8, NULL); 25 | 26 | evthr_pool_start(pool); 27 | 28 | while (1) { 29 | if (i++ >= 5) { 30 | break; 31 | } 32 | 33 | printf("Iter %d\n", i); 34 | 35 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 36 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 37 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 38 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 39 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 40 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 41 | 42 | sleep(2); 43 | } 44 | 45 | evthr_pool_stop(pool); 46 | evthr_pool_free(pool); 47 | return 0; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /oniguruma/COPYING: -------------------------------------------------------------------------------- 1 | Oniguruma LICENSE 2 | ----------------- 3 | 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | -------------------------------------------------------------------------------- /oniguruma/sample/simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | * simple.c 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | extern int main(int argc, char* argv[]) 9 | { 10 | int r; 11 | unsigned char *start, *range, *end; 12 | regex_t* reg; 13 | OnigErrorInfo einfo; 14 | OnigRegion *region; 15 | 16 | static UChar* pattern = (UChar* )"a(.*)b|[e-f]+"; 17 | static UChar* str = (UChar* )"zzzzaffffffffb"; 18 | 19 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 20 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 21 | if (r != ONIG_NORMAL) { 22 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 23 | onig_error_code_to_str(s, r, &einfo); 24 | fprintf(stderr, "ERROR: %s\n", s); 25 | return -1; 26 | } 27 | 28 | region = onig_region_new(); 29 | 30 | end = str + strlen((char* )str); 31 | start = str; 32 | range = end; 33 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 34 | if (r >= 0) { 35 | int i; 36 | 37 | fprintf(stderr, "match at %d\n", r); 38 | for (i = 0; i < region->num_regs; i++) { 39 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 40 | } 41 | } 42 | else if (r == ONIG_MISMATCH) { 43 | fprintf(stderr, "search fail\n"); 44 | } 45 | else { /* error */ 46 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 47 | onig_error_code_to_str(s, r); 48 | fprintf(stderr, "ERROR: %s\n", s); 49 | return -1; 50 | } 51 | 52 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 53 | onig_free(reg); 54 | onig_end(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /oniguruma/onig-config.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (C) 2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp) 3 | 4 | ONIG_VERSION=@PACKAGE_VERSION@ 
5 | 6 | show_usage() 7 | { 8 | cat < 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | enum evthr_res { 14 | EVTHR_RES_OK = 0, 15 | EVTHR_RES_BACKLOG, 16 | EVTHR_RES_RETRY, 17 | EVTHR_RES_NOCB, 18 | EVTHR_RES_FATAL 19 | }; 20 | 21 | struct evthr_pool; 22 | struct evthr; 23 | 24 | typedef struct event_base evbase_t; 25 | typedef struct event ev_t; 26 | 27 | typedef struct evthr_pool evthr_pool_t; 28 | typedef struct evthr evthr_t; 29 | typedef enum evthr_res evthr_res; 30 | 31 | typedef void (*evthr_cb)(evthr_t * thr, void * cmd_arg, void * shared); 32 | typedef void (*evthr_init_cb)(evthr_t * thr, void * shared); 33 | 34 | evthr_t * evthr_new(evthr_init_cb init_cb, void * arg); 35 | evbase_t * evthr_get_base(evthr_t * thr); 36 | void evthr_set_aux(evthr_t * thr, void * aux); 37 | void * evthr_get_aux(evthr_t * thr); 38 | int evthr_start(evthr_t * evthr); 39 | evthr_res evthr_stop(evthr_t * evthr); 40 | evthr_res evthr_defer(evthr_t * evthr, evthr_cb cb, void * arg); 41 | void evthr_free(evthr_t * evthr); 42 | void evthr_inc_backlog(evthr_t * evthr); 43 | void evthr_dec_backlog(evthr_t * evthr); 44 | int evthr_get_backlog(evthr_t * evthr); 45 | 46 | evthr_pool_t * evthr_pool_new(int nthreads, evthr_init_cb init_cb, void * shared); 47 | int evthr_pool_start(evthr_pool_t * pool); 48 | evthr_res evthr_pool_stop(evthr_pool_t * pool); 49 | evthr_res evthr_pool_defer(evthr_pool_t * pool, evthr_cb cb, void * arg); 50 | void evthr_pool_free(evthr_pool_t * pool); 51 | 52 | #endif /* __EVTHR_H__ */ 53 | 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Libevhtp is available for use under the following license, commonly known 2 | as the 3-clause (or "modified") BSD license: 3 | 4 | ============================== 5 | Copyright (c) 2010-2011 Mark Ellzey 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | 
modification, are permitted provided that the following conditions 9 | are met: 10 | 1. Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 3. The name of the author may not be used to endorse or promote products 16 | derived from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | ============================== 29 | 30 | Portions of Libevhtp are based on works by others, also made available by them 31 | under the three-clause BSD license above. The functions include: 32 | 33 | evhtp.c: _evhtp_glob_match(): 34 | Copyright (c) 2006-2009, Salvatore Sanfilippo 35 | -------------------------------------------------------------------------------- /oniguruma/st.h: -------------------------------------------------------------------------------- 1 | /* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ 2 | 3 | /* @(#) st.h 5.1 89/12/14 */ 4 | 5 | #ifndef ST_INCLUDED 6 | 7 | #define ST_INCLUDED 8 | 9 | typedef unsigned long st_data_t; 10 | #define ST_DATA_T_DEFINED 11 | 12 | typedef struct st_table st_table; 13 | 14 | struct st_hash_type { 15 | int (*compare)(); 16 | int (*hash)(); 17 | }; 18 | 19 | struct st_table { 20 | struct st_hash_type *type; 21 | int num_bins; 22 | int num_entries; 23 | struct st_table_entry **bins; 24 | }; 25 | 26 | #define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) 27 | 28 | enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; 29 | 30 | #ifndef _ 31 | # define _(args) args 32 | #endif 33 | #ifndef ANYARGS 34 | # ifdef __cplusplus 35 | # define ANYARGS ... 36 | # else 37 | # define ANYARGS 38 | # endif 39 | #endif 40 | 41 | st_table *st_init_table _((struct st_hash_type *)); 42 | st_table *st_init_table_with_size _((struct st_hash_type *, int)); 43 | st_table *st_init_numtable _((void)); 44 | st_table *st_init_numtable_with_size _((int)); 45 | st_table *st_init_strtable _((void)); 46 | st_table *st_init_strtable_with_size _((int)); 47 | int st_delete _((st_table *, st_data_t *, st_data_t *)); 48 | int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); 49 | int st_insert _((st_table *, st_data_t, st_data_t)); 50 | int st_lookup _((st_table *, st_data_t, st_data_t *)); 51 | int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); 52 | void st_add_direct _((st_table *, st_data_t, st_data_t)); 53 | void st_free_table _((st_table *)); 54 | void st_cleanup_safe _((st_table *, st_data_t)); 55 | st_table *st_copy _((st_table *)); 56 | 57 | #define ST_NUMCMP ((int (*)()) 0) 58 | #define ST_NUMHASH ((int (*)()) -2) 59 | 60 | #define st_numcmp ST_NUMCMP 61 | #define st_numhash ST_NUMHASH 62 | 63 | #endif /* ST_INCLUDED */ 64 | -------------------------------------------------------------------------------- /evthr/README: -------------------------------------------------------------------------------- 1 
| Libevthr is an API which manages threads and thread-pools in an event-based 2 | manner. This API requires libevent with threading support. 3 | 4 | Libevthr works a bit differently from most thread management systems. Instead of 5 | conditional signalling and some type of per-thread queue, Libevthr uses a 6 | deferral type mechanism. That is, a thread is always running, abstracted to a 7 | point where you "defer" your function *into* a thread. 8 | 9 | For example you can start up a single thread with a backlog of 10 (a backlog 10 | being the max number of outstanding callbacks to run within the thread), and 11 | execute a function you would like to run inside the thread one or many times. 12 | Deferring a function is non-blocking. 13 | 14 | Example Code for evthrs: 15 | 16 | evthr_t * thr = evthr_new(10, NULL); 17 | 18 | if (evthr_start(thr) < 0) { 19 | exit(1); 20 | } 21 | 22 | evthr_defer(thr, my_cb_1, NULL); 23 | evthr_defer(thr, my_cb_2, NULL); 24 | evthr_defer(thr, my_cb_3, NULL); 25 | 26 | sleep(n_seconds); 27 | 28 | evthr_stop(thr); 29 | 30 | Libevthr also has the ability to create pools using the same methods that a 31 | single evthr has. For example, if you would like to create 10 threads, each 32 | with a backlog of 5: 33 | 34 | evthr_pool_t * thr_pool = evthr_pool_new(10, 5, NULL); 35 | 36 | if (evthr_pool_start(thr_pool) < 0) { 37 | exit(1); 38 | } 39 | 40 | evthr_pool_defer(thr_pool, my_cb_1, NULL); 41 | evthr_pool_defer(thr_pool, my_cb_2, NULL); 42 | evthr_pool_defer(thr_pool, my_cb_3, NULL); 43 | 44 | The callback functions you defer must be of type "evthr_cb", i.e. 45 | "void cb_name(evthr_t * thr, void * arg, void * shared)". Here, the "arg" parameter is 46 | the data you passed as the third argument to either evthr_pool_defer or 47 | evthr_defer. The "shared" parameter is the data that was passed as either the second 48 | argument to evthr_new(), or the third argument to evthr_pool_new(). 
49 | 50 | The gist of this is to allow a global dataset, along with deferred specific 51 | data. 52 | 53 | See test.c for a quick example. 54 | -------------------------------------------------------------------------------- /oniguruma/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 2 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 3 | 4 | INCLUDE (CheckFunctionExists) 5 | INCLUDE (CheckIncludeFiles) 6 | INCLUDE (CheckTypeSize) 7 | 8 | CHECK_FUNCTION_EXISTS(alloca C_ALLOCA) 9 | CHECK_FUNCTION_EXISTS(memcmp HAVE_MEMCMP) 10 | 11 | CHECK_INCLUDE_FILES(alloca.h HAVE_ALLOCA_H) 12 | CHECK_INCLUDE_FILES(strings.h HAVE_STRINGS_H) 13 | CHECK_INCLUDE_FILES(string.h HAVE_STRING_H) 14 | CHECK_INCLUDE_FILES(stdlib.h HAVE_STDLIB_H) 15 | CHECK_INCLUDE_FILES(sys/time.h HAVE_SYS_TIME_H) 16 | CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H) 17 | CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) 18 | CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H) 19 | CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_PROTOTYPES) 20 | 21 | CHECK_TYPE_SIZE("int" SIZEOF_INT) 22 | CHECK_TYPE_SIZE("long" SIZEOF_LONG) 23 | CHECK_TYPE_SIZE("short" SIZEOF_SHORT) 24 | 25 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) 26 | 27 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 28 | 29 | set(SOURCES regint.h regparse.h regenc.h st.h 30 | regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c 31 | regenc.c regsyntax.c regtrav.c regversion.c st.c 32 | regposix.c regposerr.c 33 | enc/unicode.c enc/ascii.c enc/utf8.c 34 | enc/utf16_be.c enc/utf16_le.c 35 | enc/utf32_be.c enc/utf32_le.c 36 | enc/euc_jp.c enc/sjis.c enc/iso8859_1.c 37 | enc/iso8859_2.c enc/iso8859_3.c 38 | enc/iso8859_4.c enc/iso8859_5.c 39 | enc/iso8859_6.c enc/iso8859_7.c 40 | enc/iso8859_8.c enc/iso8859_9.c 41 | enc/iso8859_10.c enc/iso8859_11.c 42 | enc/iso8859_13.c enc/iso8859_14.c 43 | enc/iso8859_15.c 
enc/iso8859_16.c 44 | enc/euc_tw.c enc/euc_kr.c enc/big5.c 45 | enc/gb18030.c enc/koi8_r.c enc/cp1251.c) 46 | 47 | 48 | add_library(libonig STATIC ${SOURCES}) 49 | set_target_properties(libonig PROPERTIES OUTPUT_NAME "libonig") 50 | -------------------------------------------------------------------------------- /oniguruma/sample/syntax.c: -------------------------------------------------------------------------------- 1 | /* 2 | * syntax.c 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | extern int exec(OnigSyntaxType* syntax, 9 | char* apattern, char* astr) 10 | { 11 | int r; 12 | unsigned char *start, *range, *end; 13 | regex_t* reg; 14 | OnigErrorInfo einfo; 15 | OnigRegion *region; 16 | UChar* pattern = (UChar* )apattern; 17 | UChar* str = (UChar* )astr; 18 | 19 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 20 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); 21 | if (r != ONIG_NORMAL) { 22 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 23 | onig_error_code_to_str(s, r, &einfo); 24 | fprintf(stderr, "ERROR: %s\n", s); 25 | return -1; 26 | } 27 | 28 | region = onig_region_new(); 29 | 30 | end = str + strlen((char* )str); 31 | start = str; 32 | range = end; 33 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 34 | if (r >= 0) { 35 | int i; 36 | 37 | fprintf(stderr, "match at %d\n", r); 38 | for (i = 0; i < region->num_regs; i++) { 39 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 40 | } 41 | } 42 | else if (r == ONIG_MISMATCH) { 43 | fprintf(stderr, "search fail\n"); 44 | } 45 | else { /* error */ 46 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 47 | onig_error_code_to_str(s, r); 48 | fprintf(stderr, "ERROR: %s\n", s); 49 | return -1; 50 | } 51 | 52 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 53 | onig_free(reg); 54 | onig_end(); 55 | return 0; 56 | } 57 | 58 | extern int main(int argc, char* argv[]) 59 | { 60 | int r; 61 | 62 | r = exec(ONIG_SYNTAX_PERL, 63 | 
"\\p{XDigit}\\P{XDigit}\\p{^XDigit}\\P{^XDigit}\\p{XDigit}", 64 | "bgh3a"); 65 | 66 | r = exec(ONIG_SYNTAX_JAVA, 67 | "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); 68 | 69 | r = exec(ONIG_SYNTAX_ASIS, 70 | "abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$", 71 | "abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$"); 72 | onig_end(); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /contrib/ab_wsesscache.diff: -------------------------------------------------------------------------------- 1 | --- support/ab.c 2010-07-12 05:26:29.000000000 -0400 2 | +++ support/ab.c 2011-06-21 15:29:21.961972191 -0400 3 | @@ -340,6 +340,7 @@ 4 | char *ssl_cipher = NULL; 5 | char *ssl_info = NULL; 6 | BIO *bio_out,*bio_err; 7 | +char * sess_in = NULL; 8 | #endif 9 | 10 | apr_time_t start, lasttime, stoptime; 11 | @@ -1195,6 +1196,28 @@ 12 | ERR_print_errors(bio_err); 13 | exit(1); 14 | } 15 | + 16 | + if (sess_in != NULL) { 17 | + SSL_SESSION * sess; 18 | + BIO * sidfile; 19 | + 20 | + if (!(sidfile = BIO_new_file(sess_in, "r"))) { 21 | + fprintf(stderr, "error opening session file %s\n", sess_in); 22 | + ERR_print_errors_fp(stderr); 23 | + exit(1); 24 | + } 25 | + 26 | + if (!(sess = PEM_read_bio_SSL_SESSION(sidfile, NULL, 0, NULL))) { 27 | + fprintf(stderr, "error reading session file %s\n", sess_in); 28 | + ERR_print_errors_fp(stderr); 29 | + exit(1); 30 | + } 31 | + 32 | + BIO_free(sidfile); 33 | + SSL_set_session(c->ssl, sess); 34 | + SSL_SESSION_free(sess); 35 | + } 36 | + 37 | ssl_rand_seed(); 38 | apr_os_sock_get(&fd, c->aprsock); 39 | bio = BIO_new_socket(fd, BIO_NOCLOSE); 40 | @@ -1875,6 +1898,7 @@ 41 | #ifdef USE_SSL 42 | fprintf(stderr, " -Z ciphersuite Specify SSL/TLS cipher suite (See openssl ciphers)\n"); 43 | fprintf(stderr, " -f protocol Specify SSL/TLS protocol (SSL2, SSL3, TLS1, or ALL)\n"); 44 | + fprintf(stderr, " -I in_file Specify file to read SSL session from\n"); 45 | #endif 46 | 
exit(EINVAL); 47 | } 48 | @@ -2040,7 +2064,7 @@ 49 | apr_getopt_init(&opt, cntxt, argc, argv); 50 | while ((status = apr_getopt(opt, "n:c:t:b:T:p:u:v:rkVhwix:y:z:C:H:P:A:g:X:de:Sq" 51 | #ifdef USE_SSL 52 | - "Z:f:" 53 | + "Z:f:I:" 54 | #endif 55 | ,&c, &optarg)) == APR_SUCCESS) { 56 | switch (c) { 57 | @@ -2215,6 +2239,9 @@ 58 | meth = TLSv1_client_method(); 59 | } 60 | break; 61 | + case 'I': 62 | + sess_in = strdup(optarg); 63 | + break; 64 | #endif 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /oniguruma/sample/names.c: -------------------------------------------------------------------------------- 1 | /* 2 | * names.c -- example of group name callback. 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | static int 9 | name_callback(const UChar* name, const UChar* name_end, 10 | int ngroup_num, int* group_nums, 11 | regex_t* reg, void* arg) 12 | { 13 | int i, gn, ref; 14 | char* s; 15 | OnigRegion *region = (OnigRegion* )arg; 16 | 17 | for (i = 0; i < ngroup_num; i++) { 18 | gn = group_nums[i]; 19 | ref = onig_name_to_backref_number(reg, name, name_end, region); 20 | s = (ref == gn ? 
"*" : ""); 21 | fprintf(stderr, "%s (%d): ", name, gn); 22 | fprintf(stderr, "(%d-%d) %s\n", region->beg[gn], region->end[gn], s); 23 | } 24 | return 0; /* 0: continue */ 25 | } 26 | 27 | extern int main(int argc, char* argv[]) 28 | { 29 | int r; 30 | unsigned char *start, *range, *end; 31 | regex_t* reg; 32 | OnigErrorInfo einfo; 33 | OnigRegion *region; 34 | 35 | static UChar* pattern = (UChar* )"(?a*)(?b*)(?c*)"; 36 | static UChar* str = (UChar* )"aaabbbbcc"; 37 | 38 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 39 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 40 | if (r != ONIG_NORMAL) { 41 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 42 | onig_error_code_to_str(s, r, &einfo); 43 | fprintf(stderr, "ERROR: %s\n", s); 44 | return -1; 45 | } 46 | 47 | fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg)); 48 | 49 | region = onig_region_new(); 50 | 51 | end = str + strlen((char* )str); 52 | start = str; 53 | range = end; 54 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 55 | if (r >= 0) { 56 | fprintf(stderr, "match at %d\n\n", r); 57 | r = onig_foreach_name(reg, name_callback, (void* )region); 58 | } 59 | else if (r == ONIG_MISMATCH) { 60 | fprintf(stderr, "search fail\n"); 61 | } 62 | else { /* error */ 63 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 64 | onig_error_code_to_str(s, r); 65 | return -1; 66 | } 67 | 68 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 69 | onig_free(reg); 70 | onig_end(); 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /oniguruma/regversion.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regversion.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All 
rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "config.h" 31 | #include "oniguruma.h" 32 | #include 33 | 34 | extern const char* 35 | onig_version(void) 36 | { 37 | static char s[12]; 38 | 39 | sprintf(s, "%d.%d.%d", 40 | ONIGURUMA_VERSION_MAJOR, 41 | ONIGURUMA_VERSION_MINOR, 42 | ONIGURUMA_VERSION_TEENY); 43 | return s; 44 | } 45 | 46 | extern const char* 47 | onig_copyright(void) 48 | { 49 | static char s[58]; 50 | 51 | sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", 52 | ONIGURUMA_VERSION_MAJOR, 53 | ONIGURUMA_VERSION_MINOR, 54 | ONIGURUMA_VERSION_TEENY); 55 | return s; 56 | } 57 | -------------------------------------------------------------------------------- /oniguruma/win32/config.h: -------------------------------------------------------------------------------- 1 | #define STDC_HEADERS 1 2 | #define HAVE_SYS_TYPES_H 1 3 | #define HAVE_SYS_STAT_H 1 4 | #define HAVE_STDLIB_H 1 5 | #define HAVE_STRING_H 1 6 | #define HAVE_MEMORY_H 1 7 | #define HAVE_FLOAT_H 1 8 | #define HAVE_OFF_T 1 9 | #define SIZEOF_INT 4 10 | #define SIZEOF_SHORT 2 11 | #define SIZEOF_LONG 4 12 | #define SIZEOF_LONG_LONG 0 13 | #define SIZEOF___INT64 8 14 | #define SIZEOF_OFF_T 4 15 | #define SIZEOF_VOIDP 4 16 | #define SIZEOF_FLOAT 4 17 | #define SIZEOF_DOUBLE 8 18 | #define HAVE_PROTOTYPES 1 19 | #define TOKEN_PASTE(x,y) x##y 20 | #define HAVE_STDARG_PROTOTYPES 1 21 | #ifndef NORETURN 22 | #if _MSC_VER > 1100 23 | #define NORETURN(x) __declspec(noreturn) x 24 | #else 25 | #define NORETURN(x) x 26 | #endif 27 | #endif 28 | #define HAVE_DECL_SYS_NERR 1 29 | #define STDC_HEADERS 1 30 | #define HAVE_STDLIB_H 1 31 | #define HAVE_STRING_H 1 32 | #define HAVE_LIMITS_H 1 33 | #define HAVE_FCNTL_H 1 34 | #define HAVE_SYS_UTIME_H 1 35 | #define HAVE_MEMORY_H 1 36 | #define uid_t int 37 | #define gid_t int 38 | #define HAVE_STRUCT_STAT_ST_RDEV 1 39 | #define HAVE_ST_RDEV 1 40 | #define GETGROUPS_T int 41 | #define RETSIGTYPE void 42 | #define HAVE_ALLOCA 1 43 | #define HAVE_DUP2 1 44 | #define 
HAVE_MEMCMP 1 45 | #define HAVE_MEMMOVE 1 46 | #define HAVE_MKDIR 1 47 | #define HAVE_STRCASECMP 1 48 | #define HAVE_STRNCASECMP 1 49 | #define HAVE_STRERROR 1 50 | #define HAVE_STRFTIME 1 51 | #define HAVE_STRCHR 1 52 | #define HAVE_STRSTR 1 53 | #define HAVE_STRTOD 1 54 | #define HAVE_STRTOL 1 55 | #define HAVE_STRTOUL 1 56 | #define HAVE_FLOCK 1 57 | #define HAVE_VSNPRINTF 1 58 | #define HAVE_FINITE 1 59 | #define HAVE_FMOD 1 60 | #define HAVE_FREXP 1 61 | #define HAVE_HYPOT 1 62 | #define HAVE_MODF 1 63 | #define HAVE_WAITPID 1 64 | #define HAVE_CHSIZE 1 65 | #define HAVE_TIMES 1 66 | #define HAVE__SETJMP 1 67 | #define HAVE_TELLDIR 1 68 | #define HAVE_SEEKDIR 1 69 | #define HAVE_MKTIME 1 70 | #define HAVE_COSH 1 71 | #define HAVE_SINH 1 72 | #define HAVE_TANH 1 73 | #define HAVE_EXECVE 1 74 | #define HAVE_TZNAME 1 75 | #define HAVE_DAYLIGHT 1 76 | #define SETPGRP_VOID 1 77 | #define inline __inline 78 | #define NEED_IO_SEEK_BETWEEN_RW 1 79 | #define RSHIFT(x,y) ((x)>>(int)y) 80 | #define FILE_COUNT _cnt 81 | #define FILE_READPTR _ptr 82 | #define DEFAULT_KCODE KCODE_NONE 83 | #define DLEXT ".so" 84 | #define DLEXT2 ".dll" 85 | -------------------------------------------------------------------------------- /oniguruma/sample/sql.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sql.c 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | extern int main(int argc, char* argv[]) 9 | { 10 | static OnigSyntaxType SQLSyntax; 11 | 12 | int r; 13 | unsigned char *start, *range, *end; 14 | regex_t* reg; 15 | OnigErrorInfo einfo; 16 | OnigRegion *region; 17 | 18 | static UChar* pattern = (UChar* )"\\_%\\\\__zz"; 19 | static UChar* str = (UChar* )"a_abcabcabc\\ppzz"; 20 | 21 | onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS); 22 | onig_set_syntax_op2 (&SQLSyntax, 0); 23 | onig_set_syntax_behavior(&SQLSyntax, 0); 24 | onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE); 25 | 
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\'); 26 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_'); 27 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME, 28 | ONIG_INEFFECTIVE_META_CHAR); 29 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME, 30 | ONIG_INEFFECTIVE_META_CHAR); 31 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME, 32 | ONIG_INEFFECTIVE_META_CHAR); 33 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME, 34 | (OnigCodePoint )'%'); 35 | 36 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 37 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo); 38 | if (r != ONIG_NORMAL) { 39 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 40 | onig_error_code_to_str(s, r, &einfo); 41 | fprintf(stderr, "ERROR: %s\n", s); 42 | return -1; 43 | } 44 | 45 | region = onig_region_new(); 46 | 47 | end = str + strlen((char* )str); 48 | start = str; 49 | range = end; 50 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 51 | if (r >= 0) { 52 | int i; 53 | 54 | fprintf(stderr, "match at %d\n", r); 55 | for (i = 0; i < region->num_regs; i++) { 56 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 57 | } 58 | } 59 | else if (r == ONIG_MISMATCH) { 60 | fprintf(stderr, "search fail\n"); 61 | } 62 | else { /* error */ 63 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 64 | onig_error_code_to_str(s, r); 65 | fprintf(stderr, "ERROR: %s\n", s); 66 | return -1; 67 | } 68 | 69 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 70 | onig_free(reg); 71 | onig_end(); 72 | return 0; 73 | } 74 | -------------------------------------------------------------------------------- /oniguruma/enc/ascii.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | ascii.c - Oniguruma (regular expression library) 3 | 
**********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2006 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static int 33 | ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) 34 | { 35 | if (code < 128) 36 | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 37 | else 38 | return FALSE; 39 | } 40 | 41 | OnigEncodingType OnigEncodingASCII = { 42 | onigenc_single_byte_mbc_enc_len, 43 | "US-ASCII", /* name */ 44 | 1, /* max byte length */ 45 | 1, /* min byte length */ 46 | onigenc_is_mbc_newline_0x0a, 47 | onigenc_single_byte_mbc_to_code, 48 | onigenc_single_byte_code_to_mbclen, 49 | onigenc_single_byte_code_to_mbc, 50 | onigenc_ascii_mbc_case_fold, 51 | onigenc_ascii_apply_all_case_fold, 52 | onigenc_ascii_get_case_fold_codes_by_str, 53 | onigenc_minimum_property_name_to_ctype, 54 | ascii_is_code_ctype, 55 | onigenc_not_support_get_ctype_code_range, 56 | onigenc_single_byte_left_adjust_char_head, 57 | onigenc_always_true_is_allowed_reverse_match 58 | }; 59 | -------------------------------------------------------------------------------- /oniguruma/sample/posix.c: -------------------------------------------------------------------------------- 1 | /* 2 | * posix.c 3 | */ 4 | #include 5 | #include "onigposix.h" 6 | 7 | typedef unsigned char UChar; 8 | 9 | static int x(regex_t* reg, unsigned char* pattern, unsigned char* str) 10 | { 11 | int r, i; 12 | char buf[200]; 13 | regmatch_t pmatch[20]; 14 | 15 | r = regexec(reg, (char* )str, reg->re_nsub + 1, pmatch, 0); 16 | if (r != 0 && r != REG_NOMATCH) { 17 | regerror(r, reg, buf, sizeof(buf)); 18 | fprintf(stderr, "ERROR: %s\n", buf); 19 | return -1; 20 | } 21 | 22 | if (r == REG_NOMATCH) { 23 | fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str); 24 | } 25 | else { 26 | fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str); 27 | for (i = 0; i <= (int )reg->re_nsub; i++) { 28 | fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo); 29 | } 30 | } 31 | return 0; 32 | } 33 | 34 | extern int main(int argc, char* argv[]) 35 | { 36 | int r; 37 | char buf[200]; 38 | 
regex_t reg; 39 | UChar* pattern; 40 | 41 | /* default syntax (ONIG_SYNTAX_RUBY) */ 42 | pattern = (UChar* )"^a+b{2,7}[c-f]?$|uuu"; 43 | r = regcomp(®, (char* )pattern, REG_EXTENDED); 44 | if (r) { 45 | regerror(r, ®, buf, sizeof(buf)); 46 | fprintf(stderr, "ERROR: %s\n", buf); 47 | return -1; 48 | } 49 | x(®, pattern, (UChar* )"aaabbbbd"); 50 | 51 | /* POSIX Basic RE (REG_EXTENDED is not specified.) */ 52 | pattern = (UChar* )"^a+b{2,7}[c-f]?|uuu"; 53 | r = regcomp(®, (char* )pattern, 0); 54 | if (r) { 55 | regerror(r, ®, buf, sizeof(buf)); 56 | fprintf(stderr, "ERROR: %s\n", buf); 57 | return -1; 58 | } 59 | x(®, pattern, (UChar* )"a+b{2,7}d?|uuu"); 60 | 61 | /* POSIX Basic RE (REG_EXTENDED is not specified.) */ 62 | pattern = (UChar* )"^a*b\\{2,7\\}\\([c-f]\\)$"; 63 | r = regcomp(®, (char* )pattern, 0); 64 | if (r) { 65 | regerror(r, ®, buf, sizeof(buf)); 66 | fprintf(stderr, "ERROR: %s\n", buf); 67 | return -1; 68 | } 69 | x(®, pattern, (UChar* )"aaaabbbbbbd"); 70 | 71 | /* POSIX Extended RE */ 72 | onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED); 73 | pattern = (UChar* )"^a+b{2,7}[c-f]?)$|uuu"; 74 | r = regcomp(®, (char* )pattern, REG_EXTENDED); 75 | if (r) { 76 | regerror(r, ®, buf, sizeof(buf)); 77 | fprintf(stderr, "ERROR: %s\n", buf); 78 | return -1; 79 | } 80 | x(®, pattern, (UChar* )"aaabbbbd)"); 81 | 82 | pattern = (UChar* )"^b."; 83 | r = regcomp(®, (char* )pattern, REG_EXTENDED | REG_NEWLINE); 84 | if (r) { 85 | regerror(r, ®, buf, sizeof(buf)); 86 | fprintf(stderr, "ERROR: %s\n", buf); 87 | return -1; 88 | } 89 | x(®, pattern, (UChar* )"a\nb\n"); 90 | 91 | regfree(®); 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /oniguruma/regtrav.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regtrav.c - Oniguruma (regular expression library) 3 | 
**********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2004 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regint.h" 31 | 32 | #ifdef USE_CAPTURE_HISTORY 33 | 34 | static int 35 | capture_tree_traverse(OnigCaptureTreeNode* node, int at, 36 | int(*callback_func)(int,int,int,int,int,void*), 37 | int level, void* arg) 38 | { 39 | int r, i; 40 | 41 | if (node == (OnigCaptureTreeNode* )0) 42 | return 0; 43 | 44 | if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) { 45 | r = (*callback_func)(node->group, node->beg, node->end, 46 | level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg); 47 | if (r != 0) return r; 48 | } 49 | 50 | for (i = 0; i < node->num_childs; i++) { 51 | r = capture_tree_traverse(node->childs[i], at, 52 | callback_func, level + 1, arg); 53 | if (r != 0) return r; 54 | } 55 | 56 | if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) { 57 | r = (*callback_func)(node->group, node->beg, node->end, 58 | level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg); 59 | if (r != 0) return r; 60 | } 61 | 62 | return 0; 63 | } 64 | #endif /* USE_CAPTURE_HISTORY */ 65 | 66 | extern int 67 | onig_capture_tree_traverse(OnigRegion* region, int at, 68 | int(*callback_func)(int,int,int,int,int,void*), void* arg) 69 | { 70 | #ifdef USE_CAPTURE_HISTORY 71 | return capture_tree_traverse(region->history_root, at, 72 | callback_func, 0, arg); 73 | #else 74 | return ONIG_NO_SUPPORT_CONFIG; 75 | #endif 76 | } 77 | -------------------------------------------------------------------------------- /oniguruma/Makefile.am: -------------------------------------------------------------------------------- 1 | ## Makefile.am for Oniguruma 2 | encdir = $(top_srcdir)/enc 3 | sampledir = $(top_srcdir)/sample 4 | libname = libonig.la 5 | 6 | #AM_CFLAGS = -DNOT_RUBY 7 | AM_CFLAGS = 8 | INCLUDES = -I$(top_srcdir) -I$(includedir) 9 | 10 | SUBDIRS = . 
sample 11 | 12 | include_HEADERS = oniguruma.h oniggnu.h onigposix.h 13 | lib_LTLIBRARIES = $(libname) 14 | 15 | libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ 16 | regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ 17 | regenc.c regsyntax.c regtrav.c regversion.c st.c \ 18 | regposix.c regposerr.c \ 19 | $(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \ 20 | $(encdir)/utf16_be.c $(encdir)/utf16_le.c \ 21 | $(encdir)/utf32_be.c $(encdir)/utf32_le.c \ 22 | $(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/iso8859_1.c \ 23 | $(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \ 24 | $(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \ 25 | $(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \ 26 | $(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \ 27 | $(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \ 28 | $(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \ 29 | $(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \ 30 | $(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \ 31 | $(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c 32 | 33 | libonig_la_LDFLAGS = -version-info $(LTVERSION) 34 | 35 | EXTRA_DIST = HISTORY README.ja index.html index_ja.html \ 36 | doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \ 37 | win32/Makefile win32/config.h win32/testc.c \ 38 | $(encdir)/koi8.c $(encdir)/mktable.c \ 39 | $(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \ 40 | $(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \ 41 | $(sampledir)/syntax.c 42 | 43 | bin_SCRIPTS = onig-config 44 | 45 | onig-config: onig-config.in 46 | 47 | dll: 48 | $(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \ 49 | $(LIBS) 50 | strip libonig.dll 51 | 52 | # Ruby TEST 53 | rtest: 54 | $(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb 55 | 56 | # character-types-table source generator 57 | mktable: $(encdir)/mktable.c $(srcdir)/regenc.h 58 | $(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c 59 | 60 | 61 | # TEST 62 | TESTS = testc testp testcu 63 | 64 | 
check_PROGRAMS = testc testp testcu 65 | 66 | atest: testc testp testcu 67 | @echo "[Oniguruma API, ASCII/EUC-JP check]" 68 | @$(top_builddir)/testc | grep RESULT 69 | @echo "[POSIX API, ASCII/EUC-JP check]" 70 | @$(top_builddir)/testp | grep RESULT 71 | @echo "[Oniguruma API, UTF-16 check]" 72 | @$(top_builddir)/testcu | grep RESULT 73 | 74 | testc_SOURCES = testc.c 75 | testc_LDADD = libonig.la 76 | 77 | testp_SOURCES = testc.c 78 | testp_LDADD = libonig.la 79 | testp_CFLAGS = -DPOSIX_TEST 80 | 81 | testcu_SOURCES = testu.c 82 | testcu_LDADD = libonig.la 83 | 84 | 85 | #testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb 86 | # ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@ 87 | 88 | #testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb 89 | # ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@ 90 | 91 | #win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb 92 | # ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@ 93 | 94 | ## END OF FILE 95 | -------------------------------------------------------------------------------- /oniguruma/sample/listcap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * listcap.c 3 | * 4 | * capture history (?@...) sample. 
5 | */ 6 | #include 7 | #include 8 | #include "oniguruma.h" 9 | 10 | static int 11 | node_callback(int group, int beg, int end, int level, int at, void* arg) 12 | { 13 | int i; 14 | 15 | if (at != ONIG_TRAVERSE_CALLBACK_AT_FIRST) 16 | return -1; /* error */ 17 | 18 | /* indent */ 19 | for (i = 0; i < level * 2; i++) 20 | fputc(' ', stderr); 21 | 22 | fprintf(stderr, "%d: (%d-%d)\n", group, beg, end); 23 | return 0; 24 | } 25 | 26 | extern int ex(unsigned char* str, unsigned char* pattern, 27 | OnigSyntaxType* syntax) 28 | { 29 | int r; 30 | unsigned char *start, *range, *end; 31 | regex_t* reg; 32 | OnigErrorInfo einfo; 33 | OnigRegion *region; 34 | 35 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 36 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); 37 | if (r != ONIG_NORMAL) { 38 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 39 | onig_error_code_to_str(s, r, &einfo); 40 | fprintf(stderr, "ERROR: %s\n", s); 41 | return -1; 42 | } 43 | 44 | fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg)); 45 | fprintf(stderr, "number of capture histories: %d\n", 46 | onig_number_of_capture_histories(reg)); 47 | 48 | region = onig_region_new(); 49 | 50 | end = str + strlen((char* )str); 51 | start = str; 52 | range = end; 53 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 54 | if (r >= 0) { 55 | int i; 56 | 57 | fprintf(stderr, "match at %d\n", r); 58 | for (i = 0; i < region->num_regs; i++) { 59 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 60 | } 61 | fprintf(stderr, "\n"); 62 | 63 | r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST, 64 | node_callback, (void* )0); 65 | } 66 | else if (r == ONIG_MISMATCH) { 67 | fprintf(stderr, "search fail\n"); 68 | } 69 | else { /* error */ 70 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 71 | onig_error_code_to_str(s, r); 72 | return -1; 73 | } 74 | 75 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 76 | 
onig_free(reg); 77 | return 0; 78 | } 79 | 80 | 81 | extern int main(int argc, char* argv[]) 82 | { 83 | int r; 84 | OnigSyntaxType syn; 85 | 86 | static UChar* str1 = (UChar* )"((())())"; 87 | static UChar* pattern1 88 | = (UChar* )"\\g

(?@

\\(\\g\\)){0}(?@(?:\\g

)*|){0}"; 89 | 90 | static UChar* str2 = (UChar* )"x00x00x00"; 91 | static UChar* pattern2 = (UChar* )"(?@x(?@\\d+))+"; 92 | 93 | static UChar* str3 = (UChar* )"0123"; 94 | static UChar* pattern3 = (UChar* )"(?@.)(?@.)(?@.)(?@.)"; 95 | 96 | /* enable capture hostory */ 97 | onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT); 98 | onig_set_syntax_op2(&syn, 99 | onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY); 100 | 101 | r = ex(str1, pattern1, &syn); 102 | r = ex(str2, pattern2, &syn); 103 | r = ex(str3, pattern3, &syn); 104 | 105 | onig_end(); 106 | return 0; 107 | } 108 | -------------------------------------------------------------------------------- /oniguruma/oniggnu.h: -------------------------------------------------------------------------------- 1 | #ifndef ONIGGNU_H 2 | #define ONIGGNU_H 3 | /********************************************************************** 4 | oniggnu.h - Oniguruma (regular expression library) 5 | **********************************************************************/ 6 | /*- 7 | * Copyright (c) 2002-2005 K.Kosako 8 | * All rights reserved. 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 1. Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 15 | * 2. Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 | * SUCH DAMAGE. 30 | */ 31 | 32 | #include "oniguruma.h" 33 | 34 | #ifdef __cplusplus 35 | extern "C" { 36 | #endif 37 | 38 | #define RE_MBCTYPE_ASCII 0 39 | #define RE_MBCTYPE_EUC 1 40 | #define RE_MBCTYPE_SJIS 2 41 | #define RE_MBCTYPE_UTF8 3 42 | 43 | /* GNU regex options */ 44 | #ifndef RE_NREGS 45 | #define RE_NREGS ONIG_NREGION 46 | #endif 47 | 48 | #define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE 49 | #define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND 50 | #define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE 51 | #define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE 52 | #define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST 53 | #define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) 54 | #define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY 55 | #define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE 56 | #define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP 57 | #define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP 58 | 59 | 60 | ONIG_EXTERN 61 | void re_mbcinit P_((int)); 62 | ONIG_EXTERN 63 | int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); 64 | ONIG_EXTERN 65 | int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); 66 | ONIG_EXTERN 67 | void re_free_pattern P_((struct re_pattern_buffer*)); 68 | ONIG_EXTERN 69 | int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); 70 | ONIG_EXTERN 71 
#ifndef __HTPARSE_H__
#define __HTPARSE_H__

/* This header uses size_t and uint64_t in its own declarations, so it pulls
 * in their definitions itself instead of depending on the includer. */
#include <stddef.h>
#include <stdint.h>

struct htparser;

/* Kind of message a parser is initialised for (see htparser_init()). */
enum htp_type {
    htp_type_request = 0,
    htp_type_response
};

/* URI scheme values reported by htparser_get_scheme(). */
enum htp_scheme {
    htp_scheme_none = 0,
    htp_scheme_ftp,
    htp_scheme_http,
    htp_scheme_https,
    htp_scheme_nfs,
    htp_scheme_unknown
};

/* Request method values reported by htparser_get_method(). */
enum htp_method {
    htp_method_GET = 0,
    htp_method_HEAD,
    htp_method_POST,
    htp_method_PUT,
    htp_method_DELETE,
    htp_method_MKCOL,
    htp_method_COPY,
    htp_method_MOVE,
    htp_method_OPTIONS,
    htp_method_PROPFIND,
    htp_method_PROPPATCH,
    htp_method_LOCK,
    htp_method_UNLOCK,
    htp_method_TRACE,
    htp_method_UNKNOWN
};

/* Error values reported by htparser_get_error(). */
enum htpparse_error {
    htparse_error_none = 0,
    htparse_error_too_big,
    htparse_error_inval_method,
    htparse_error_inval_reqline,
    htparse_error_inval_schema,
    htparse_error_inval_proto,
    htparse_error_inval_ver,
    htparse_error_inval_hdr,
    htparse_error_inval_chunk_sz,
    htparse_error_inval_chunk,
    htparse_error_inval_state,
    htparse_error_user,
    htparse_error_status,
    htparse_error_generic
};

typedef struct htparser      htparser;
typedef struct htparse_hooks htparse_hooks;

typedef enum htp_scheme      htp_scheme;
typedef enum htp_method      htp_method;
typedef enum htp_type        htp_type;
typedef enum htpparse_error  htpparse_error;

/* Hook that receives only the parser. */
typedef int (*htparse_hook)(htparser *);
/* Hook that additionally receives a data pointer and a length. */
typedef int (*htparse_data_hook)(htparser *, const char *, size_t);


/* Table of hooks handed to htparser_run(). */
struct htparse_hooks {
    htparse_hook      on_msg_begin;
    htparse_data_hook method;
    htparse_data_hook scheme;             /* called if scheme is found */
    htparse_data_hook host;               /* called if a host was in the request scheme */
    htparse_data_hook port;               /* called if a port was in the request scheme */
    htparse_data_hook path;               /* only the path of the uri */
    htparse_data_hook args;               /* only the arguments of the uri */
    htparse_data_hook uri;                /* the entire uri including path/args */
    htparse_hook      on_hdrs_begin;
    htparse_data_hook hdr_key;
    htparse_data_hook hdr_val;
    htparse_hook      on_hdrs_complete;
    htparse_hook      on_new_chunk;       /* called after parsed chunk octet */
    htparse_hook      on_chunk_complete;  /* called after single parsed chunk */
    htparse_hook      on_chunks_complete; /* called after all parsed chunks processed */
    htparse_data_hook body;
    htparse_hook      on_msg_complete;
};


size_t         htparser_run(htparser *, htparse_hooks *, const char *, size_t);
int            htparser_should_keep_alive(htparser * p);
htp_scheme     htparser_get_scheme(htparser *);
htp_method     htparser_get_method(htparser *);
const char   * htparser_get_methodstr(htparser *);
void           htparser_set_major(htparser *, unsigned char);
void           htparser_set_minor(htparser *, unsigned char);
unsigned char  htparser_get_major(htparser *);
unsigned char  htparser_get_minor(htparser *);
unsigned int   htparser_get_status(htparser *);
uint64_t       htparser_get_content_length(htparser *);
uint64_t       htparser_get_total_bytes_read(htparser *);
htpparse_error htparser_get_error(htparser *);
const char   * htparser_get_strerror(htparser *);
void         * htparser_get_userdata(htparser *);
void           htparser_set_userdata(htparser *, void *);
void           htparser_init(htparser *, htp_type);
htparser     * htparser_new(void);

#endif
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "config.h" 31 | #include "onigposix.h" 32 | 33 | #ifdef HAVE_STRING_H 34 | # include 35 | #else 36 | # include 37 | #endif 38 | 39 | #if defined(__GNUC__) 40 | # define ARG_UNUSED __attribute__ ((unused)) 41 | #else 42 | # define ARG_UNUSED 43 | #endif 44 | 45 | static char* ESTRING[] = { 46 | NULL, 47 | "failed to match", /* REG_NOMATCH */ 48 | "Invalid regular expression", /* REG_BADPAT */ 49 | "invalid collating element referenced", /* REG_ECOLLATE */ 50 | "invalid character class type referenced", /* REG_ECTYPE */ 51 | "bad backslash-escape sequence", /* REG_EESCAPE */ 52 | "invalid back reference number", /* REG_ESUBREG */ 53 | "imbalanced [ and ]", /* REG_EBRACK */ 54 | "imbalanced ( and )", /* REG_EPAREN */ 55 | "imbalanced { and }", /* REG_EBRACE */ 56 | "invalid repeat range {n,m}", /* REG_BADBR */ 57 | "invalid range", /* REG_ERANGE */ 58 | "Out of memory", /* REG_ESPACE */ 59 | "? 
* + not preceded by valid regular expression", /* REG_BADRPT */ 60 | 61 | /* Extended errors */ 62 | "internal error", /* REG_EONIG_INTERNAL */ 63 | "invalid wide char value", /* REG_EONIG_BADWC */ 64 | "invalid argument", /* REG_EONIG_BADARG */ 65 | "multi-thread error" /* REG_EONIG_THREAD */ 66 | }; 67 | 68 | #include 69 | 70 | 71 | extern size_t 72 | regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, 73 | size_t size) 74 | { 75 | char* s; 76 | char tbuf[35]; 77 | size_t len; 78 | 79 | if (posix_ecode > 0 80 | && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) { 81 | s = ESTRING[posix_ecode]; 82 | } 83 | else if (posix_ecode == 0) { 84 | s = ""; 85 | } 86 | else { 87 | sprintf(tbuf, "undefined error code (%d)", posix_ecode); 88 | s = tbuf; 89 | } 90 | 91 | len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */ 92 | 93 | if (buf != NULL && size > 0) { 94 | strncpy(buf, s, size - 1); 95 | buf[size - 1] = '\0'; 96 | } 97 | return len; 98 | } 99 | -------------------------------------------------------------------------------- /oniguruma/sample/crnl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * crnl.c 2007/05/30 K.Kosako 3 | * 4 | * !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!! 5 | * 6 | * USE_CRNL_AS_LINE_TERMINATOR config test program. 
7 | */ 8 | #include 9 | #include 10 | #include "oniguruma.h" 11 | 12 | static int nfail = 0; 13 | 14 | static void result(int no, int from, int to, 15 | int expected_from, int expected_to) 16 | { 17 | fprintf(stderr, "%3d: ", no); 18 | if (from == expected_from && to == expected_to) { 19 | fprintf(stderr, "Success\n"); 20 | } 21 | else { 22 | fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n", 23 | expected_from, expected_to, from, to); 24 | 25 | nfail++; 26 | } 27 | } 28 | 29 | static int 30 | x(int no, char* pattern_arg, char* str_arg, 31 | int expected_from, int expected_to) 32 | { 33 | int r; 34 | unsigned char *start, *range, *end; 35 | regex_t* reg; 36 | OnigErrorInfo einfo; 37 | OnigRegion *region; 38 | UChar *pattern, *str; 39 | 40 | pattern = (UChar* )pattern_arg; 41 | str = (UChar* )str_arg; 42 | 43 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 44 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 45 | if (r != ONIG_NORMAL) { 46 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 47 | onig_error_code_to_str(s, r, &einfo); 48 | fprintf(stderr, "ERROR: %s\n", s); 49 | return -1; 50 | } 51 | 52 | region = onig_region_new(); 53 | 54 | end = str + strlen((char* )str); 55 | start = str; 56 | range = end; 57 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 58 | if (r >= 0 || r == ONIG_MISMATCH) { 59 | result(no, region->beg[0], region->end[0], expected_from, expected_to); 60 | } 61 | else if (r == ONIG_MISMATCH) { 62 | result(no, r, -1, expected_from, expected_to); 63 | } 64 | else { /* error */ 65 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 66 | onig_error_code_to_str(s, r); 67 | fprintf(stderr, "ERROR: %s\n", s); 68 | return -1; 69 | } 70 | 71 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 72 | onig_free(reg); 73 | return 0; 74 | } 75 | 76 | static int 77 | f(int no, char* pattern_arg, char* str_arg) 78 | { 79 | return x(no, pattern_arg, str_arg, -1, -1); 80 | } 81 | 82 | extern int 
main(int argc, char* argv[]) 83 | { 84 | x( 1, "", "\r\n", 0, 0); 85 | x( 2, ".", "\r\n", 0, 1); 86 | f( 3, "..", "\r\n"); 87 | x( 4, "^", "\r\n", 0, 0); 88 | x( 5, "\\n^", "\r\nf", 1, 2); 89 | x( 6, "\\n^a", "\r\na", 1, 3); 90 | x( 7, "$", "\r\n", 0, 0); 91 | x( 8, "T$", "T\r\n", 0, 1); 92 | x( 9, "T$", "T\raT\r\n", 3, 4); 93 | x(10, "\\z", "\r\n", 2, 2); 94 | f(11, "a\\z", "a\r\n"); 95 | x(12, "\\Z", "\r\n", 0, 0); 96 | x(13, "\\Z", "\r\na", 3, 3); 97 | x(14, "\\Z", "\r\n\r\n\n", 4, 4); 98 | x(15, "\\Z", "\r\n\r\nX", 5, 5); 99 | x(16, "a\\Z", "a\r\n", 0, 1); 100 | x(17, "aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15); 101 | x(18, "a|$", "b\r\n", 1, 1); 102 | x(19, "$|b", "\rb", 1, 2); 103 | x(20, "a$|ab$", "\r\nab\r\n", 2, 4); 104 | 105 | x(21, "a|\\Z", "b\r\n", 1, 1); 106 | x(22, "\\Z|b", "\rb", 1, 2); 107 | x(23, "a\\Z|ab\\Z", "\r\nab\r\n", 2, 4); 108 | x(24, "(?=a$).", "a\r\n", 0, 1); 109 | f(25, "(?=a$).", "a\r"); 110 | x(26, "(?!a$)..", "a\r", 0, 2); 111 | x(27, "(?<=a$).\\n", "a\r\n", 1, 3); 112 | f(28, "(? 0) { 120 | fprintf(stderr, "\n"); 121 | fprintf(stderr, "!!! You have to enable USE_CRNL_AS_LINE_TERMINATOR\n"); 122 | fprintf(stderr, "!!! in regenc.h for this test program.\n"); 123 | fprintf(stderr, "\n"); 124 | } 125 | 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # Libevhtp 2 | ***** 3 | 4 | This document describes details on using the evhtp API. This document is 5 | probably not very awesome, it's best to look at test.c to see advanced usage. 6 | 7 | ## Required Dependencies 8 | * [gcc](http://gcc.gnu.org/) 9 | * [Libevent2](http://libevent.org) 10 | 11 | ## Optional Dependencies 12 | * [OpenSSL](http://openssl.org) 13 | * pthreads 14 | 15 | ## Overview 16 | *** 17 | 18 | Libevhtp was created as a replacement API for Libevent's current HTTP API. 
The reality of libevent's http interface is that it was created as a JIT server, meaning the developer never thought of it being used for creating a full-fledged HTTP service. In fact, I am under the impression that the libevent http API was designed almost as an example of what you can do with libevent. It's not Apache in a box, but more and more developers are attempting to use it as such. 19 | 20 | ### Libevent's HTTP pitfalls 21 | *** 22 | 23 | * It was not designed to be a fully functional HTTP server. 24 | * The code is messy, abstractions are almost non-existent, and feature-creep has made long-term maintainability very hard. 25 | * The parsing code is slow and requires data to be buffered before a full parse can be completed. This results in extraneous memory usage and lots of string comparison functions. 26 | * There is no method for a user to access various parts of the request processing cycle. For example, if the "Content-Length" header has a value of 50000, your callback is not executed until all 50000 bytes have been read. 27 | * Callback URIs are matched exactly, meaning that if you set a callback for "/foo/", requests for "/foo/bar/" are ignored. 28 | * Creating an HTTPS server is hard; it requires a bunch of work to be done on the underlying bufferevents. 29 | * As far as I know, streaming data back to a client is hard, if not impossible, without messing with underlying bufferevents. 30 | * It's confusing to work with; this is probably due to the lack of proper documentation. 31 | 32 | Libevhtp attempts to address these problems along with a wide variety of cool mechanisms allowing a developer to have complete control over their server operations. This is not to say the API cannot be used in a very simplistic manner - a developer can easily create a backwards compatible version of libevent's HTTP server to libevhtp. 33 | 34 | ### A bit about the architecture of libevhtp 35 | *** 36 | 37 | #### Bootstrapping 38 | 39 | 1. Create a parent evhtp_t structure. 
40 | 2. Assign callbacks to the parent for specific URIs or POSIX-regex-based URIs. 41 | 3. Optionally assign per-connection hooks (see hooks) to the callbacks. 42 | 4. Optionally assign pre-accept and post-accept callbacks for incoming connections. 43 | 5. Optionally enable built-in threadpool for connection handling (lock-free, and non-blocking). 44 | 6. Optionally morph your server to HTTPS. 45 | 7. Start the evhtp listener. 46 | 47 | #### Request handling. 48 | 49 | 1. Optionally deal with pre-accept and post-accept callbacks if they exist, allowing for a connection to be rejected if the function deems it unacceptable. 50 | 2. Optionally assign per-request hooks (see hooks) for a request (the best place for setting these hooks is on a post-accept callback). 51 | 3. Deal with either per-connection or per-request hook callbacks if they exist. 52 | 4. Once the request has been fully processed, inform evhtp to send a reply. 53 | 54 | ##### A very basic example with no optional conditions. 55 | 56 | #include <stdio.h> 57 | #include <evhtp.h> 58 | 59 | void 60 | testcb(evhtp_request_t * req, void * a) { 61 | evbuffer_add_reference(req->buffer_out, "foobar", 6, NULL, NULL); 62 | evhtp_send_reply(req, EVHTP_RES_OK); 63 | } 64 | 65 | int 66 | main(int argc, char ** argv) { 67 | evbase_t * evbase = event_base_new(); 68 | evhtp_t * htp = evhtp_new(evbase, NULL); 69 | 70 | evhtp_set_cb(htp, "/test", testcb, NULL); 71 | evhtp_bind_socket(htp, "0.0.0.0", 8080, 1024); 72 | event_base_loop(evbase, 0); 73 | return 0; 74 | } 75 | 76 | 77 | ## Is evhtp thread-safe? 78 | 79 | For simple usage with evhtp_use_threads(), yes. But for more extreme cases: 80 | sorta, you are bound to the thread mechanisms of libevent itself. 81 | 82 | But with proper design around libevhtp, thread issues can be out-of-sight, 83 | out-of-mind. 84 | 85 | What do you mean by this "proper design" statement? 86 | 87 | Refer to the code in ./examples/thread_design.c. 
The comments go into great detail 88 | of the hows and whys for proper design using libevhtp's threading model. 89 | 90 | This example uses redis, mainly because most people who have asked me "is evhtp 91 | thread-safe" were attempting to do *other things* before sending a response to a 92 | request. And on more than one occasion, those *other things* were communicating 93 | with redis. 94 | -------------------------------------------------------------------------------- /oniguruma/enc/euc_tw.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | euc_tw.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | /* Character length in bytes, indexed by the first byte: 4 for 0x8e, 2 for 0xa1-0xfe, 1 for every other value. */ 32 | static const int EncLen_EUCTW[] = { 33 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 49 | }; 50 | /* Length of the character starting at p: the EncLen_EUCTW entry for its first byte. */ 51 | static int 52 | euctw_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_EUCTW[*p]; 55 | } 56 | /* The next four functions forward to the generic onigenc_mbn_* / onigenc_mb4_* helpers with ONIG_ENCODING_EUC_TW. */ 57 | static OnigCodePoint 58 | euctw_mbc_to_code(const UChar* p, const UChar* end) 59 | { 60 | return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end); 61 | } 62 | 63 | static int 64 | euctw_code_to_mbc(OnigCodePoint code, UChar *buf) 65 | { 66 | return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf); 67 | } 68 | 69 | static int 70 | euctw_mbc_case_fold(OnigCaseFoldType flag, 
const UChar** pp, const UChar* end, 71 | UChar* lower) 72 | { 73 | return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag, 74 | pp, end, lower); 75 | } 76 | 77 | static int 78 | euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype) 79 | { 80 | return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); 81 | } 82 | /* NOTE(review): despite its name this is true when c lies OUTSIDE 0xa1..0xfe, assuming UChar is an 8-bit unsigned type so the subtraction wraps for c < 0xa1 -- confirm against regenc.h. */ 83 | #define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) 84 | /* Return the head of the character containing s. Walks back while the byte is in 0xa1..0xfe (stopping at start), then moves forward from that character; the final "& ~1" keeps an even offset, i.e. steps in 2-byte units. */ 85 | static UChar* 86 | euctw_left_adjust_char_head(const UChar* start, const UChar* s) 87 | { 88 | /* Assumed in this encoding, 89 | mb-trail bytes don't mix with single bytes. 90 | */ 91 | const UChar *p; 92 | int len; 93 | 94 | if (s <= start) return (UChar* )s; 95 | p = s; 96 | 97 | while (!euctw_islead(*p) && p > start) p--; 98 | len = enclen(ONIG_ENCODING_EUC_TW, p); 99 | if (p + len > s) return (UChar* )p; 100 | p += len; 101 | return (UChar* )(p + ((s - p) & ~1)); 102 | } 103 | /* TRUE only when the byte at s is <= 0x7e; end is not examined. */ 104 | static int 105 | euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 106 | { 107 | const UChar c = *s; 108 | if (c <= 0x7e) return TRUE; 109 | else return FALSE; 110 | } 111 | 112 | OnigEncodingType OnigEncodingEUC_TW = { 113 | euctw_mbc_enc_len, 114 | "EUC-TW", /* name */ 115 | 4, /* max enc length */ 116 | 1, /* min enc length */ 117 | onigenc_is_mbc_newline_0x0a, 118 | euctw_mbc_to_code, 119 | onigenc_mb4_code_to_mbclen, 120 | euctw_code_to_mbc, 121 | euctw_mbc_case_fold, 122 | onigenc_ascii_apply_all_case_fold, 123 | onigenc_ascii_get_case_fold_codes_by_str, 124 | onigenc_minimum_property_name_to_ctype, 125 | euctw_is_code_ctype, 126 | onigenc_not_support_get_ctype_code_range, 127 | euctw_left_adjust_char_head, 128 | euctw_is_allowed_reverse_match 129 | }; 130 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_6.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_6.c - Oniguruma 
(regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ 33 | ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 34 | 35 | static const unsigned short EncISO_8859_6_CtypeTable[256] = { 36 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 37 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 38 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 39 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 40 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 41 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 42 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 43 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 44 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 45 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 46 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 47 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 48 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 49 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 50 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 51 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 52 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56 | 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, 57 | 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, 58 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 59 | 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, 60 | 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 61 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 
0x30a2, 0x30a2, 0x30a2, 0x30a2, 62 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 63 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 64 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 65 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 66 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 67 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 68 | }; 69 | 70 | static int 71 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 72 | { 73 | if (code < 256) 74 | return ENC_IS_ISO_8859_6_CTYPE(code, ctype); 75 | else 76 | return FALSE; 77 | } 78 | 79 | OnigEncodingType OnigEncodingISO_8859_6 = { 80 | onigenc_single_byte_mbc_enc_len, 81 | "ISO-8859-6", /* name */ 82 | 1, /* max enc length */ 83 | 1, /* min enc length */ 84 | onigenc_is_mbc_newline_0x0a, 85 | onigenc_single_byte_mbc_to_code, 86 | onigenc_single_byte_code_to_mbclen, 87 | onigenc_single_byte_code_to_mbc, 88 | onigenc_ascii_mbc_case_fold, 89 | onigenc_ascii_apply_all_case_fold, 90 | onigenc_ascii_get_case_fold_codes_by_str, 91 | onigenc_minimum_property_name_to_ctype, 92 | is_code_ctype, 93 | onigenc_not_support_get_ctype_code_range, 94 | onigenc_single_byte_left_adjust_char_head, 95 | onigenc_always_true_is_allowed_reverse_match 96 | }; 97 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_8.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_8.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ 33 | ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 34 | 35 | static const unsigned short EncISO_8859_8_CtypeTable[256] = { 36 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 37 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 38 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 39 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 40 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 41 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 42 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 43 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 44 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 45 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 46 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 47 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 48 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 49 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 50 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 51 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 52 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56 | 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 57 | 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, 58 | 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, 59 | 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, 60 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 61 | 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 62 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 63 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 64 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 65 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 66 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 67 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 68 | }; 69 | 70 | static int 71 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 72 | { 73 | if (code < 256) 74 | return ENC_IS_ISO_8859_8_CTYPE(code, ctype); 75 | else 76 | return FALSE; 77 | } 78 | 79 | OnigEncodingType OnigEncodingISO_8859_8 = { 80 | onigenc_single_byte_mbc_enc_len, 81 | "ISO-8859-8", /* name */ 82 | 1, /* max enc length */ 83 | 1, /* min enc length */ 84 | onigenc_is_mbc_newline_0x0a, 85 | onigenc_single_byte_mbc_to_code, 86 | onigenc_single_byte_code_to_mbclen, 87 | onigenc_single_byte_code_to_mbc, 88 | onigenc_ascii_mbc_case_fold, 89 | onigenc_ascii_apply_all_case_fold, 90 | onigenc_ascii_get_case_fold_codes_by_str, 91 | onigenc_minimum_property_name_to_ctype, 92 | is_code_ctype, 93 | onigenc_not_support_get_ctype_code_range, 94 | onigenc_single_byte_left_adjust_char_head, 95 | onigenc_always_true_is_allowed_reverse_match 96 | }; 97 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_11.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_11.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ 33 | ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 34 | 35 | static const unsigned short EncISO_8859_11_CtypeTable[256] = { 36 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 37 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 38 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 39 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 40 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 41 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 42 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 43 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 44 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 45 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 46 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 47 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 48 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 49 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 50 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 51 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 52 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56 | 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 57 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 58 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 59 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 60 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 61 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 
0x30a2, 0x30a2, 0x30a2, 0x30a2, 62 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 63 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2, 64 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 65 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 66 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 67 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000 68 | }; 69 | 70 | static int 71 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 72 | { 73 | if (code < 256) 74 | return ENC_IS_ISO_8859_11_CTYPE(code, ctype); 75 | else 76 | return FALSE; 77 | } 78 | 79 | OnigEncodingType OnigEncodingISO_8859_11 = { 80 | onigenc_single_byte_mbc_enc_len, 81 | "ISO-8859-11", /* name */ 82 | 1, /* max enc length */ 83 | 1, /* min enc length */ 84 | onigenc_is_mbc_newline_0x0a, 85 | onigenc_single_byte_mbc_to_code, 86 | onigenc_single_byte_code_to_mbclen, 87 | onigenc_single_byte_code_to_mbc, 88 | onigenc_ascii_mbc_case_fold, 89 | onigenc_ascii_apply_all_case_fold, 90 | onigenc_ascii_get_case_fold_codes_by_str, 91 | onigenc_minimum_property_name_to_ctype, 92 | is_code_ctype, 93 | onigenc_not_support_get_ctype_code_range, 94 | onigenc_single_byte_left_adjust_char_head, 95 | onigenc_always_true_is_allowed_reverse_match 96 | }; 97 | -------------------------------------------------------------------------------- /contrib/git_changelog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Copyright 2008 Marcus D. Hanwell 3 | # Modifications by Mark Ellzey 4 | # Distributed under the terms of the GNU General Public License v2 or later 5 | 6 | import string, re, os, sys 7 | 8 | # Execute git log with the desired command line options. 
old_tag = sys.argv[1]
new_tag = sys.argv[2]

fin = os.popen('git log --summary --stat --no-merges --date=short %s..%s' % (old_tag, new_tag), 'r')

# Keep the previous ChangeLog aside; it is appended after the new entries
# once the new file has been written.
os.system("mv ChangeLog bak.ChangeLog")

# Create a ChangeLog file in the current directory.
fout = open('ChangeLog', 'w')

# Parser state for the commit currently being read from the git log output.
authorFound = False
dateFound = False
messageFound = False
filesFound = False
message = ""
messageNL = False
files = ""
prevAuthorLine = ""

fout.write("v%s\n" % new_tag)
fout.write("=====================================\n")

# The main part of the loop
for line in fin:
    # The commit line marks the start of a new commit object.  Only a line
    # that *begins* with "commit " counts: message lines are indented by git,
    # so a message that merely mentions the word "commit" must not reset the
    # parser state.
    if line.startswith('commit '):
        # Start all over again...
        authorFound = False
        dateFound = False
        messageFound = False
        messageNL = False
        message = ""
        filesFound = False
        files = ""
        continue
    # Match the author line and extract the part we want
    elif re.match('Author:', line) is not None:
        authorList = re.split(': ', line, 1)
        author = authorList[1]
        # Drop the trailing newline.
        author = author[0:len(author)-1]
        authorFound = True
    # Match the date line
    elif re.match('Date:', line) is not None:
        dateList = re.split(': ', line, 1)
        date = dateList[1]
        # Drop the trailing newline.
        date = date[0:len(date)-1]
        dateFound = True
    # The svn-id lines are ignored
    elif re.match(' git-svn-id:', line) is not None:
        continue
    # The sign off line is ignored too
    elif re.search('Signed-off-by', line) is not None:
        continue
    # Extract the actual commit message for this commit
    elif not (authorFound and dateFound and messageFound):
        # Find the commit message if we can
        if len(line) == 1:
            # A bare newline: the first one precedes the message, the
            # second one terminates it.
            if messageNL:
                messageFound = True
            else:
                messageNL = True
        elif len(line) == 4:
            messageFound = True
        else:
            if len(message) == 0:
                message = message + line.strip()
            else:
                message = message + " " + line.strip()
    # If this line is hit all of the files have been stored for this commit.
    # git prints "1 file changed" for single-file commits, so accept both the
    # singular and the plural form.
    elif re.search('files? changed', line) is not None:
        filesFound = True
        continue
    # Collect the files for this commit. FIXME: Still need to add +/- to files
    elif authorFound and dateFound and messageFound:
        fileList = re.split(r' \| ', line, 2)
        if len(fileList) > 1:
            if len(files) > 0:
                files = files + ", " + fileList[0].strip()
            else:
                files = fileList[0].strip()
    # All of the parts of the commit have been found - write out the entry
    if authorFound and dateFound and messageFound and filesFound:
        # First the author line, only outputted if it is the first for that
        # author on this day
        authorLine = date + " " + author
        if len(prevAuthorLine) == 0:
            fout.write(authorLine + "\n")
        elif authorLine == prevAuthorLine:
            pass
        else:
            fout.write("\n" + authorLine + "\n")

        # Assemble the actual commit message line(s) and limit the line length
        # to 80 characters.
        commitLine = "* " + files + ": " + message
        i = 0
        commit = ""
        while i < len(commitLine):
            if len(commitLine) < i + 78:
                # The remainder fits on one line.
                commit = commit + "\n " + commitLine[i:len(commitLine)]
                break
            index = commitLine.rfind(' ', i, i+78)
            if index > i:
                # Break at the last space inside the window and skip it.
                commit = commit + "\n " + commitLine[i:index]
                i = index+1
            else:
                # No usable space in the window: hard-break after 78
                # characters without dropping any of them.
                commit = commit + "\n " + commitLine[i:i+78]
                i = i+78

        # Write out the commit line
        fout.write(commit + "\n")

        # Now reset all the variables ready for a new commit block.
        authorFound = False
        dateFound = False
        messageFound = False
        messageNL = False
        message = ""
        filesFound = False
        files = ""
        prevAuthorLine = authorLine

fout.write("\n\n")
# Close the input and output lines now that we are finished.
134 | fin.close() 135 | fout.close() 136 | 137 | os.system("cat bak.ChangeLog >> ChangeLog") 138 | os.system("rm bak.ChangeLog") 139 | -------------------------------------------------------------------------------- /oniguruma/reggnu.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | reggnu.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regint.h" 31 | 32 | #ifndef ONIGGNU_H 33 | #include "oniggnu.h" 34 | #endif 35 | 36 | extern void 37 | re_free_registers(OnigRegion* r) 38 | { 39 | /* 0: don't free self */ 40 | onig_region_free(r, 0); 41 | } 42 | 43 | extern int 44 | re_adjust_startpos(regex_t* reg, const char* string, int size, 45 | int startpos, int range) 46 | { 47 | if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { 48 | UChar *p; 49 | UChar *s = (UChar* )string + startpos; 50 | 51 | if (range > 0) { 52 | p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); 53 | } 54 | else { 55 | p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); 56 | } 57 | return p - (UChar* )string; 58 | } 59 | 60 | return startpos; 61 | } 62 | 63 | extern int 64 | re_match(regex_t* reg, const char* str, int size, int pos, 65 | struct re_registers* regs) 66 | { 67 | return onig_match(reg, (UChar* )str, (UChar* )(str + size), 68 | (UChar* )(str + pos), regs, ONIG_OPTION_NONE); 69 | } 70 | 71 | extern int 72 | re_search(regex_t* bufp, const char* string, int size, int startpos, int range, 73 | struct re_registers* regs) 74 | { 75 | return onig_search(bufp, (UChar* )string, (UChar* )(string + size), 76 | (UChar* )(string + startpos), 77 | (UChar* )(string + startpos + range), 78 | regs, ONIG_OPTION_NONE); 79 | } 80 | 81 | extern int 82 | re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) 83 | { 84 | int r; 85 | OnigErrorInfo einfo; 86 | 87 | r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); 88 | if (r != ONIG_NORMAL) { 89 | if (IS_NOT_NULL(ebuf)) 90 | (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); 91 | } 92 | 93 | return r; 94 | } 95 | 96 | #ifdef USE_RECOMPILE_API 97 | extern int 98 | re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) 99 | { 100 | int r; 101 | OnigErrorInfo einfo; 102 | OnigEncoding enc; 103 | 104 | /* I think encoding and options should 
be arguments of this function. 105 | But this is adapted to present re.c. (2002/11/29) 106 | */ 107 | enc = OnigEncDefaultCharEncoding; 108 | 109 | r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), 110 | reg->options, enc, OnigDefaultSyntax, &einfo); 111 | if (r != ONIG_NORMAL) { 112 | if (IS_NOT_NULL(ebuf)) 113 | (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); 114 | } 115 | return r; 116 | } 117 | #endif 118 | 119 | extern void 120 | re_free_pattern(regex_t* reg) 121 | { 122 | onig_free(reg); 123 | } 124 | 125 | extern int 126 | re_alloc_pattern(regex_t** reg) 127 | { 128 | *reg = (regex_t* )xmalloc(sizeof(regex_t)); 129 | if (IS_NULL(*reg)) return ONIGERR_MEMORY; 130 | 131 | return onig_reg_init(*reg, ONIG_OPTION_DEFAULT, 132 | ONIGENC_CASE_FOLD_DEFAULT, 133 | OnigEncDefaultCharEncoding, 134 | OnigDefaultSyntax); 135 | } 136 | 137 | extern void 138 | re_set_casetable(const char* table) 139 | { 140 | onigenc_set_default_caseconv_table((UChar* )table); 141 | } 142 | 143 | extern void 144 | re_mbcinit(int mb_code) 145 | { 146 | OnigEncoding enc; 147 | 148 | switch (mb_code) { 149 | case RE_MBCTYPE_ASCII: 150 | enc = ONIG_ENCODING_ASCII; 151 | break; 152 | case RE_MBCTYPE_EUC: 153 | enc = ONIG_ENCODING_EUC_JP; 154 | break; 155 | case RE_MBCTYPE_SJIS: 156 | enc = ONIG_ENCODING_SJIS; 157 | break; 158 | case RE_MBCTYPE_UTF8: 159 | enc = ONIG_ENCODING_UTF8; 160 | break; 161 | default: 162 | return ; 163 | break; 164 | } 165 | 166 | onigenc_set_default_encoding(enc); 167 | } 168 | -------------------------------------------------------------------------------- /oniguruma/enc/euc_kr.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | euc_kr.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_EUCKR[] = { 33 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 49 | }; 50 | 51 | static int 52 | euckr_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_EUCKR[*p]; 55 | } 56 | 57 | static OnigCodePoint 58 | euckr_mbc_to_code(const UChar* p, const UChar* end) 59 | { 60 | return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end); 61 | } 62 | 63 | static int 64 | euckr_code_to_mbc(OnigCodePoint code, UChar *buf) 65 | { 66 | return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf); 67 | } 68 | 69 | static int 70 | euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, 71 | UChar* lower) 72 | { 73 | return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag, 74 | pp, end, lower); 75 | } 76 | 77 | #if 0 78 | static int 79 | euckr_is_mbc_ambiguous(OnigCaseFoldType flag, 80 | const UChar** pp, const UChar* end) 81 | { 82 | return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); 83 | } 84 | #endif 85 | 86 | static int 87 | euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype) 88 | { 89 | return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype); 90 | 
} 91 | 92 | #define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) 93 | 94 | static UChar* 95 | euckr_left_adjust_char_head(const UChar* start, const UChar* s) 96 | { 97 | /* Assumed in this encoding, 98 | mb-trail bytes don't mix with single bytes. 99 | */ 100 | const UChar *p; 101 | int len; 102 | 103 | if (s <= start) return (UChar* )s; 104 | p = s; 105 | 106 | while (!euckr_islead(*p) && p > start) p--; 107 | len = enclen(ONIG_ENCODING_EUC_KR, p); 108 | if (p + len > s) return (UChar* )p; 109 | p += len; 110 | return (UChar* )(p + ((s - p) & ~1)); 111 | } 112 | 113 | static int 114 | euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 115 | { 116 | const UChar c = *s; 117 | if (c <= 0x7e) return TRUE; 118 | else return FALSE; 119 | } 120 | 121 | OnigEncodingType OnigEncodingEUC_KR = { 122 | euckr_mbc_enc_len, 123 | "EUC-KR", /* name */ 124 | 2, /* max enc length */ 125 | 1, /* min enc length */ 126 | onigenc_is_mbc_newline_0x0a, 127 | euckr_mbc_to_code, 128 | onigenc_mb2_code_to_mbclen, 129 | euckr_code_to_mbc, 130 | euckr_mbc_case_fold, 131 | onigenc_ascii_apply_all_case_fold, 132 | onigenc_ascii_get_case_fold_codes_by_str, 133 | onigenc_minimum_property_name_to_ctype, 134 | euckr_is_code_ctype, 135 | onigenc_not_support_get_ctype_code_range, 136 | euckr_left_adjust_char_head, 137 | euckr_is_allowed_reverse_match 138 | }; 139 | 140 | /* Same with OnigEncodingEUC_KR except the name */ 141 | OnigEncodingType OnigEncodingEUC_CN = { 142 | euckr_mbc_enc_len, 143 | "EUC-CN", /* name */ 144 | 2, /* max enc length */ 145 | 1, /* min enc length */ 146 | onigenc_is_mbc_newline_0x0a, 147 | euckr_mbc_to_code, 148 | onigenc_mb2_code_to_mbclen, 149 | euckr_code_to_mbc, 150 | euckr_mbc_case_fold, 151 | onigenc_ascii_apply_all_case_fold, 152 | onigenc_ascii_get_case_fold_codes_by_str, 153 | onigenc_minimum_property_name_to_ctype, 154 | euckr_is_code_ctype, 155 | onigenc_not_support_get_ctype_code_range, 156 | euckr_left_adjust_char_head, 157 | 
euckr_is_allowed_reverse_match 158 | }; 159 | -------------------------------------------------------------------------------- /oniguruma/enc/utf32_be.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf32_be.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static int 33 | utf32be_mbc_enc_len(const UChar* p ARG_UNUSED) 34 | { 35 | return 4; 36 | } 37 | 38 | static int 39 | utf32be_is_mbc_newline(const UChar* p, const UChar* end) 40 | { 41 | if (p + 3 < end) { 42 | if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) 43 | return 1; 44 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 45 | if (( 46 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 47 | *(p+3) == 0x0d || 48 | #endif 49 | *(p+3) == 0x85) 50 | && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) 51 | return 1; 52 | if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) 53 | && *(p+1) == 0 && *p == 0) 54 | return 1; 55 | #endif 56 | } 57 | return 0; 58 | } 59 | 60 | static OnigCodePoint 61 | utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 62 | { 63 | return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); 64 | } 65 | 66 | static int 67 | utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED) 68 | { 69 | return 4; 70 | } 71 | 72 | static int 73 | utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) 74 | { 75 | UChar* p = buf; 76 | 77 | *p++ = (UChar )((code & 0xff000000) >>24); 78 | *p++ = (UChar )((code & 0xff0000) >>16); 79 | *p++ = (UChar )((code & 0xff00) >> 8); 80 | *p++ = (UChar ) (code & 0xff); 81 | return 4; 82 | } 83 | 84 | static int 85 | utf32be_mbc_case_fold(OnigCaseFoldType flag, 86 | const UChar** pp, const UChar* end, UChar* fold) 87 | { 88 | const UChar* p = *pp; 89 | 90 | if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) { 91 | *fold++ = 0; 92 | *fold++ = 0; 93 | 94 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 95 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 96 | if (*(p+3) == 0x49) { 97 | *fold++ = 0x01; 98 | *fold = 0x31; 99 | (*pp) += 4; 100 | return 4; 101 | } 102 | } 103 | #endif 104 | 105 | *fold++ = 0; 106 | *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3)); 107 | *pp += 4; 108 | return 4; 109 | } 110 | else 111 | return 
onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end, 112 | fold); 113 | } 114 | 115 | #if 0 116 | static int 117 | utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 118 | { 119 | const UChar* p = *pp; 120 | 121 | (*pp) += 4; 122 | 123 | if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { 124 | int c, v; 125 | 126 | p += 3; 127 | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 128 | return TRUE; 129 | } 130 | 131 | c = *p; 132 | v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, 133 | (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 134 | if ((v | BIT_CTYPE_LOWER) != 0) { 135 | /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 136 | if (c >= 0xaa && c <= 0xba) 137 | return FALSE; 138 | else 139 | return TRUE; 140 | } 141 | return (v != 0 ? TRUE : FALSE); 142 | } 143 | 144 | return FALSE; 145 | } 146 | #endif 147 | 148 | static UChar* 149 | utf32be_left_adjust_char_head(const UChar* start, const UChar* s) 150 | { 151 | int rem; 152 | 153 | if (s <= start) return (UChar* )s; 154 | 155 | rem = (s - start) % 4; 156 | return (UChar* )(s - rem); 157 | } 158 | 159 | static int 160 | utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, 161 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 162 | { 163 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE, 164 | flag, p, end, items); 165 | } 166 | 167 | OnigEncodingType OnigEncodingUTF32_BE = { 168 | utf32be_mbc_enc_len, 169 | "UTF-32BE", /* name */ 170 | 4, /* max byte length */ 171 | 4, /* min byte length */ 172 | utf32be_is_mbc_newline, 173 | utf32be_mbc_to_code, 174 | utf32be_code_to_mbclen, 175 | utf32be_code_to_mbc, 176 | utf32be_mbc_case_fold, 177 | onigenc_unicode_apply_all_case_fold, 178 | utf32be_get_case_fold_codes_by_str, 179 | onigenc_unicode_property_name_to_ctype, 180 | onigenc_unicode_is_code_ctype, 181 | onigenc_utf16_32_get_ctype_code_range, 182 | utf32be_left_adjust_char_head, 183 | 
onigenc_always_false_is_allowed_reverse_match 184 | }; 185 | -------------------------------------------------------------------------------- /oniguruma/enc/utf32_le.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf32_le.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static int 33 | utf32le_mbc_enc_len(const UChar* p ARG_UNUSED) 34 | { 35 | return 4; 36 | } 37 | 38 | static int 39 | utf32le_is_mbc_newline(const UChar* p, const UChar* end) 40 | { 41 | if (p + 3 < end) { 42 | if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) 43 | return 1; 44 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 45 | if (( 46 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 47 | *p == 0x0d || 48 | #endif 49 | *p == 0x85) 50 | && *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00) 51 | return 1; 52 | if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) 53 | && *(p+2) == 0x00 && *(p+3) == 0x00) 54 | return 1; 55 | #endif 56 | } 57 | return 0; 58 | } 59 | 60 | static OnigCodePoint 61 | utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 62 | { 63 | return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); 64 | } 65 | 66 | static int 67 | utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED) 68 | { 69 | return 4; 70 | } 71 | 72 | static int 73 | utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) 74 | { 75 | UChar* p = buf; 76 | 77 | *p++ = (UChar ) (code & 0xff); 78 | *p++ = (UChar )((code & 0xff00) >> 8); 79 | *p++ = (UChar )((code & 0xff0000) >>16); 80 | *p++ = (UChar )((code & 0xff000000) >>24); 81 | return 4; 82 | } 83 | 84 | static int 85 | utf32le_mbc_case_fold(OnigCaseFoldType flag, 86 | const UChar** pp, const UChar* end, UChar* fold) 87 | { 88 | const UChar* p = *pp; 89 | 90 | if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { 91 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 92 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 93 | if (*p == 0x49) { 94 | *fold++ = 0x31; 95 | *fold++ = 0x01; 96 | } 97 | } 98 | else { 99 | #endif 100 | *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 101 | *fold++ = 0; 102 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 103 | } 104 | #endif 105 | 106 | *fold++ = 0; 107 | *fold = 0; 108 | *pp += 4; 109 | return 4; 110 | } 
111 | else 112 | return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end, 113 | fold); 114 | } 115 | 116 | #if 0 117 | static int 118 | utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 119 | { 120 | const UChar* p = *pp; 121 | 122 | (*pp) += 4; 123 | 124 | if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { 125 | int c, v; 126 | 127 | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 128 | return TRUE; 129 | } 130 | 131 | c = *p; 132 | v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, 133 | (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 134 | if ((v | BIT_CTYPE_LOWER) != 0) { 135 | /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 136 | if (c >= 0xaa && c <= 0xba) 137 | return FALSE; 138 | else 139 | return TRUE; 140 | } 141 | return (v != 0 ? TRUE : FALSE); 142 | } 143 | 144 | return FALSE; 145 | } 146 | #endif 147 | 148 | static UChar* 149 | utf32le_left_adjust_char_head(const UChar* start, const UChar* s) 150 | { 151 | int rem; 152 | 153 | if (s <= start) return (UChar* )s; 154 | 155 | rem = (s - start) % 4; 156 | return (UChar* )(s - rem); 157 | } 158 | 159 | static int 160 | utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, 161 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 162 | { 163 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE, 164 | flag, p, end, items); 165 | } 166 | 167 | OnigEncodingType OnigEncodingUTF32_LE = { 168 | utf32le_mbc_enc_len, 169 | "UTF-32LE", /* name */ 170 | 4, /* max byte length */ 171 | 4, /* min byte length */ 172 | utf32le_is_mbc_newline, 173 | utf32le_mbc_to_code, 174 | utf32le_code_to_mbclen, 175 | utf32le_code_to_mbc, 176 | utf32le_mbc_case_fold, 177 | onigenc_unicode_apply_all_case_fold, 178 | utf32le_get_case_fold_codes_by_str, 179 | onigenc_unicode_property_name_to_ctype, 180 | onigenc_unicode_is_code_ctype, 181 | onigenc_utf16_32_get_ctype_code_range, 182 | utf32le_left_adjust_char_head, 
183 | onigenc_always_false_is_allowed_reverse_match 184 | }; 185 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(reason) 3 | 4 | set(PROJECT_MAJOR_VERSION 0) 5 | set(PROJECT_MINOR_VERSION 4) 6 | set(PROJECT_PATCH_VERSION 14) 7 | 8 | set (PROJECT_VERSION ${PROJECT_MAJOR_VERSION}.${PROJECT_MINOR_VERSION}.${PROJECT_PATCH_VERSION}) 9 | set (CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeModules) 10 | 11 | INCLUDE (CheckFunctionExists) 12 | INCLUDE (CheckIncludeFiles) 13 | INCLUDE (CheckTypeSize) 14 | 15 | CHECK_FUNCTION_EXISTS(alloca C_ALLOCA) 16 | CHECK_FUNCTION_EXISTS(memcmp HAVE_MEMCMP) 17 | CHECK_FUNCTION_EXISTS(strndup HAVE_STRNDUP) 18 | CHECK_FUNCTION_EXISTS(strnlen HAVE_STRNLEN) 19 | 20 | CHECK_INCLUDE_FILES(alloca.h HAVE_ALLOCA_H) 21 | CHECK_INCLUDE_FILES(strings.h HAVE_STRINGS_H) 22 | CHECK_INCLUDE_FILES(string.h HAVE_STRING_H) 23 | CHECK_INCLUDE_FILES(stdlib.h HAVE_STDLIB_H) 24 | CHECK_INCLUDE_FILES(sys/time.h HAVE_SYS_TIME_H) 25 | CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H) 26 | CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) 27 | CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H) 28 | CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_PROTOTYPES) 29 | CHECK_INCLUDE_FILES(sys/tree.h HAVE_SYS_TREE) 30 | CHECK_INCLUDE_FILES(sys/queue.h HAVE_SYS_QUEUE) 31 | CHECK_INCLUDE_FILES(sys/un.h HAVE_SYS_UN) 32 | 33 | CHECK_TYPE_SIZE("int" SIZEOF_INT) 34 | CHECK_TYPE_SIZE("long" SIZEOF_LONG) 35 | CHECK_TYPE_SIZE("short" SIZEOF_SHORT) 36 | 37 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/oniguruma/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/oniguruma/config.h) 38 | 39 | set(ONIG_SOURCES 40 | oniguruma/regerror.c 41 | oniguruma/regparse.c 42 | oniguruma/regext.c 43 | oniguruma/regcomp.c 44 | oniguruma/regexec.c 45 | oniguruma/reggnu.c 46 | oniguruma/regenc.c 47 | oniguruma/regsyntax.c 48 | oniguruma/regtrav.c 49 
| oniguruma/regversion.c 50 | oniguruma/st.c 51 | oniguruma/regposix.c 52 | oniguruma/regposerr.c 53 | oniguruma/enc/unicode.c 54 | oniguruma/enc/ascii.c 55 | oniguruma/enc/utf8.c 56 | oniguruma/enc/utf16_be.c 57 | oniguruma/enc/utf16_le.c 58 | oniguruma/enc/utf32_be.c 59 | oniguruma/enc/utf32_le.c 60 | oniguruma/enc/euc_jp.c 61 | oniguruma/enc/sjis.c 62 | oniguruma/enc/iso8859_1.c 63 | oniguruma/enc/iso8859_2.c 64 | oniguruma/enc/iso8859_3.c 65 | oniguruma/enc/iso8859_4.c 66 | oniguruma/enc/iso8859_5.c 67 | oniguruma/enc/iso8859_6.c 68 | oniguruma/enc/iso8859_7.c 69 | oniguruma/enc/iso8859_8.c 70 | oniguruma/enc/iso8859_9.c 71 | oniguruma/enc/iso8859_10.c 72 | oniguruma/enc/iso8859_11.c 73 | oniguruma/enc/iso8859_13.c 74 | oniguruma/enc/iso8859_14.c 75 | oniguruma/enc/iso8859_15.c 76 | oniguruma/enc/iso8859_16.c 77 | oniguruma/enc/euc_tw.c 78 | oniguruma/enc/euc_kr.c 79 | oniguruma/enc/big5.c 80 | oniguruma/enc/gb18030.c 81 | oniguruma/enc/koi8_r.c 82 | oniguruma/enc/cp1251.c 83 | ) 84 | 85 | if (NOT HAVE_SYS_TREE) 86 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/compat/sys/tree.h.in ${CMAKE_CURRENT_BINARY_DIR}/compat/sys/tree.h) 87 | endif(NOT HAVE_SYS_TREE) 88 | 89 | if (NOT HAVE_SYS_QUEUE) 90 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/compat/sys/queue.h.in ${CMAKE_CURRENT_BINARY_DIR}/compat/sys/queue.h) 91 | endif(NOT HAVE_SYS_QUEUE) 92 | 93 | if (NOT HAVE_STRNDUP) 94 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_STRNDUP") 95 | endif(NOT HAVE_STRNDUP) 96 | 97 | if (NOT HAVE_STRNLEN) 98 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_STRNLEN") 99 | endif(NOT HAVE_STRNLEN) 100 | 101 | if (NOT HAVE_SYS_UN) 102 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_SYS_UN") 103 | endif(NOT HAVE_SYS_UN) 104 | 105 | OPTION(EVHTP_DISABLE_SSL "Disable ssl support" OFF) 106 | OPTION(EVHTP_DISABLE_EVTHR "Disable evthread support" OFF) 107 | 108 | SET(CMAKE_INCLUDE_CURRENT_DIR ON) 109 | 110 | include(BaseConfig) 111 | 112 | message("Build Type: ${CMAKE_BUILD_TYPE}") 113 | message("Std 
CFLAGS: ${CMAKE_C_FLAGS}") 114 | message("Dbg CFLAGS: ${CMAKE_C_FLAGS_DEBUG}") 115 | message("Rel CFLAGS: ${CMAKE_C_FLAGS_RELEASE}") 116 | 117 | find_package(LibEvent REQUIRED) 118 | find_package(OpenSSL) 119 | find_path(LIBEVENT_INCLUDE_DIR event2/event.h REQUIRED) 120 | 121 | include_directories( 122 | ${CMAKE_CURRENT_BINARY_DIR}/compat 123 | ${CMAKE_CURRENT_SOURCE_DIR}/htparse 124 | ${CMAKE_CURRENT_BINARY_DIR}/oniguruma 125 | ${CMAKE_CURRENT_SOURCE_DIR}/oniguruma 126 | ${CMAKE_CURRENT_SOURCE_DIR} 127 | ${CMAKE_CURRENT_SOURCE_DIR}/evthr 128 | ${OPENSSL_INCLUDE_DIR} 129 | ${LIBEVENT_INCLUDE_DIR} 130 | ) 131 | 132 | set(LIBEVHTP_EXTERNAL_LIBS 133 | ${LIBEVENT_LIBRARY} 134 | ${LIBEVENT_PTHREADS_LIBRARY} 135 | ${LIBEVENT_OPENSSL_LIBRARY} 136 | ${OPENSSL_LIBRARIES}) 137 | 138 | if (NOT ${LIBEVENT_PTHREADS_FOUND}) 139 | set(EVHTP_DISABLE_EVTHR 1) 140 | endif(NOT ${LIBEVENT_PTHREADS_FOUND}) 141 | 142 | if (NOT ${LIBEVENT_OPENSSL_FOUND}) 143 | set (EVHTP_DISABLE_SSL 1) 144 | endif(NOT ${LIBEVENT_OPENSSL_FOUND}) 145 | 146 | set(LIBEVHTP_SOURCES evhtp.c htparse/htparse.c) 147 | 148 | if (NOT EVHTP_DISABLE_EVTHR) 149 | set (LIBEVHTP_EXTERNAL_LIBS ${LIBEVHTP_EXTERNAL_LIBS} pthread) 150 | set (LIBEVHTP_SOURCES ${LIBEVHTP_SOURCES} evthr/evthr.c) 151 | endif(NOT EVHTP_DISABLE_EVTHR) 152 | 153 | if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") 154 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG") 155 | endif (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") 156 | 157 | add_library(libevhtp STATIC ${LIBEVHTP_SOURCES} ${ONIG_SOURCES}) 158 | set_target_properties(libevhtp PROPERTIES OUTPUT_NAME "evhtp") 159 | 160 | install (TARGETS libevhtp DESTINATION lib) 161 | install (FILES evhtp.h DESTINATION include) 162 | install (FILES htparse/htparse.h DESTINATION include) 163 | install (FILES evthr/evthr.h DESTINATION include) 164 | install (FILES oniguruma/onigposix.h DESTINATION include) 165 | 166 | find_library (LIB_DL dl) 167 | set (SYS_LIBS ${LIB_DL}) 168 | 169 | if (NOT APPLE) 170 | 
find_library (LIB_RT rt) 171 | set (SYS_LIBS ${SYS_LIBS} ${LIB_RT}) 172 | endif() 173 | 174 | add_executable(test test.c) 175 | target_link_libraries(test libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS}) 176 | 177 | add_executable(test_basic test_basic.c) 178 | target_link_libraries(test_basic libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS}) 179 | 180 | -------------------------------------------------------------------------------- /oniguruma/enc/big5.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | big5.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_BIG5[] = { 33 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 49 | }; 50 | 51 | static int 52 | big5_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_BIG5[*p]; 55 | } 56 | 57 | static OnigCodePoint 58 | big5_mbc_to_code(const UChar* p, const UChar* end) 59 | { 60 | return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end); 61 | } 62 | 63 | static int 64 | big5_code_to_mbc(OnigCodePoint code, UChar *buf) 65 | { 66 | return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf); 67 | } 68 | 69 | static int 70 | big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** 
pp, const UChar* end, 71 | UChar* lower) 72 | { 73 | return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag, 74 | pp, end, lower); 75 | } 76 | 77 | #if 0 78 | static int 79 | big5_is_mbc_ambiguous(OnigCaseFoldType flag, 80 | const UChar** pp, const UChar* end) 81 | { 82 | return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); 83 | } 84 | #endif 85 | 86 | static int 87 | big5_is_code_ctype(OnigCodePoint code, unsigned int ctype) 88 | { 89 | return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype); 90 | } 91 | 92 | static const char BIG5_CAN_BE_TRAIL_TABLE[256] = { 93 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 98 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 100 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 101 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 102 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 103 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 104 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 105 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 106 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 107 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 108 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 109 | }; 110 | 111 | #define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) 112 | #define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] 113 | 114 | static UChar* 115 | big5_left_adjust_char_head(const UChar* start, const UChar* s) 116 | { 117 | const UChar *p; 118 | int len; 119 | 120 | if (s <= start) return (UChar* )s; 121 | p = s; 122 | 123 | if (BIG5_ISMB_TRAIL(*p)) { 124 | while (p > start) { 125 | if (! 
BIG5_ISMB_FIRST(*--p)) { 126 | p++; 127 | break; 128 | } 129 | } 130 | } 131 | len = enclen(ONIG_ENCODING_BIG5, p); 132 | if (p + len > s) return (UChar* )p; 133 | p += len; 134 | return (UChar* )(p + ((s - p) & ~1)); 135 | } 136 | 137 | static int 138 | big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 139 | { 140 | const UChar c = *s; 141 | 142 | return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE); 143 | } 144 | 145 | OnigEncodingType OnigEncodingBIG5 = { 146 | big5_mbc_enc_len, 147 | "Big5", /* name */ 148 | 2, /* max enc length */ 149 | 1, /* min enc length */ 150 | onigenc_is_mbc_newline_0x0a, 151 | big5_mbc_to_code, 152 | onigenc_mb2_code_to_mbclen, 153 | big5_code_to_mbc, 154 | big5_mbc_case_fold, 155 | onigenc_ascii_apply_all_case_fold, 156 | onigenc_ascii_get_case_fold_codes_by_str, 157 | onigenc_minimum_property_name_to_ctype, 158 | big5_is_code_ctype, 159 | onigenc_not_support_get_ctype_code_range, 160 | big5_left_adjust_char_head, 161 | big5_is_allowed_reverse_match 162 | }; 163 | -------------------------------------------------------------------------------- /oniguruma/onigposix.h: -------------------------------------------------------------------------------- 1 | #ifndef ONIGPOSIX_H 2 | #define ONIGPOSIX_H 3 | /********************************************************************** 4 | onigposix.h - Oniguruma (regular expression library) 5 | **********************************************************************/ 6 | /*- 7 | * Copyright (c) 2002-2005 K.Kosako 8 | * All rights reserved. 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 1. Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 15 | * 2. 
Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 | * SUCH DAMAGE. 
30 | */ 31 | #include 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | /* options */ 38 | #define REG_ICASE (1<<0) 39 | #define REG_NEWLINE (1<<1) 40 | #define REG_NOTBOL (1<<2) 41 | #define REG_NOTEOL (1<<3) 42 | #define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */ 43 | #define REG_NOSUB (1<<5) 44 | 45 | /* POSIX error codes */ 46 | #define REG_NOMATCH 1 47 | #define REG_BADPAT 2 48 | #define REG_ECOLLATE 3 49 | #define REG_ECTYPE 4 50 | #define REG_EESCAPE 5 51 | #define REG_ESUBREG 6 52 | #define REG_EBRACK 7 53 | #define REG_EPAREN 8 54 | #define REG_EBRACE 9 55 | #define REG_BADBR 10 56 | #define REG_ERANGE 11 57 | #define REG_ESPACE 12 58 | #define REG_BADRPT 13 59 | 60 | /* extended error codes */ 61 | #define REG_EONIG_INTERNAL 14 62 | #define REG_EONIG_BADWC 15 63 | #define REG_EONIG_BADARG 16 64 | #define REG_EONIG_THREAD 17 65 | 66 | /* character encodings (for reg_set_encoding()) */ 67 | #define REG_POSIX_ENCODING_ASCII 0 68 | #define REG_POSIX_ENCODING_EUC_JP 1 69 | #define REG_POSIX_ENCODING_SJIS 2 70 | #define REG_POSIX_ENCODING_UTF8 3 71 | #define REG_POSIX_ENCODING_UTF16_BE 4 72 | #define REG_POSIX_ENCODING_UTF16_LE 5 73 | 74 | 75 | typedef int regoff_t; 76 | 77 | typedef struct { 78 | regoff_t rm_so; 79 | regoff_t rm_eo; 80 | } regmatch_t; 81 | 82 | /* POSIX regex_t */ 83 | typedef struct { 84 | void* onig; /* Oniguruma regex_t* */ 85 | size_t re_nsub; 86 | int comp_options; 87 | } regex_t; 88 | 89 | 90 | #ifndef P_ 91 | #if defined(__STDC__) || defined(_WIN32) 92 | # define P_(args) args 93 | #else 94 | # define P_(args) () 95 | #endif 96 | #endif 97 | 98 | #ifndef ONIG_EXTERN 99 | #if defined(_WIN32) && !defined(__GNUC__) 100 | #if defined(EXPORT) 101 | #define ONIG_EXTERN extern __declspec(dllexport) 102 | #else 103 | #define ONIG_EXTERN extern __declspec(dllimport) 104 | #endif 105 | #endif 106 | #endif 107 | 108 | #ifndef ONIG_EXTERN 109 | #define ONIG_EXTERN extern 110 | #endif 111 | 112 | #ifndef 
ONIGURUMA_H 113 | typedef unsigned int OnigOptionType; 114 | 115 | /* syntax */ 116 | typedef struct { 117 | unsigned int op; 118 | unsigned int op2; 119 | unsigned int behavior; 120 | OnigOptionType options; /* default option */ 121 | } OnigSyntaxType; 122 | 123 | ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; 124 | ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; 125 | ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; 126 | ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; 127 | ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; 128 | ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; 129 | ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; 130 | ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; 131 | 132 | /* predefined syntaxes (see regsyntax.c) */ 133 | #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) 134 | #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) 135 | #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) 136 | #define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) 137 | #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) 138 | #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) 139 | #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) 140 | #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) 141 | /* default syntax */ 142 | #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax 143 | 144 | ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; 145 | 146 | ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); 147 | ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); 148 | ONIG_EXTERN const char* onig_version P_((void)); 149 | ONIG_EXTERN const char* onig_copyright P_((void)); 150 | 151 | #endif /* ONIGURUMA_H */ 152 | 153 | 154 | ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); 155 | ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); 156 | ONIG_EXTERN void regfree P_((regex_t* reg)); 157 | ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); 158 | 159 | /* extended 
API */ 160 | ONIG_EXTERN void reg_set_encoding P_((int enc)); 161 | ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); 162 | ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); 163 | ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); 164 | 165 | #ifdef __cplusplus 166 | } 167 | #endif 168 | 169 | #endif /* ONIGPOSIX_H */ 170 | -------------------------------------------------------------------------------- /oniguruma/README: -------------------------------------------------------------------------------- 1 | README 2007/05/31 2 | 3 | Oniguruma ---- (C) K.Kosako 4 | 5 | http://www.geocities.jp/kosako3/oniguruma/ 6 | 7 | Oniguruma is a regular expressions library. 8 | The characteristics of this library is that different character encoding 9 | for every regular expression object can be specified. 10 | 11 | Supported character encodings: 12 | 13 | ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, 14 | EUC-JP, EUC-TW, EUC-KR, EUC-CN, 15 | Shift_JIS, Big5, GB18030, KOI8-R, CP1251, 16 | ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, 17 | ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, 18 | ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 19 | 20 | * GB18030: contributed by KUBO Takehiro 21 | * CP1251: contributed by Byte 22 | ------------------------------------------------------------ 23 | 24 | License 25 | 26 | BSD license. 27 | 28 | 29 | Install 30 | 31 | Case 1: Unix and Cygwin platform 32 | 33 | 1. ./configure 34 | 2. make 35 | 3. make install 36 | 37 | * uninstall 38 | 39 | make uninstall 40 | 41 | * test (ASCII/EUC-JP) 42 | 43 | make atest 44 | 45 | * configuration check 46 | 47 | onig-config --cflags 48 | onig-config --libs 49 | onig-config --prefix 50 | onig-config --exec-prefix 51 | 52 | 53 | 54 | Case 2: Win32 platform (VC++) 55 | 56 | 1. 
copy win32\Makefile Makefile 57 | 2. copy win32\config.h config.h 58 | 3. nmake 59 | 60 | onig_s.lib: static link library 61 | onig.dll: dynamic link library 62 | 63 | * test (ASCII/Shift_JIS) 64 | 4. copy win32\testc.c testc.c 65 | 5. nmake ctest 66 | 67 | 68 | 69 | Regular Expressions 70 | 71 | See doc/RE (or doc/RE.ja for Japanese). 72 | 73 | 74 | Usage 75 | 76 | Include oniguruma.h in your program. (Oniguruma API) 77 | See doc/API for Oniguruma API. 78 | 79 | If you want to disable UChar type (== unsigned char) definition 80 | in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then 81 | include oniguruma.h. 82 | 83 | If you want to disable regex_t type definition in oniguruma.h, 84 | define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h. 85 | 86 | Example of the compiling/linking command line in Unix or Cygwin, 87 | (prefix == /usr/local case) 88 | 89 | cc sample.c -L/usr/local/lib -lonig 90 | 91 | 92 | If you want to use static link library(onig_s.lib) in Win32, 93 | add option -DONIG_EXTERN=extern to C compiler. 94 | 95 | 96 | 97 | Sample Programs 98 | 99 | sample/simple.c example of the minimum (Oniguruma API) 100 | sample/names.c example of the named group callback. 101 | sample/encode.c example of some encodings. 102 | sample/listcap.c example of the capture history. 103 | sample/posix.c POSIX API sample. 104 | sample/sql.c example of the variable meta characters. 105 | (SQL-like pattern matching) 106 | 107 | Test Programs 108 | sample/syntax.c Perl, Java and ASIS syntax test. 109 | sample/crnl.c --enable-crnl-as-line-terminator test 110 | 111 | 112 | Source Files 113 | 114 | oniguruma.h Oniguruma API header file. (public) 115 | onig-config.in configuration check program template. 116 | 117 | regenc.h character encodings framework header file. 
118 | regint.h internal definitions 119 | regparse.h internal definitions for regparse.c and regcomp.c 120 | regcomp.c compiling and optimization functions 121 | regenc.c character encodings framework. 122 | regerror.c error message function 123 | regext.c extended API functions. (deluxe version API) 124 | regexec.c search and match functions 125 | regparse.c parsing functions. 126 | regsyntax.c pattern syntax functions and built-in syntax definitions. 127 | regtrav.c capture history tree data traverse functions. 128 | regversion.c version info function. 129 | st.h hash table functions header file 130 | st.c hash table functions 131 | 132 | oniggnu.h GNU regex API header file. (public) 133 | reggnu.c GNU regex API functions 134 | 135 | onigposix.h POSIX API header file. (public) 136 | regposerr.c POSIX error message function. 137 | regposix.c POSIX API functions. 138 | 139 | enc/mktable.c character type table generator. 140 | enc/ascii.c ASCII encoding. 141 | enc/euc_jp.c EUC-JP encoding. 142 | enc/euc_tw.c EUC-TW encoding. 143 | enc/euc_kr.c EUC-KR, EUC-CN encoding. 144 | enc/sjis.c Shift_JIS encoding. 145 | enc/big5.c Big5 encoding. 146 | enc/gb18030.c GB18030 encoding. 147 | enc/koi8.c KOI8 encoding. 148 | enc/koi8_r.c KOI8-R encoding. 149 | enc/cp1251.c CP1251 encoding. 150 | enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) 151 | enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) 152 | enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) 153 | enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4) 154 | enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic) 155 | enc/iso8859_6.c ISO-8859-6 encoding. (Arabic) 156 | enc/iso8859_7.c ISO-8859-7 encoding. (Greek) 157 | enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew) 158 | enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish) 159 | enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic) 160 | enc/iso8859_11.c ISO-8859-11 encoding. (Thai) 161 | enc/iso8859_13.c ISO-8859-13 encoding. 
(Latin-7 or Baltic Rim) 162 | enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic) 163 | enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro) 164 | enc/iso8859_16.c ISO-8859-16 encoding. 165 | (Latin-10 or South-Eastern European with Euro) 166 | enc/utf8.c UTF-8 encoding. 167 | enc/utf16_be.c UTF-16BE encoding. 168 | enc/utf16_le.c UTF-16LE encoding. 169 | enc/utf32_be.c UTF-32BE encoding. 170 | enc/utf32_le.c UTF-32LE encoding. 171 | enc/unicode.c Unicode information data. 172 | 173 | win32/Makefile Makefile for Win32 (VC++) 174 | win32/config.h config.h for Win32 175 | 176 | 177 | 178 | ToDo 179 | 180 | ? case fold flag: Katakana <-> Hiragana. 181 | ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) 182 | ?? \X (== \PM\pM*) 183 | ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. 184 | ?? transmission stopper. (return ONIG_STOP from match_at()) 185 | 186 | and I'm thankful to Akinori MUSHA. 187 | 188 | 189 | Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 190 | -------------------------------------------------------------------------------- /oniguruma/regext.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regext.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution.
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regint.h" 31 | 32 | static void 33 | conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) 34 | { 35 | while (s < end) { 36 | *conv++ = '\0'; 37 | *conv++ = '\0'; 38 | *conv++ = '\0'; 39 | *conv++ = *s++; 40 | } 41 | } 42 | 43 | static void 44 | conv_ext0le32(const UChar* s, const UChar* end, UChar* conv) 45 | { 46 | while (s < end) { 47 | *conv++ = *s++; 48 | *conv++ = '\0'; 49 | *conv++ = '\0'; 50 | *conv++ = '\0'; 51 | } 52 | } 53 | 54 | static void 55 | conv_ext0be(const UChar* s, const UChar* end, UChar* conv) 56 | { 57 | while (s < end) { 58 | *conv++ = '\0'; 59 | *conv++ = *s++; 60 | } 61 | } 62 | 63 | static void 64 | conv_ext0le(const UChar* s, const UChar* end, UChar* conv) 65 | { 66 | while (s < end) { 67 | *conv++ = *s++; 68 | *conv++ = '\0'; 69 | } 70 | } 71 | 72 | static void 73 | conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv) 74 | { 75 | while (s < end) { 76 | *conv++ = s[3]; 77 | *conv++ = s[2]; 78 | *conv++ = s[1]; 79 | *conv++ = s[0]; 80 | s += 4; 81 | } 82 | } 83 | 84 | static void 85 | conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv) 86 | { 87 | while (s < end) { 88 | *conv++ 
= s[1]; 89 | *conv++ = s[0]; 90 | s += 2; 91 | } 92 | } 93 | 94 | static int 95 | conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, 96 | UChar** conv, UChar** conv_end) 97 | { 98 | int len = end - s; 99 | 100 | if (to == ONIG_ENCODING_UTF16_BE) { 101 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 102 | *conv = (UChar* )xmalloc(len * 2); 103 | CHECK_NULL_RETURN_MEMERR(*conv); 104 | *conv_end = *conv + (len * 2); 105 | conv_ext0be(s, end, *conv); 106 | return 0; 107 | } 108 | else if (from == ONIG_ENCODING_UTF16_LE) { 109 | swap16: 110 | *conv = (UChar* )xmalloc(len); 111 | CHECK_NULL_RETURN_MEMERR(*conv); 112 | *conv_end = *conv + len; 113 | conv_swap2bytes(s, end, *conv); 114 | return 0; 115 | } 116 | } 117 | else if (to == ONIG_ENCODING_UTF16_LE) { 118 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 119 | *conv = (UChar* )xmalloc(len * 2); 120 | CHECK_NULL_RETURN_MEMERR(*conv); 121 | *conv_end = *conv + (len * 2); 122 | conv_ext0le(s, end, *conv); 123 | return 0; 124 | } 125 | else if (from == ONIG_ENCODING_UTF16_BE) { 126 | goto swap16; 127 | } 128 | } 129 | if (to == ONIG_ENCODING_UTF32_BE) { 130 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 131 | *conv = (UChar* )xmalloc(len * 4); 132 | CHECK_NULL_RETURN_MEMERR(*conv); 133 | *conv_end = *conv + (len * 4); 134 | conv_ext0be32(s, end, *conv); 135 | return 0; 136 | } 137 | else if (from == ONIG_ENCODING_UTF32_LE) { 138 | swap32: 139 | *conv = (UChar* )xmalloc(len); 140 | CHECK_NULL_RETURN_MEMERR(*conv); 141 | *conv_end = *conv + len; 142 | conv_swap4bytes(s, end, *conv); 143 | return 0; 144 | } 145 | } 146 | else if (to == ONIG_ENCODING_UTF32_LE) { 147 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 148 | *conv = (UChar* )xmalloc(len * 4); 149 | CHECK_NULL_RETURN_MEMERR(*conv); 150 | *conv_end = *conv + (len * 4); 151 | conv_ext0le32(s, end, *conv); 152 | return 0; 153 | } 154 | 
else if (from == ONIG_ENCODING_UTF32_BE) { 155 | goto swap32; 156 | } 157 | } 158 | 159 | return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; 160 | } 161 | 162 | extern int 163 | onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 164 | OnigCompileInfo* ci, OnigErrorInfo* einfo) 165 | { 166 | int r; 167 | UChar *cpat, *cpat_end; 168 | 169 | if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 170 | 171 | if (ci->pattern_enc != ci->target_enc) { 172 | r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, 173 | &cpat, &cpat_end); 174 | if (r) return r; 175 | } 176 | else { 177 | cpat = (UChar* )pattern; 178 | cpat_end = (UChar* )pattern_end; 179 | } 180 | 181 | *reg = (regex_t* )xmalloc(sizeof(regex_t)); 182 | if (IS_NULL(*reg)) { 183 | r = ONIGERR_MEMORY; 184 | goto err2; 185 | } 186 | 187 | r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc, 188 | ci->syntax); 189 | if (r) goto err; 190 | 191 | r = onig_compile(*reg, cpat, cpat_end, einfo); 192 | if (r) { 193 | err: 194 | onig_free(*reg); 195 | *reg = NULL; 196 | } 197 | 198 | err2: 199 | if (cpat != pattern) xfree(cpat); 200 | 201 | return r; 202 | } 203 | 204 | #ifdef USE_RECOMPILE_API 205 | extern int 206 | onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 207 | OnigCompileInfo* ci, OnigErrorInfo* einfo) 208 | { 209 | int r; 210 | regex_t *new_reg; 211 | 212 | r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo); 213 | if (r) return r; 214 | if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 215 | onig_transfer(reg, new_reg); 216 | } 217 | else { 218 | onig_chain_link_add(reg, new_reg); 219 | } 220 | return 0; 221 | } 222 | #endif 223 | -------------------------------------------------------------------------------- /oniguruma/enc/utf16_be.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf16_be.c - 
Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 |

#include "regenc.h"

/*
 * Byte length of a character, indexed by its first byte.
 * First bytes 0xd8-0xdb give 4; every other first byte gives 2.
 */
static const int EncLen_UTF16[] = {
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};

/* Length in bytes of the character whose first byte is *p. */
static int
utf16be_mbc_enc_len(const UChar* p)
{
  return EncLen_UTF16[*p];
}

/*
 * Return 1 when the two bytes at p encode a line terminator, else 0.
 * Always recognises 0x00 0x0a.  With USE_UNICODE_ALL_LINE_TERMINATORS it
 * also recognises 0x00 0x85, 0x20 0x28 and 0x20 0x29, plus 0x00 0x0d
 * unless USE_CRNL_AS_LINE_TERMINATOR is defined.
 * Returns 0 when fewer than two bytes remain before end.
 */
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (p + 1 < end) {
    if (*(p+1) == 0x0a && *p == 0x00)
      return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
    if ((
#ifndef USE_CRNL_AS_LINE_TERMINATOR
         *(p+1) == 0x0d ||
#endif
         *(p+1) == 0x85) && *p == 0x00)
      return 1;
    if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
      return 1;
#endif
  }
  return 0;
}

/*
 * Decode the character at p into a code point.
 * When the first byte satisfies UTF16_IS_SURROGATE_FIRST the value is
 * assembled from the four bytes p[0..3]; otherwise from p[0] and p[1].
 * The end argument is not consulted, so the caller must guarantee that
 * the bytes read here are available.
 */
static OnigCodePoint
utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
  OnigCodePoint code;

  if (UTF16_IS_SURROGATE_FIRST(*p)) {
    code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
         + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
         + p[3];
  }
  else {
    code = p[0] * 256 + p[1];
  }
  return code;
}

/* Number of bytes needed to encode code: 4 above 0xffff, otherwise 2. */
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
  return (code > 0xffff ? 4 : 2);
}

/*
 * Encode code into buf and return the number of bytes written
 * (4 for values above 0xffff, otherwise 2, high byte first).
 */
static int
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  UChar* p = buf;

  if (code > 0xffff) {
    unsigned int plane, high;

    /* inverse of the four-byte arithmetic in utf16be_mbc_to_code() */
    plane = (code >> 16) - 1;
    *p++ = (plane >> 2) + 0xd8;
    high = (code & 0xff00) >> 8;
    *p++ = ((plane & 0x03) << 6) + (high >> 2);
    *p++ = (high & 0x03) + 0xdc;
    *p   = (UChar )(code & 0xff);
    return 4;
  }
  else {
    *p++ = (UChar )((code & 0xff00) >> 8);
    *p++ = (UChar )(code & 0xff);
    return 2;
  }
}

/*
 * Case-fold the character at *pp into fold and advance *pp past it.
 * A character whose high byte is 0 and whose low byte is ASCII is folded
 * here (2 bytes written, 2 returned); anything else is delegated to
 * onigenc_unicode_mbc_case_fold().
 * The end argument is only used by the delegated call; the two bytes at
 * *pp are read without a bounds check.
 */
static int
utf16be_mbc_case_fold(OnigCaseFoldType flag,
                      const UChar** pp, const UChar* end, UChar* fold)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) {
    p++;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
      /* 0x49 folds to the byte pair 0x01 0x31 under this flag */
      if (*p == 0x49) {
        *fold++ = 0x01;
        *fold   = 0x31;
        (*pp) += 2;
        return 2;
      }
    }
#endif

    *fold++ = 0;
    *fold   = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    *pp += 2;
    return 2;
  }
  else
    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag,
                                         pp, end, fold);
}

#if 0
/* Disabled code, kept for reference. */
static int
utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += EncLen_UTF16[*p];

  if (*p == 0) {
    int c, v;

    p++;
    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
      return TRUE;
    }

    c = *p;
    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
                                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));

    /* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever
       BIT_CTYPE_LOWER is a non-zero mask, so this test would always
       pass; `&` was presumably intended -- confirm before re-enabling. */
    if ((v | BIT_CTYPE_LOWER) != 0) {
      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
      if (c >= 0xaa && c <= 0xba)
        return FALSE;
      else
        return TRUE;
    }
    return (v != 0 ? TRUE : FALSE);
  }

  return FALSE;
}
#endif

/*
 * Return the start of the character that contains s, never looking
 * before start.  s is first aligned down to an even offset from start;
 * if the byte there satisfies UTF16_IS_SURROGATE_SECOND and at least two
 * bytes precede it, s moves back two more bytes.
 */
static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s <= start) return (UChar* )s;

  if ((s - start) % 2 == 1) {
    s--;
  }

  if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
    s -= 2;

  return (UChar* )s;
}

/* Case-fold candidates for the string at p; delegates to the Unicode helper. */
static int
utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE,
                                                    flag, p, end, items);
}

OnigEncodingType OnigEncodingUTF16_BE = {
  utf16be_mbc_enc_len,
  "UTF-16BE",   /* name */
  4,            /* max byte length */
  2,            /* min byte length */
  utf16be_is_mbc_newline,
  utf16be_mbc_to_code,
  utf16be_code_to_mbclen,
  utf16be_code_to_mbc,
  utf16be_mbc_case_fold,
  onigenc_unicode_apply_all_case_fold,
  utf16be_get_case_fold_codes_by_str,
  onigenc_unicode_property_name_to_ctype,
  onigenc_unicode_is_code_ctype,
  onigenc_utf16_32_get_ctype_code_range,
  utf16be_left_adjust_char_head,
  onigenc_always_false_is_allowed_reverse_match
};
-------------------------------------------------------------------------------- /oniguruma/enc/utf16_le.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf16_le.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved.
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 |  */
29 |
30 | #include "regenc.h"
31 | /* Character byte length indexed by a byte value: 4 for 0xd8-0xdb, 2 for every other value. utf16le_mbc_enc_len indexes it with the second byte of the code unit. */
32 | static const int EncLen_UTF16[] = {
33 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
35 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
36 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
37 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
38 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
39 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
40 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
41 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
42 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
43 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
44 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
46 |   2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
47 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
49 | };
50 | /* Number of bytes needed to encode code: 4 when code > 0xffff, otherwise 2. */
51 | static int
52 | utf16le_code_to_mbclen(OnigCodePoint code)
53 | {
54 |   return (code > 0xffff ?
4 : 2);
55 | }
56 | /* Byte length of the character at p, looked up in EncLen_UTF16 by the second byte p[1]. */
57 | static int
58 | utf16le_mbc_enc_len(const UChar* p)
59 | {
60 |   return EncLen_UTF16[*(p+1)];
61 | }
62 | /* Returns 1 when two bytes are available (p + 1 < end) and they are 0x0a 0x00. With USE_UNICODE_ALL_LINE_TERMINATORS it also accepts 0x85 0x00, 0x0d 0x00 (only when USE_CRNL_AS_LINE_TERMINATOR is not defined), 0x28 0x20 and 0x29 0x20. Returns 0 otherwise. */
63 | static int
64 | utf16le_is_mbc_newline(const UChar* p, const UChar* end)
65 | {
66 |   if (p + 1 < end) {
67 |     if (*p == 0x0a && *(p+1) == 0x00)
68 |       return 1;
69 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
70 |     if ((
71 | #ifndef USE_CRNL_AS_LINE_TERMINATOR
72 |          *p == 0x0d ||
73 | #endif
74 |          *p == 0x85) && *(p+1) == 0x00)
75 |       return 1;
76 |     if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
77 |       return 1;
78 | #endif
79 |   }
80 |   return 0;
81 | }
82 | /* Decodes the character at p. When p[1] satisfies UTF16_IS_SURROGATE_FIRST the code point is assembled from the four bytes p[0]..p[3]; otherwise it is p[1] * 256 + p[0]. NOTE(review): end is marked ARG_UNUSED, so p[1], p[2] and p[3] are read without a bounds check - confirm that callers guarantee those bytes exist. */
83 | static OnigCodePoint
84 | utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
85 | {
86 |   OnigCodePoint code;
87 |   UChar c0 = *p;
88 |   UChar c1 = *(p+1);
89 |
90 |   if (UTF16_IS_SURROGATE_FIRST(c1)) {
91 |     code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
92 |          + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
93 |          + p[2];
94 |   }
95 |   else {
96 |     code = c1 * 256 + p[0];
97 |   }
98 |   return code;
99 | }
100 | /* Writes code to buf in UTF-16LE and returns the number of bytes written. For code > 0xffff four bytes are produced (second byte offset from 0xd8, fourth byte offset from 0xdc); otherwise two bytes, low byte first. buf is written without a size check. */
101 | static int
102 | utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
103 | {
104 |   UChar* p = buf;
105 |
106 |   if (code > 0xffff) {
107 |     unsigned int plane, high;
108 |
109 |     plane = (code >> 16) - 1;
110 |     high = (code & 0xff00) >> 8;
111 |
112 |     *p++ = ((plane & 0x03) << 6) + (high >> 2);
113 |     *p++ = (plane >> 2) + 0xd8;
114 |     *p++ = (UChar )(code & 0xff);
115 |     *p = (high & 0x03) + 0xdc;
116 |     return 4;
117 |   }
118 |   else {
119 |     *p++ = (UChar )(code & 0xff);
120 |     *p++ = (UChar )((code & 0xff00) >> 8);
121 |     return 2;
122 |   }
123 | }
124 | /* Case-folds the character at *pp into fold. When the first byte passes ONIGENC_IS_ASCII_CODE and the second byte is 0, two bytes (lowered byte, 0) are written, *pp advances by 2 and 2 is returned; with USE_UNICODE_CASE_FOLD_TURKISH_AZERI and the ONIGENC_CASE_FOLD_TURKISH_AZERI flag, byte 0x49 folds to 0x31 0x01 instead. Every other input is delegated to onigenc_unicode_mbc_case_fold. */
125 | static int
126 | utf16le_mbc_case_fold(OnigCaseFoldType flag,
127 |                       const UChar** pp, const UChar* end, UChar* fold)
128 | {
129 |   const UChar* p = *pp;
130 |
131 |   if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {
132 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
133 |     if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
134 |       if (*p == 0x49) {
135 |         *fold++ = 0x31;
136 |         *fold = 0x01;
137 |         (*pp) += 2;
138 |
return 2;
139 |       }
140 |     }
141 | #endif
142 | /* Default ASCII path: the lowered low byte is written first, then a zero high byte. */
143 |     *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
144 |     *fold = 0;
145 |     *pp += 2;
146 |     return 2;
147 |   }
148 |   else
149 |     return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
150 |                                          fold);
151 | }
152 | /* Disabled code: everything from here to the matching #endif is compiled out by #if 0. */
153 | #if 0
154 | static int
155 | utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
156 |                          const UChar* end)
157 | {
158 |   const UChar* p = *pp;
159 |
160 |   (*pp) += EncLen_UTF16[*(p+1)];
161 |
162 |   if (*(p+1) == 0) {
163 |     int c, v;
164 |
165 |     if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
166 |       return TRUE;
167 |     }
168 | /* NOTE(review): further down, v | BIT_CTYPE_LOWER is non-zero whenever BIT_CTYPE_LOWER is a non-zero mask, which would make that test always true; a bitwise AND may have been intended - confirm before re-enabling this code. */
169 |     c = *p;
170 |     v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
171 |                        (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
172 |     if ((v | BIT_CTYPE_LOWER) != 0) {
173 |       /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
174 |       if (c >= 0xaa && c <= 0xba)
175 |         return FALSE;
176 |       else
177 |         return TRUE;
178 |     }
179 |     return (v != 0 ? TRUE : FALSE);
180 |   }
181 |
182 |   return FALSE;
183 | }
184 | #endif
185 | /* Returns s unchanged when s <= start. Otherwise s is moved back one byte if its offset from start is odd, then back two more bytes when the byte at s + 1 satisfies UTF16_IS_SURROGATE_SECOND and s > start + 1. */
186 | static UChar*
187 | utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
188 | {
189 |   if (s <= start) return (UChar* )s;
190 |
191 |   if ((s - start) % 2 == 1) {
192 |     s--;
193 |   }
194 |
195 |   if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
196 |     s -= 2;
197 |
198 |   return (UChar* )s;
199 | }
200 | /* Thin wrapper: forwards its arguments to onigenc_unicode_get_case_fold_codes_by_str with ONIG_ENCODING_UTF16_LE as the encoding. */
201 | static int
202 | utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
203 |     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
204 | {
205 |   return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
206 |                                                     flag, p, end, items);
207 | }
208 | /* Encoding descriptor for UTF-16LE. The initializer is positional; the inline comments label the name and the max/min byte length entries. */
209 | OnigEncodingType OnigEncodingUTF16_LE = {
210 |   utf16le_mbc_enc_len,
211 |   "UTF-16LE", /* name */
212 |   4, /* max byte length */
213 |   2, /* min byte length */
214 |   utf16le_is_mbc_newline,
215 |   utf16le_mbc_to_code,
216 |   utf16le_code_to_mbclen,
217 |   utf16le_code_to_mbc,
218 |   utf16le_mbc_case_fold,
219 |
onigenc_unicode_apply_all_case_fold,
220 |   utf16le_get_case_fold_codes_by_str,
221 |   onigenc_unicode_property_name_to_ctype,
222 |   onigenc_unicode_is_code_ctype,
223 |   onigenc_utf16_32_get_ctype_code_range,
224 |   utf16le_left_adjust_char_head,
225 |   onigenc_always_false_is_allowed_reverse_match
226 | };
227 |
--------------------------------------------------------------------------------
/htparse/test.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | /* NOTE(review): the header names of the five include directives above are missing from this text. The code below calls printf and strlen, so at least stdio.h and string.h are required - restore the full list from the original file. */
7 | #include "htparse.h"
8 | /* Parser hook callbacks. Each one prints what it was handed and returns 0. This one prints the opening line of a message. */
9 | static int
10 | _on_msg_start(htparser * p) {
11 |     printf("START {\n");
12 |     return 0;
13 | }
14 | /* Hook: prints the closing brace of a message. */
15 | static int
16 | _on_msg_end(htparser * p) {
17 |     printf("}\n");
18 |     return 0;
19 | }
20 | /* Hook: prints the len bytes at data, labelled path. */
21 | static int
22 | _path(htparser * p, const char * data, size_t len) {
23 |     printf("\tpath = '%.*s'\n", (int)len, data);
24 |     return 0;
25 | }
26 | /* Hook: prints the len bytes at data, labelled method. */
27 | static int
28 | _method(htparser * p, const char * data, size_t len) {
29 |     printf("\tmethod = '%.*s'\n", (int)len, data);
30 |     return 0;
31 | }
32 | /* Hook: prints the len bytes at data, labelled uri. */
33 | static int
34 | _uri(htparser * p, const char * data, size_t len) {
35 |     printf("\turi = '%.*s'\n", (int)len, data);
36 |     return 0;
37 | }
38 | /* Hook: prints the len bytes at data, labelled args. */
39 | static int
40 | _args(htparser * p, const char * data, size_t len) {
41 |     printf("\targs = '%.*s'\n", (int)len, data);
42 |     return 0;
43 | }
44 | /* Hook: prints the closing brace of the header section. */
45 | static int
46 | _hdrs_end(htparser * p) {
47 |     printf("\t}\n");
48 |     return 0;
49 | }
50 | /* Hook: prints the opening line of the header section. */
51 | static int
52 | _hdrs_start(htparser * p) {
53 |     printf("\thdrs {\n");
54 |     return 0;
55 | }
56 | /* Hook: prints the len bytes at data, labelled hdr_key. */
57 | static int
58 | _hdr_key(htparser * p, const char * data, size_t len) {
59 |     printf("\t\thdr_key = '%.*s'\n", (int)len, data);
60 |     return 0;
61 | }
62 | /* Hook: prints the len bytes at data, labelled hdr_val. */
63 | static int
64 | _hdr_val(htparser * p, const char * data, size_t len) {
65 |     printf("\t\thdr_val = '%.*s'\n", (int)len, data);
66 |     return 0;
67 | }
68 | /* Hook: prints the len bytes at data without a label (installed as the body hook in main). */
69 | static int
70 | _read_body(htparser * p, const char * data, size_t len) {
71 |
printf("\t'%.*s'\n", (int)len, data);
72 |     return 0;
73 | }
74 | /* Hook: prints the value of htparser_get_content_length(p). NOTE(review): the value is printed with %zu, which requires a size_t argument - confirm the return type declared in htparse.h. */
75 | static int
76 | _on_new_chunk(htparser * p) {
77 |     printf("\t--chunk payload (%zu)--\n", htparser_get_content_length(p));
78 |     /* printf("..chunk..\n"); */
79 |     return 0;
80 | }
81 | /* Prints l and its length, re-initialises the parser for the given type, runs the whole string through htparser_run and prints the returned byte count next to strlen(l). Prints the parser error string when htparser_get_error is non-zero. */
82 | static void
83 | _test(htparser * p, htparse_hooks * hooks, const char * l, htp_type type) {
84 |     printf("---- test ----\n");
85 |     printf("%zu, %s\n", strlen(l), l);
86 |
87 |     htparser_init(p, type);
88 |     printf("%zu == %zu\n", htparser_run(p, hooks, l, strlen(l)), strlen(l));
89 |
90 |     if (htparser_get_error(p)) {
91 |         printf("ERROR: %s\n", htparser_get_strerror(p));
92 |     }
93 |
94 |     printf("\n");
95 | }
96 | /* Re-initialises the parser once, then feeds each string of the NULL-terminated fragments array to htparser_run in order. After each piece the parser error string is printed if htparser_get_error is non-zero; feeding continues with the next piece regardless. */
97 | static void
98 | _test_fragments(htparser * p, htparse_hooks * hooks, const char ** fragments,
99 |                 htp_type type) {
100 |     int i = 0;
101 |
102 |     printf("---- test fragment ----\n");
103 |     htparser_init(p, type);
104 |
105 |     while (1) {
106 |         const char * l = fragments[i++];
107 |
108 |         if (l == NULL) {
109 |             break;
110 |         }
111 |
112 |         htparser_run(p, hooks, l, strlen(l));
113 |
114 |         if (htparser_get_error(p)) {
115 |             printf("ERROR: %s\n", htparser_get_strerror(p));
116 |         }
117 |     }
118 |
119 |     printf("\n");
120 | }
121 | /* A GET request cut into pieces for _test_fragments. Each literal carries an explicit trailing NUL escape; since the pieces are measured with strlen, that extra byte is never passed to the parser. */
122 | static const char * test_fragment_1[] = {
123 |     "GET \0",
124 |     " /fjdksf\0",
125 |     "jfkdslfds H\0",
126 |     "TTP/1.\0",
127 |     "1\r\0",
128 |     "\n\0",
129 |     "\r\0",
130 |     "\n\0",
131 |     NULL
132 | };
133 | /* A POST request with a Content-Length header and a ten-byte body, cut into pieces that split the header name, the header value and the body. */
134 | static const char * test_fragment_2[] = {
135 |     "POST /\0",
136 |     "h?a=b HTTP/1.0\r\n\0",
137 |     "Content-Len\0",
138 |     "gth\0",
139 |     ": 1\0",
140 |     "0\r\n\0",
141 |     "\r\n\0",
142 |     "12345\0",
143 |     "67890\0",
144 |     NULL
145 | };
146 | /* A chunked POST request delivered one line per piece. */
147 | static const char * test_chunk_fragment_1[] = {
148 |     "POST /stupid HTTP/1.1\r\n",
149 |     "Transfer-Encoding: chunked\r\n",
150 |     "\r\n",
151 |     "25\r\n",
152 |     "This is the data in the first chunk\r\n",
153 |     "\r\n",
154 |     "1C\r\n",
155 |     "and this is the second one\r\n",
156 |     "\r\n",
157 |     "3\r\n",
158 |     "con\r\n",
159 |     "8\r\n",
160 |     "sequence\r\n",
161 |     "0\r\n",
162 |     "\r\n",
163 |     NULL
164 | };
165 | /* The same chunked POST with one chunk body split in the middle of its data (c, then on), followed by a GET request fed to the same parser without re-initialising it. */
166 | static const char * test_chunk_fragment_2[] = {
167 |     "POST /stupid HTTP/1.1\r\n",
168 |     "Transfer-Encoding: chunked\r\n",
169 |     "\r\n",
170 |     "25\r\n",
171 |     "This is the data in the first chunk\r\n",
172 |     "\r\n",
173 |     "1C\r\n",
174 |     "and this is the second one\r\n",
175 |     "\r\n",
176 |     "3\r\n",
177 |     "c",
178 |     "on\r\n",
179 |     "8\r\n",
180 |     "sequence\r\n",
181 |     "0\r\n",
182 |     "\r\n",
183 |     "GET /foo?bar/baz? HTTP/1.0\r\n",
184 |     "Host: stupid.com\r\n",
185 |     "\r\n",
186 |     NULL
187 | };
188 | int
189 | main(int argc, char ** argv) { /* Builds the hook table, then pushes every sample through _test or _test_fragments and returns 0. argc and argv are not used. */
190 |     htparser * p = htparser_new();
191 |     htparse_hooks hooks = {
192 |         .on_msg_begin = _on_msg_start,
193 |         .method = _method,
194 |         .scheme = NULL,
195 |         .host = NULL,
196 |         .port = NULL,
197 |         .path = _path,
198 |         .args = _args,
199 |         .uri = _uri,
200 |         .on_hdrs_begin = _hdrs_start,
201 |         .hdr_key = _hdr_key,
202 |         .hdr_val = _hdr_val,
203 |         .on_hdrs_complete = _hdrs_end,
204 |         .on_new_chunk = _on_new_chunk,
205 |         .on_chunk_complete = NULL,
206 |         .on_chunks_complete = NULL,
207 |         .body = _read_body,
208 |         .on_msg_complete = _on_msg_end
209 |     };
210 | /* NOTE(review): p is passed to the helpers below without a NULL check - confirm whether htparser_new can fail. Samples are numbered 1-4 and 7-9; there is no test_5 or test_6 in this function. */
211 |     const char * test_1 = "GET / HTTP/1.0\r\n\r\n";
212 |     const char * test_2 = "GET /hi?a=b&c=d HTTP/1.1\r\n\r\n";
213 |     const char * test_3 = "GET /hi/die/?a=b&c=d HTTP/1.1\r\n\r\n";
214 |     const char * test_4 = "POST /fjdls HTTP/1.0\r\n"
215 |                           "Content-Length: 4\r\n"
216 |                           "\r\n"
217 |                           "abcd";
218 |     const char * test_7 = "POST /derp HTTP/1.1\r\n"
219 |                           "Transfer-Encoding: chunked\r\n\r\n"
220 |                           "1e\r\nall your base are belong to us\r\n"
221 |                           "0\r\n"
222 |                           "\r\n\0";
223 |     const char * test_8 = "GET /DIE HTTP/1.1\r\n"
224 |                           "HERP: DE\r\n"
225 |                           "\tRP\r\nthings:stuff\r\n\r\n";
226 |     const char * test_9 = "GET /big_content_len HTTP/1.1\r\n"
227 |                           "Content-Length: 18446744073709551615\r\n\r\n";
228 |
229 |     const char * test_fail = "GET /JF HfD]\r\n\r\n";
230 |     const char * test_resp_1 = "HTTP/1.0 200 OK\r\n"
231 |                                "Stuff: junk\r\n\r\n";
232 | /* Whole-buffer runs: the response sample first, then the request samples. test_fail is presumably expected to make the parser report an error (TODO confirm). */
233 |     _test(p, &hooks, test_resp_1, htp_type_response);
234 |     _test(p, &hooks, test_1, htp_type_request);
235 |     _test(p, &hooks, test_2, htp_type_request);
236 |     _test(p, &hooks, test_3, htp_type_request);
237 |     _test(p, &hooks, test_4, htp_type_request);
238 |     _test(p, &hooks, test_7, htp_type_request);
239 |     _test(p, &hooks, test_8, htp_type_request);
240 |     _test(p, &hooks, test_9, htp_type_request);
241 |     _test(p, &hooks, test_fail, htp_type_request);
242 | /* Piecewise runs: the same parser object is fed each fragment array in turn. */
243 |     _test_fragments(p, &hooks, test_fragment_1, htp_type_request);
244 |     _test_fragments(p, &hooks, test_fragment_2, htp_type_request);
245 |     _test_fragments(p, &hooks, test_chunk_fragment_1, htp_type_request);
246 |     _test_fragments(p, &hooks, test_chunk_fragment_2, htp_type_request);
247 | /* NOTE(review): p is not released before returning - confirm how a parser obtained from htparser_new should be freed. */
248 |     return 0;
249 | } /* main */
250 |
251 |
--------------------------------------------------------------------------------
/oniguruma/win32/Makefile:
--------------------------------------------------------------------------------
1 | # Oniguruma Makefile for Win32
2 |
3 | product_name = oniguruma
4 |
5 | CPPFLAGS =
6 | CFLAGS = -O2 -nologo /W3
7 | LDFLAGS =
8 | LOADLIBES =
9 | ARLIB = lib
10 | ARLIB_FLAGS = -nologo
11 | ARDLL = cl
12 | ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
13 | LINKFLAGS = -link -incremental:no -pdb:none
14 |
15 | INSTALL = install -c
16 | CP = copy
17 | CC = cl
18 | DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
19 | RUBYDIR = ..
20 | 21 | subdirs = 22 | 23 | libbase = onig 24 | libname = $(libbase)_s.lib 25 | dllname = $(libbase).dll 26 | dlllib = $(libbase).lib 27 | 28 | onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h 29 | posixheaders = onigposix.h 30 | headers = $(posixheaders) $(onigheaders) 31 | 32 | onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \ 33 | regexec.obj regenc.obj regsyntax.obj regtrav.obj \ 34 | regversion.obj st.obj 35 | posixobjs = regposix.obj regposerr.obj 36 | libobjs = $(onigobjs) $(posixobjs) 37 | 38 | jp_objs = $(encdir)\euc_jp.obj $(encdir)\sjis.obj 39 | iso8859_objs = $(encdir)\iso8859_1.obj $(encdir)\iso8859_2.obj \ 40 | $(encdir)\iso8859_3.obj $(encdir)\iso8859_4.obj \ 41 | $(encdir)\iso8859_5.obj $(encdir)\iso8859_6.obj \ 42 | $(encdir)\iso8859_7.obj $(encdir)\iso8859_8.obj \ 43 | $(encdir)\iso8859_9.obj $(encdir)\iso8859_10.obj \ 44 | $(encdir)\iso8859_11.obj $(encdir)\iso8859_13.obj \ 45 | $(encdir)\iso8859_14.obj $(encdir)\iso8859_15.obj \ 46 | $(encdir)\iso8859_16.obj 47 | 48 | encobjs = $(encdir)\ascii.obj $(encdir)\utf8.obj \ 49 | $(encdir)\unicode.obj \ 50 | $(encdir)\utf16_be.obj $(encdir)\utf16_le.obj \ 51 | $(encdir)\utf32_be.obj $(encdir)\utf32_le.obj \ 52 | $(jp_objs) $(iso8859_objs) \ 53 | $(encdir)\euc_tw.obj $(encdir)\euc_kr.obj $(encdir)\big5.obj \ 54 | $(encdir)\gb18030.obj \ 55 | $(encdir)\koi8_r.obj \ 56 | $(encdir)\cp1251.obj # $(encdir)\koi8.obj 57 | 58 | onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \ 59 | regsyntax.c regtrav.c regversion.c reggnu.c st.c 60 | posixsources = regposix.c regposerr.c 61 | libsources = $(posixsources) $(onigsources) 62 | rubysources = $(onigsources) 63 | 64 | encdir = enc 65 | patchfiles = re.c.168.patch re.c.181.patch 66 | distfiles = README COPYING HISTORY \ 67 | Makefile.in configure.in config.h.in configure \ 68 | $(headers) $(libsources) $(patchfiles) \ 69 | test.rb testconv.rb 70 | testc = testc 71 | testp = testp 72 | 73 | makeargs = 
$(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' 74 | 75 | .SUFFIXES: 76 | .SUFFIXES: .obj .c .h .ps .dvi .info .texinfo 77 | 78 | .c.obj: 79 | $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $< 80 | 81 | # targets 82 | default: all 83 | 84 | setup: 85 | $(CP) win32\config.h config.h 86 | $(CP) win32\testc.c testc.c 87 | 88 | 89 | all: $(libname) $(dllname) 90 | 91 | $(libname): $(libobjs) $(encobjs) 92 | $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs) 93 | 94 | $(dllname): $(libobjs) $(encobjs) 95 | $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) 96 | 97 | regparse.obj: regparse.c $(onigheaders) config.h st.h 98 | regext.obj: regext.c $(onigheaders) config.h 99 | regtrav.obj: regtrav.c $(onigheaders) config.h 100 | regcomp.obj: regcomp.c $(onigheaders) config.h 101 | regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h 102 | reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h 103 | regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h 104 | regenc.obj: regenc.c regenc.h oniguruma.h config.h 105 | regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h 106 | regversion.obj: regversion.c oniguruma.h config.h 107 | regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h 108 | regposerr.obj: regposerr.c $(posixheaders) config.h 109 | st.obj: st.c regint.h oniguruma.h config.h st.h 110 | 111 | $(encdir)\ascii.obj: $(encdir)\ascii.c regenc.h config.h 112 | $(encdir)\unicode.obj: $(encdir)\unicode.c regenc.h config.h 113 | $(encdir)\utf8.obj: $(encdir)\utf8.c regenc.h config.h 114 | $(encdir)\utf16_be.obj: $(encdir)\utf16_be.c regenc.h config.h 115 | $(encdir)\utf16_le.obj: $(encdir)\utf16_le.c regenc.h config.h 116 | $(encdir)\utf32_be.obj: $(encdir)\utf32_be.c regenc.h config.h 117 | $(encdir)\utf32_le.obj: $(encdir)\utf32_le.c regenc.h config.h 118 | $(encdir)\euc_jp.obj: $(encdir)\euc_jp.c regenc.h config.h 119 | $(encdir)\euc_tw.obj: $(encdir)\euc_tw.c regenc.h config.h 120 | 
$(encdir)\euc_kr.obj: $(encdir)\euc_kr.c regenc.h config.h 121 | $(encdir)\sjis.obj: $(encdir)\sjis.c regenc.h config.h 122 | $(encdir)\iso8859_1.obj: $(encdir)\iso8859_1.c regenc.h config.h 123 | $(encdir)\iso8859_2.obj: $(encdir)\iso8859_2.c regenc.h config.h 124 | $(encdir)\iso8859_3.obj: $(encdir)\iso8859_3.c regenc.h config.h 125 | $(encdir)\iso8859_4.obj: $(encdir)\iso8859_4.c regenc.h config.h 126 | $(encdir)\iso8859_5.obj: $(encdir)\iso8859_5.c regenc.h config.h 127 | $(encdir)\iso8859_6.obj: $(encdir)\iso8859_6.c regenc.h config.h 128 | $(encdir)\iso8859_7.obj: $(encdir)\iso8859_7.c regenc.h config.h 129 | $(encdir)\iso8859_8.obj: $(encdir)\iso8859_8.c regenc.h config.h 130 | $(encdir)\iso8859_9.obj: $(encdir)\iso8859_9.c regenc.h config.h 131 | $(encdir)\iso8859_10.obj: $(encdir)\iso8859_10.c regenc.h config.h 132 | $(encdir)\iso8859_11.obj: $(encdir)\iso8859_11.c regenc.h config.h 133 | $(encdir)\iso8859_13.obj: $(encdir)\iso8859_13.c regenc.h config.h 134 | $(encdir)\iso8859_14.obj: $(encdir)\iso8859_14.c regenc.h config.h 135 | $(encdir)\iso8859_15.obj: $(encdir)\iso8859_15.c regenc.h config.h 136 | $(encdir)\iso8859_16.obj: $(encdir)\iso8859_16.c regenc.h config.h 137 | $(encdir)\koi8.obj: $(encdir)\koi8.c regenc.h config.h 138 | $(encdir)\koi8_r.obj: $(encdir)\koi8_r.c regenc.h config.h 139 | $(encdir)\cp1251.obj: $(encdir)\cp1251.c regenc.h config.h 140 | $(encdir)\big5.obj: $(encdir)\big5.c regenc.h config.h 141 | $(encdir)\gb18030.obj: $(encdir)\gb18030.c regenc.h config.h 142 | 143 | 144 | # Ruby test 145 | rtest: 146 | $(RUBYDIR)\win32\ruby -w -Ke test.rb 147 | 148 | # C library test 149 | ctest: $(testc) 150 | .\$(testc) 151 | 152 | # POSIX C library test 153 | ptest: $(testp) 154 | .\$(testp) 155 | 156 | $(testc): $(testc).c $(libname) 157 | $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname) 158 | 159 | $(testp): $(testc).c $(dlllib) 160 | $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib) 161 | 162 | #$(testc)u.c: 
test.rb testconvu.rb 163 | # ruby -Ke testconvu.rb test.rb > $@ 164 | 165 | $(testc)u: $(testc)u.c $(libname) 166 | $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) 167 | 168 | clean: 169 | del *.obj $(encdir)\*.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj 170 | 171 | 172 | # backup file suffix 173 | SORIG = ruby_orig 174 | 175 | # ruby 1.9 source update 176 | 19: 177 | $(CP) regerror.c $(RUBYDIR) 178 | $(CP) regparse.c $(RUBYDIR) 179 | $(CP) regcomp.c $(RUBYDIR) 180 | $(CP) regexec.c $(RUBYDIR) 181 | $(CP) regenc.c $(RUBYDIR) 182 | $(CP) regint.h $(RUBYDIR) 183 | $(CP) regparse.h $(RUBYDIR) 184 | $(CP) regenc.h $(RUBYDIR) 185 | $(CP) oniguruma.h $(RUBYDIR) 186 | $(CP) enc\ascii.c $(RUBYDIR) 187 | $(CP) enc\utf8.c $(RUBYDIR) 188 | $(CP) enc\euc_jp.c $(RUBYDIR) 189 | $(CP) enc\sjis.c $(RUBYDIR) 190 | $(CP) enc\unicode.c $(RUBYDIR) 191 | 192 | 193 | samples: all 194 | $(CC) $(CFLAGS) -I. -o simple sample\simple.c $(dlllib) 195 | $(CC) $(CFLAGS) -I. -o posix sample\posix.c $(dlllib) 196 | $(CC) $(CFLAGS) -I. -o names sample\names.c $(dlllib) 197 | $(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib) 198 | $(CC) $(CFLAGS) -I. -o sql sample\sql.c $(dlllib) 199 | $(CC) $(CFLAGS) -I. -o encode sample\encode.c $(dlllib) 200 | $(CC) $(CFLAGS) -I. -o syntax sample\syntax.c $(dlllib) 201 | -------------------------------------------------------------------------------- /oniguruma/enc/euc_jp.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | euc_jp.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 |  */
29 |
30 | #include "regint.h"
31 | /* True when c lies outside 0xa1..0xfe: the difference is cast to UChar before the comparison, so values below 0xa1 wrap around (assumes UChar is an unsigned 8-bit type - TODO confirm). Despite the name, the test does not by itself prove that c starts a multi-byte character. */
32 | #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
33 | /* Character byte length indexed by the first byte: 2 for 0x8e, 3 for 0x8f, 2 for 0xa1-0xfe, and 1 for every other value. */
34 | static const int EncLen_EUCJP[] = {
35 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
44 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 |   1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
46 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50 |   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
51 | };
52 | /* Byte length of the character whose first byte is *p, read from EncLen_EUCJP. */
53 | static int
54 | mbc_enc_len(const UChar* p)
55 | {
56 |   return EncLen_EUCJP[*p];
57 | }
58 | /* Decodes the character at p. The length comes from enclen(ONIG_ENCODING_EUC_JP, p); a one-byte character is returned as is, otherwise each following byte is shifted in below the previous ones (n = (n << 8) + byte). The loop stops early if p reaches end, so a truncated character yields a shorter value. */
59 | static OnigCodePoint
60 | mbc_to_code(const UChar* p, const UChar* end)
61 | {
62 |   int c, i, len;
63 |   OnigCodePoint n;
64 |
65 |   len = enclen(ONIG_ENCODING_EUC_JP, p);
66 |   n = (OnigCodePoint )*p++;
67 |   if (len == 1) return n;
68 |
69 |   for (i = 1; i < len; i++) {
70 |     if (p >= end) break;
71 |     c = *p++;
72 |     n <<= 8; n += c;
73 |   }
74 |   return n;
75 | }
76 | /* Byte length for code: 1 if ONIGENC_IS_CODE_ASCII holds, 3 if any of bits 16-23 are set, 2 if any of bits 8-15 are set, otherwise ONIGERR_INVALID_CODE_POINT_VALUE. */
77 | static int
78 | code_to_mbclen(OnigCodePoint code)
79 | {
80 |   if (ONIGENC_IS_CODE_ASCII(code)) return 1;
81 |   else if ((code & 0xff0000) != 0) return 3;
82 |   else if ((code & 0xff00) != 0) return 2;
83 |   else
84 |     return ONIGERR_INVALID_CODE_POINT_VALUE;
85 | }
86 | /* Disabled code: everything from here to the matching #endif is compiled out by #if 0. */
87 | #if 0
88 | static int
89 | code_to_mbc_first(OnigCodePoint code)
90 | {
91 |   int first;
92 |
93 |   if ((code & 0xff0000) != 0) {
94 |     first = (code >> 16) & 0xff;
95 |   }
96 |   else if ((code & 0xff00) != 0) {
97 |     first = (code >> 8) &
0xff;
98 |   }
99 |   else {
100 |     return (int )code;
101 |   }
102 |   return first;
103 | }
104 | #endif
105 | /* Writes code to buf most significant byte first: the bits 16-23 byte only if it is non-zero, the bits 8-15 byte only if it is non-zero, and the low byte always. Returns the number of bytes written, or ONIGERR_INVALID_CODE_POINT_VALUE when enclen of the first written byte disagrees with that count. */
106 | static int
107 | code_to_mbc(OnigCodePoint code, UChar *buf)
108 | {
109 |   UChar *p = buf;
110 |
111 |   if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
112 |   if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
113 |   *p++ = (UChar )(code & 0xff);
114 |
115 | #if 1
116 |   if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
117 |     return ONIGERR_INVALID_CODE_POINT_VALUE;
118 | #endif
119 |   return p - buf;
120 | }
121 | /* Copies the character at *pp to lower, advances *pp past it and returns its byte length. A character for which ONIGENC_IS_MBC_ASCII holds is passed through ONIGENC_ASCII_CODE_TO_LOWER_CASE; any other character is copied byte for byte without change. flag and end are unused. */
122 | static int
123 | mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
124 |               const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
125 | {
126 |   int len;
127 |   const UChar* p = *pp;
128 |
129 |   if (ONIGENC_IS_MBC_ASCII(p)) {
130 |     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
131 |     (*pp)++;
132 |     return 1;
133 |   }
134 |   else {
135 |     int i;
136 |
137 |     len = enclen(ONIG_ENCODING_EUC_JP, p);
138 |     for (i = 0; i < len; i++) {
139 |       *lower++ = *p++;
140 |     }
141 |     (*pp) += len;
142 |     return len; /* return byte length of converted char to lower */
143 |   }
144 | }
145 | /* Returns s when s <= start. Otherwise walks back from s until eucjp_islead holds or start is reached, returns that position if the character there extends past s, and failing that returns the end of that character plus the remaining distance to s rounded down to an even number of bytes. */
146 | static UChar*
147 | left_adjust_char_head(const UChar* start, const UChar* s)
148 | {
149 |   /* In this encoding
150 |      mb-trail bytes do not mix with single bytes.
151 |   */
152 |   const UChar *p;
153 |   int len;
154 |
155 |   if (s <= start) return (UChar* )s;
156 |   p = s;
157 |
158 |   while (!eucjp_islead(*p) && p > start) p--; /* back up while the byte is inside 0xa1..0xfe and p is still after start */
159 |   len = enclen(ONIG_ENCODING_EUC_JP, p);
160 |   if (p + len > s) return (UChar* )p; /* the character starting at p covers s */
161 |   p += len;
162 |   return (UChar* )(p + ((s - p) & ~1)); /* clearing the low bit rounds the remaining offset down to an even count */
163 | }
164 | /* Returns TRUE when the byte at s is <= 0x7e, or equals 0x8e or 0x8f; FALSE otherwise. end is unused. */
165 | static int
166 | is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
167 | {
168 |   const UChar c = *s;
169 |   if (c <= 0x7e || c == 0x8e || c == 0x8f)
170 |     return TRUE;
171 |   else
172 |     return FALSE;
173 | }
174 |
175 | /* File-scope state for the property tables. PropertyInited is set to 1 by init_property_list; the list, its counters and the name table are not assigned directly in the visible code - presumably that happens inside the PROPERTY_LIST_* macros (TODO confirm). */
176 | static int PropertyInited = 0;
177 | static const OnigCodePoint** PropertyList;
178 | static int PropertyListNum;
179 | static int PropertyListSize;
180 | static hash_table_type* PropertyNameTable;
181 | /* Code range tables. The first element appears to be the number of from/to pairs that follow (1 here, 3 for Katakana) - confirm against onig_is_in_code_range. */
182 | static const OnigCodePoint CR_Hiragana[] = {
183 |   1,
184 |   0xa4a1, 0xa4f3
185 | }; /* CR_Hiragana */
186 |
187 | static const OnigCodePoint CR_Katakana[] = {
188 |   3,
189 |   0xa5a1, 0xa5f6,
190 |   0xaaa6, 0xaaaf,
191 |   0xaab1, 0xaadd
192 | }; /* CR_Katakana */
193 | /* Registers the Hiragana and Katakana range tables through PROPERTY_LIST_ADD_PROP, sets PropertyInited to 1 and returns r. r is never assigned in the visible text and no goto targets the end label, so both are presumably used inside the macro (TODO confirm). */
194 | static int
195 | init_property_list(void)
196 | {
197 |   int r;
198 |
199 |   PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
200 |   PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
201 |   PropertyInited = 1;
202 |
203 |  end:
204 |   return r;
205 | }
206 | /* Looks the name in [p, end) up in PropertyNameTable after PROPERTY_LIST_INIT_CHECK. When the lookup returns 0 the call falls back to onigenc_minimum_property_name_to_ctype; otherwise the stored value is returned cast to int. */
207 | static int
208 | property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
209 | {
210 |   hash_data_type ctype;
211 |
212 |   PROPERTY_LIST_INIT_CHECK;
213 |
214 |   if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {
215 |     return onigenc_minimum_property_name_to_ctype(enc, p, end);
216 |   }
217 |
218 |   return (int )ctype;
219 | }
220 | /* Tests whether code belongs to ctype. For ctype <= ONIGENC_MAX_STD_CTYPE, codes below 128 use ONIGENC_IS_ASCII_CODE_CTYPE, and other codes match only when CTYPE_IS_WORD_GRAPH_PRINT(ctype) holds and code_to_mbclen(code) > 1. Larger ctype values index PropertyList after subtracting ONIGENC_MAX_STD_CTYPE + 1 and are tested with onig_is_in_code_range; an index beyond PropertyListNum returns ONIGERR_TYPE_BUG. */
221 | static int
222 | is_code_ctype(OnigCodePoint code, unsigned int ctype)
223 | {
224 |   if (ctype <= ONIGENC_MAX_STD_CTYPE) {
225 |     if (code < 128)
226 |       return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
227 |     else {
228 |       if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
229 |         return (code_to_mbclen(code) > 1 ?
TRUE : FALSE);
230 |       }
231 |     }
232 |   }
233 |   else {
234 |     PROPERTY_LIST_INIT_CHECK;
235 |
236 |     ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
237 |     if (ctype >= (unsigned int )PropertyListNum)
238 |       return ONIGERR_TYPE_BUG;
239 |
240 |     return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
241 |   }
242 |
243 |   return FALSE;
244 | }
245 | /* For ctype <= ONIGENC_MAX_STD_CTYPE returns ONIG_NO_SUPPORT_CONFIG without touching the outputs. For larger values sets *sb_out to 0x80, stores the PropertyList entry for ctype - (ONIGENC_MAX_STD_CTYPE + 1) in *ranges and returns 0; an index beyond PropertyListNum returns ONIGERR_TYPE_BUG (with *sb_out already written). */
246 | static int
247 | get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
248 |                      const OnigCodePoint* ranges[])
249 | {
250 |   if (ctype <= ONIGENC_MAX_STD_CTYPE) {
251 |     return ONIG_NO_SUPPORT_CONFIG;
252 |   }
253 |   else {
254 |     *sb_out = 0x80;
255 |
256 |     PROPERTY_LIST_INIT_CHECK;
257 |
258 |     ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
259 |     if (ctype >= (OnigCtype )PropertyListNum)
260 |       return ONIGERR_TYPE_BUG;
261 |
262 |     *ranges = PropertyList[ctype];
263 |     return 0;
264 |   }
265 | }
266 |
267 | /* Encoding descriptor for EUC-JP. The initializer is positional; the inline comments label the name and the max/min encoded length entries. */
268 | OnigEncodingType OnigEncodingEUC_JP = {
269 |   mbc_enc_len,
270 |   "EUC-JP", /* name */
271 |   3, /* max enc length */
272 |   1, /* min enc length */
273 |   onigenc_is_mbc_newline_0x0a,
274 |   mbc_to_code,
275 |   code_to_mbclen,
276 |   code_to_mbc,
277 |   mbc_case_fold,
278 |   onigenc_ascii_apply_all_case_fold,
279 |   onigenc_ascii_get_case_fold_codes_by_str,
280 |   property_name_to_ctype,
281 |   is_code_ctype,
282 |   get_ctype_code_range,
283 |   left_adjust_char_head,
284 |   is_allowed_reverse_match
285 | };
286 |
--------------------------------------------------------------------------------
/oniguruma/enc/cp1251.c:
--------------------------------------------------------------------------------
1 | /**********************************************************************
2 |   cp1251.c - Oniguruma (regular expression library)
3 | **********************************************************************/
4 | /*-
5 |  * Copyright (c) 2006-2007 Byte
6 |  * K.Kosako
7 |  * All rights reserved.
8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions 11 | * are met: 12 | * 1. Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in the 16 | * documentation and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 | * SUCH DAMAGE. 
29 |  */
30 |
31 | #include "regenc.h"
32 | /* ENC_CP1251_TO_LOWER_CASE(c) reads the lower-case table at index c. ENC_IS_CP1251_CTYPE(code, ctype) is true when the ctype table entry for code has the bit CTYPE_TO_BIT(ctype) set. Neither macro range-checks its index. */
33 | #define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c]
34 | #define ENC_IS_CP1251_CTYPE(code,ctype) \
35 |   ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
36 | /* Lower-case mapping indexed by byte value, eight entries per row, written as octal character constants. For example entries 0101-0132 hold 0141-0172, while entries 0141-0172 map to themselves. */
37 | static const UChar EncCP1251_ToLowerCaseTable[256] = {
38 |   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
39 |   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
40 |   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
41 |   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
42 |   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
43 |   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
44 |   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
45 |   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
46 |   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
47 |   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
48 |   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
49 |   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
50 |   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
51 |   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
52 |   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
53 |   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
54 |   '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207',
55 |   '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237',
56 |   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
57 |   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
58 |   '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247',
59 |   '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
60 |   '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267',
61 |   '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277',
62 |   '\340', '\341', '\342', '\343', '\344', '\345',
'\346', '\347', 63 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 64 | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 65 | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 66 | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 67 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 68 | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 69 | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' 70 | }; 71 | 72 | static const unsigned short EncCP1251_CtypeTable[256] = { 73 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 74 | 0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 75 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 76 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 77 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 78 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 79 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 80 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 81 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 82 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 83 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 84 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 85 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 86 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 87 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 88 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 89 | 0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 90 | 0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 91 | 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 92 | 0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 93 | 0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 
0x34a2, 0x01a0, 0x01a0, 94 | 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2, 95 | 0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0, 96 | 0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2, 97 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 98 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 99 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 100 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 101 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 102 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 103 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 104 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 105 | }; 106 | 107 | static int 108 | cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 109 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 110 | { 111 | const UChar* p = *pp; 112 | 113 | *lower = ENC_CP1251_TO_LOWER_CASE(*p); 114 | (*pp)++; 115 | return 1; 116 | } 117 | 118 | static int 119 | cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype) 120 | { 121 | if (code < 256) 122 | return ENC_IS_CP1251_CTYPE(code, ctype); 123 | else 124 | return FALSE; 125 | } 126 | 127 | static const OnigPairCaseFoldCodes CaseFoldMap[] = { 128 | { 0xb8, 0xa8 }, 129 | 130 | { 0xe0, 0xc0 }, 131 | { 0xe1, 0xc1 }, 132 | { 0xe2, 0xc2 }, 133 | { 0xe3, 0xc3 }, 134 | { 0xe4, 0xc4 }, 135 | { 0xe5, 0xc5 }, 136 | { 0xe6, 0xc6 }, 137 | { 0xe7, 0xc7 }, 138 | { 0xe8, 0xc8 }, 139 | { 0xe9, 0xc9 }, 140 | { 0xea, 0xca }, 141 | { 0xeb, 0xcb }, 142 | { 0xec, 0xcc }, 143 | { 0xed, 0xcd }, 144 | { 0xee, 0xce }, 145 | { 0xef, 0xcf }, 146 | 147 | { 0xf0, 0xd0 }, 148 | { 0xf1, 0xd1 }, 149 | { 0xf2, 0xd2 }, 150 | { 0xf3, 0xd3 }, 151 | { 0xf4, 0xd4 }, 152 | { 0xf5, 0xd5 }, 153 | { 0xf6, 0xd6 }, 154 | { 0xf7, 0xd7 }, 155 | { 0xf8, 0xd8 }, 156 | { 0xf9, 0xd9 }, 157 | { 0xfa, 0xda }, 158 | { 0xfb, 0xdb }, 159 | 
{ 0xfc, 0xdc }, 160 | { 0xfd, 0xdd }, 161 | { 0xfe, 0xde }, 162 | { 0xff, 0xdf } 163 | }; 164 | 165 | static int 166 | cp1251_apply_all_case_fold(OnigCaseFoldType flag, 167 | OnigApplyAllCaseFoldFunc f, void* arg) 168 | { 169 | return onigenc_apply_all_case_fold_with_map( 170 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 171 | flag, f, arg); 172 | } 173 | 174 | static int 175 | cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, 176 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 177 | { 178 | return onigenc_get_case_fold_codes_by_str_with_map( 179 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 180 | flag, p, end, items); 181 | } 182 | 183 | OnigEncodingType OnigEncodingCP1251 = { 184 | onigenc_single_byte_mbc_enc_len, 185 | "CP1251", /* name */ 186 | 1, /* max enc length */ 187 | 1, /* min enc length */ 188 | onigenc_is_mbc_newline_0x0a, 189 | onigenc_single_byte_mbc_to_code, 190 | onigenc_single_byte_code_to_mbclen, 191 | onigenc_single_byte_code_to_mbc, 192 | cp1251_mbc_case_fold, 193 | cp1251_apply_all_case_fold, 194 | cp1251_get_case_fold_codes_by_str, 195 | onigenc_minimum_property_name_to_ctype, 196 | cp1251_is_code_ctype, 197 | onigenc_not_support_get_ctype_code_range, 198 | onigenc_single_byte_left_adjust_char_head, 199 | onigenc_always_true_is_allowed_reverse_match 200 | }; 201 | -------------------------------------------------------------------------------- /oniguruma/enc/koi8_r.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | koi8_r.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] 33 | #define ENC_IS_KOI8_R_CTYPE(code,ctype) \ 34 | ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 35 | 36 | static const UChar EncKOI8_R_ToLowerCaseTable[256] = { 37 | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 38 | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 39 | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 40 | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 41 | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 42 | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 43 | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 44 | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 45 | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 46 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 47 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 48 | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 49 | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 50 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 51 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 52 | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 53 | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 54 | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 55 | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 56 | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 57 | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 58 | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 59 | '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', 60 | '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 61 | '\300', '\301', '\302', '\303', '\304', '\305', 
'\306', '\307', 62 | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 63 | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 64 | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 65 | '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 66 | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 67 | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 68 | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' 69 | }; 70 | 71 | static const unsigned short EncKOI8_R_CtypeTable[256] = { 72 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 73 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 74 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 75 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 76 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 77 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 78 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 79 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 80 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 81 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 82 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 83 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 84 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 85 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 86 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 87 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 88 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 89 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 90 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 91 | 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, 92 | 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 
0x00a0, 0x00a0, 0x00a0, 93 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 94 | 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 95 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 96 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 97 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 98 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 99 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 100 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 101 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 102 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 103 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 104 | }; 105 | 106 | static int 107 | koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 108 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 109 | { 110 | const UChar* p = *pp; 111 | 112 | *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); 113 | (*pp)++; 114 | return 1; 115 | } 116 | 117 | #if 0 118 | static int 119 | koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 120 | { 121 | int v; 122 | const UChar* p = *pp; 123 | 124 | (*pp)++; 125 | v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 126 | return (v != 0 ? 
TRUE : FALSE); 127 | } 128 | #endif 129 | 130 | static int 131 | koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) 132 | { 133 | if (code < 256) 134 | return ENC_IS_KOI8_R_CTYPE(code, ctype); 135 | else 136 | return FALSE; 137 | } 138 | 139 | static const OnigPairCaseFoldCodes CaseFoldMap[] = { 140 | { 0xa3, 0xb3 }, 141 | 142 | { 0xc0, 0xe0 }, 143 | { 0xc1, 0xe1 }, 144 | { 0xc2, 0xe2 }, 145 | { 0xc3, 0xe3 }, 146 | { 0xc4, 0xe4 }, 147 | { 0xc5, 0xe5 }, 148 | { 0xc6, 0xe6 }, 149 | { 0xc7, 0xe7 }, 150 | { 0xc8, 0xe8 }, 151 | { 0xc9, 0xe9 }, 152 | { 0xca, 0xea }, 153 | { 0xcb, 0xeb }, 154 | { 0xcc, 0xec }, 155 | { 0xcd, 0xed }, 156 | { 0xce, 0xee }, 157 | { 0xcf, 0xef }, 158 | 159 | { 0xd0, 0xf0 }, 160 | { 0xd1, 0xf1 }, 161 | { 0xd2, 0xf2 }, 162 | { 0xd3, 0xf3 }, 163 | { 0xd4, 0xf4 }, 164 | { 0xd5, 0xf5 }, 165 | { 0xd6, 0xf6 }, 166 | { 0xd7, 0xf7 }, 167 | { 0xd8, 0xf8 }, 168 | { 0xd9, 0xf9 }, 169 | { 0xda, 0xfa }, 170 | { 0xdb, 0xfb }, 171 | { 0xdc, 0xfc }, 172 | { 0xdd, 0xfd }, 173 | { 0xde, 0xfe }, 174 | { 0xdf, 0xff } 175 | }; 176 | 177 | static int 178 | koi8_r_apply_all_case_fold(OnigCaseFoldType flag, 179 | OnigApplyAllCaseFoldFunc f, void* arg) 180 | { 181 | return onigenc_apply_all_case_fold_with_map( 182 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 183 | flag, f, arg); 184 | } 185 | 186 | static int 187 | koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, 188 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 189 | { 190 | return onigenc_get_case_fold_codes_by_str_with_map( 191 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 192 | flag, p, end, items); 193 | } 194 | 195 | OnigEncodingType OnigEncodingKOI8_R = { 196 | onigenc_single_byte_mbc_enc_len, 197 | "KOI8-R", /* name */ 198 | 1, /* max enc length */ 199 | 1, /* min enc length */ 200 | onigenc_is_mbc_newline_0x0a, 201 | onigenc_single_byte_mbc_to_code, 202 | onigenc_single_byte_code_to_mbclen, 203 | 
onigenc_single_byte_code_to_mbc, 204 | koi8_r_mbc_case_fold, 205 | koi8_r_apply_all_case_fold, 206 | koi8_r_get_case_fold_codes_by_str, 207 | onigenc_minimum_property_name_to_ctype, 208 | koi8_r_is_code_ctype, 209 | onigenc_not_support_get_ctype_code_range, 210 | onigenc_single_byte_left_adjust_char_head, 211 | onigenc_always_true_is_allowed_reverse_match 212 | }; 213 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_7.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_7.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] 33 | #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ 34 | ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 35 | 36 | static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { 37 | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 38 | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 39 | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 40 | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 41 | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 42 | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 43 | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 44 | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 45 | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 46 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 47 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 48 | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 49 | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 50 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 51 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 52 | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 53 | '\200', '\201', '\202', 
'\203', '\204', '\205', '\206', '\207', 54 | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 55 | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 56 | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 57 | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 58 | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 59 | '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', 60 | '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', 61 | '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 62 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 63 | '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', 64 | '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', 65 | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 66 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 67 | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 68 | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' 69 | }; 70 | 71 | static const unsigned short EncISO_8859_7_CtypeTable[256] = { 72 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 73 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 74 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 75 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 76 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 77 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 78 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 79 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 80 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 81 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 82 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 83 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 84 | 0x41a0, 
0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 85 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 86 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 87 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 88 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 89 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 90 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 91 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 92 | 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, 93 | 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, 94 | 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, 95 | 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, 96 | 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 97 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 98 | 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 99 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 100 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 101 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 102 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 103 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000 104 | }; 105 | 106 | static int 107 | mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 108 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 109 | { 110 | const UChar* p = *pp; 111 | 112 | *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); 113 | (*pp)++; 114 | return 1; 115 | } 116 | 117 | #if 0 118 | static int 119 | is_mbc_ambiguous(OnigCaseFoldType flag, 120 | const UChar** pp, const UChar* end) 121 | { 122 | int v; 123 | const UChar* p = *pp; 124 | 125 | (*pp)++; 126 | v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 127 | if ((v | BIT_CTYPE_LOWER) != 0) { 128 | if (*p == 0xc0 || *p == 
0xe0) 129 | return FALSE; 130 | else 131 | return TRUE; 132 | } 133 | 134 | return (v != 0 ? TRUE : FALSE); 135 | } 136 | #endif 137 | 138 | static int 139 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 140 | { 141 | if (code < 256) 142 | return ENC_IS_ISO_8859_7_CTYPE(code, ctype); 143 | else 144 | return FALSE; 145 | } 146 | 147 | static const OnigPairCaseFoldCodes CaseFoldMap[] = { 148 | { 0xb6, 0xdc }, 149 | { 0xb8, 0xdd }, 150 | { 0xb9, 0xde }, 151 | { 0xba, 0xdf }, 152 | { 0xbc, 0xfc }, 153 | { 0xbe, 0xfd }, 154 | { 0xbf, 0xfe }, 155 | 156 | { 0xc1, 0xe1 }, 157 | { 0xc2, 0xe2 }, 158 | { 0xc3, 0xe3 }, 159 | { 0xc4, 0xe4 }, 160 | { 0xc5, 0xe5 }, 161 | { 0xc6, 0xe6 }, 162 | { 0xc7, 0xe7 }, 163 | { 0xc8, 0xe8 }, 164 | { 0xc9, 0xe9 }, 165 | { 0xca, 0xea }, 166 | { 0xcb, 0xeb }, 167 | { 0xcc, 0xec }, 168 | { 0xcd, 0xed }, 169 | { 0xce, 0xee }, 170 | { 0xcf, 0xef }, 171 | 172 | { 0xd0, 0xf0 }, 173 | { 0xd1, 0xf1 }, 174 | { 0xd2, 0xf2 }, 175 | { 0xd3, 0xf3 }, 176 | { 0xd4, 0xf4 }, 177 | { 0xd5, 0xf5 }, 178 | { 0xd6, 0xf6 }, 179 | { 0xd7, 0xf7 }, 180 | { 0xd8, 0xf8 }, 181 | { 0xd9, 0xf9 }, 182 | { 0xda, 0xfa }, 183 | { 0xdb, 0xfb } 184 | }; 185 | 186 | static int 187 | apply_all_case_fold(OnigCaseFoldType flag, 188 | OnigApplyAllCaseFoldFunc f, void* arg) 189 | { 190 | return onigenc_apply_all_case_fold_with_map( 191 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 192 | flag, f, arg); 193 | } 194 | 195 | static int 196 | get_case_fold_codes_by_str(OnigCaseFoldType flag, 197 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 198 | { 199 | return onigenc_get_case_fold_codes_by_str_with_map( 200 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 201 | flag, p, end, items); 202 | } 203 | 204 | 205 | OnigEncodingType OnigEncodingISO_8859_7 = { 206 | onigenc_single_byte_mbc_enc_len, 207 | "ISO-8859-7", /* name */ 208 | 1, /* max enc length */ 209 | 1, /* min enc length */ 210 | 
onigenc_is_mbc_newline_0x0a, 211 | onigenc_single_byte_mbc_to_code, 212 | onigenc_single_byte_code_to_mbclen, 213 | onigenc_single_byte_code_to_mbc, 214 | mbc_case_fold, 215 | apply_all_case_fold, 216 | get_case_fold_codes_by_str, 217 | onigenc_minimum_property_name_to_ctype, 218 | is_code_ctype, 219 | onigenc_not_support_get_ctype_code_range, 220 | onigenc_single_byte_left_adjust_char_head, 221 | onigenc_always_true_is_allowed_reverse_match 222 | }; 223 | --------------------------------------------------------------------------------