├── oniguruma ├── AUTHORS ├── testc.c ├── README.ja ├── win32 │ ├── testc.c │ ├── config.h │ └── Makefile ├── sample │ ├── Makefile.am │ ├── simple.c │ ├── syntax.c │ ├── names.c │ ├── sql.c │ ├── posix.c │ ├── listcap.c │ └── crnl.c ├── config.h.in ├── COPYING ├── onig-config.in ├── st.h ├── CMakeLists.txt ├── regversion.c ├── enc │ ├── ascii.c │ ├── euc_tw.c │ ├── iso8859_6.c │ ├── iso8859_8.c │ ├── iso8859_11.c │ ├── euc_kr.c │ ├── utf32_be.c │ ├── utf32_le.c │ ├── big5.c │ ├── utf16_be.c │ ├── utf16_le.c │ ├── euc_jp.c │ ├── cp1251.c │ ├── koi8_r.c │ ├── iso8859_7.c │ └── utf8.c ├── regtrav.c ├── Makefile.am ├── oniggnu.h ├── regposerr.c ├── reggnu.c ├── onigposix.h ├── README └── regext.c ├── .gitignore ├── evhtp-config.h.in ├── htparse ├── Makefile └── htparse.h ├── evthr ├── Makefile ├── test.c ├── README └── evthr.h ├── examples ├── test_basic.c ├── test_vhost.c ├── test_client.c └── test_proxy.c ├── CMakeModules ├── BaseConfig.cmake └── FindLibEvent.cmake ├── LICENSE ├── README.markdown └── CMakeLists.txt /oniguruma/AUTHORS: -------------------------------------------------------------------------------- 1 | sndgk393 AT ybb DOT ne DOT jp (K.Kosako) 2 | -------------------------------------------------------------------------------- /oniguruma/testc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerotao/libevhtp/HEAD/oniguruma/testc.c -------------------------------------------------------------------------------- /oniguruma/README.ja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerotao/libevhtp/HEAD/oniguruma/README.ja -------------------------------------------------------------------------------- /oniguruma/win32/testc.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zerotao/libevhtp/HEAD/oniguruma/win32/testc.c 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # cmake manages these; they shouldn't go in version control 2 | 3 | /CMakeCache.txt 4 | /CMakeFiles/** 5 | /Makefile 6 | /cmake_install.cmake 7 | 8 | # generated .h files 9 | 10 | /compat/sys/tree.h 11 | /oniguruma/config.h 12 | 13 | # compiled files 14 | 15 | /libevhtp.a 16 | /test 17 | /test_basic 18 | /test_vhost 19 | 20 | /test_client 21 | /test_proxy 22 | -------------------------------------------------------------------------------- /evhtp-config.h.in: -------------------------------------------------------------------------------- 1 | #ifndef __EVHTP_CONFIG_H__ 2 | #define __EVHTP_CONFIG_H__ 3 | 4 | #undef EVHTP_DISABLE_EVTHR 5 | #undef EVHTP_DISABLE_REGEX 6 | #undef EVHTP_DISABLE_SSL 7 | #undef EVHTP_DISABLE_EVTHR 8 | 9 | #cmakedefine EVHTP_DISABLE_EVTHR 10 | #cmakedefine EVHTP_DISABLE_REGEX 11 | #cmakedefine EVHTP_DISABLE_SSL 12 | #cmakedefine EVHTP_DISABLE_EVTHR 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /htparse/Makefile: -------------------------------------------------------------------------------- 1 | SRC = htparse.c 2 | OUT = libhtparse.a 3 | OBJ = $(SRC:.c=.o) 4 | INCLUDES = -I. 5 | CFLAGS += -ggdb 6 | LDFLAGS += 7 | CC = gcc 8 | 9 | .SUFFIXES: .c 10 | 11 | default: $(OUT) test 12 | 13 | .c.o: 14 | $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@ 15 | 16 | $(OUT): $(OBJ) 17 | ar rcs $(OUT) $(OBJ) 18 | 19 | test: $(OUT) test.c 20 | $(CC) $(INCLUDES) $(CFLAGS) test.c -o test $(OUT) 21 | 22 | clean: 23 | rm -f $(OBJ) $(OUT) test 24 | -------------------------------------------------------------------------------- /evthr/Makefile: -------------------------------------------------------------------------------- 1 | SRC = evthr.c 2 | OUT = libevthr.a 3 | OBJ = $(SRC:.c=.o) 4 | INCLUDES = -I. 
5 | CFLAGS += -Wall -Wextra -ggdb 6 | LDFLAGS += -ggdb 7 | CC = gcc 8 | 9 | .SUFFIXES: .c 10 | 11 | default: $(OUT) 12 | 13 | .c.o: 14 | $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@ 15 | 16 | $(OUT): $(OBJ) 17 | ar rcs $(OUT) $(OBJ) 18 | 19 | test: $(OUT) test.c 20 | $(CC) $(INCLUDES) $(CFLAGS) test.c -o test $(OUT) -levent -levent_pthreads -lpthread 21 | 22 | clean: 23 | rm -f $(OBJ) $(OUT) test 24 | 25 | -------------------------------------------------------------------------------- /oniguruma/sample/Makefile.am: -------------------------------------------------------------------------------- 1 | noinst_PROGRAMS = encode listcap names posix simple sql syntax crnl 2 | 3 | libname = $(top_builddir)/libonig.la 4 | LDADD = $(libname) 5 | INCLUDES = -I$(top_srcdir) -I$(includedir) 6 | 7 | encode_SOURCES = encode.c 8 | listcap_SOURCES = listcap.c 9 | names_SOURCES = names.c 10 | posix_SOURCES = posix.c 11 | simple_SOURCES = simple.c 12 | sql_SOURCES = sql.c 13 | syntax_SOURCES = syntax.c 14 | 15 | 16 | sampledir = $(top_builddir)/sample 17 | 18 | test: encode listcap names posix simple sql syntax 19 | @$(sampledir)/encode 20 | @$(sampledir)/listcap 21 | @$(sampledir)/names 22 | @$(sampledir)/posix 23 | @$(sampledir)/simple 24 | @$(sampledir)/sql 25 | @$(sampledir)/syntax 26 | -------------------------------------------------------------------------------- /examples/test_basic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | void 9 | testcb(evhtp_request_t * req, void * a) { 10 | const char * str = a; 11 | 12 | evbuffer_add_printf(req->buffer_out, "%s", str); 13 | evhtp_send_reply(req, EVHTP_RES_OK); 14 | } 15 | 16 | int 17 | main(int argc, char ** argv) { 18 | evbase_t * evbase = event_base_new(); 19 | evhtp_t * htp = evhtp_new(evbase, NULL); 20 | 21 | evhtp_set_cb(htp, "/simple/", testcb, "simple"); 22 | evhtp_set_cb(htp, "/1/ping", testcb, "one"); 23 | 
evhtp_set_cb(htp, "/1/ping.json", testcb, "two"); 24 | #ifndef EVHTP_DISABLE_EVTHR 25 | evhtp_use_threads(htp, NULL, 4, NULL); 26 | #endif 27 | evhtp_bind_socket(htp, "0.0.0.0", 8081, 1024); 28 | 29 | event_base_loop(evbase, 0); 30 | 31 | evhtp_unbind_socket(htp); 32 | evhtp_free(htp); 33 | event_base_free(evbase); 34 | 35 | return 0; 36 | } 37 | 38 | -------------------------------------------------------------------------------- /oniguruma/config.h.in: -------------------------------------------------------------------------------- 1 | #cmakedefine CRAY_STACKSEG_END 1 2 | #cmakedefine C_ALLOCA 1 3 | #cmakedefine HAVE_ALLOCA 1 4 | #cmakedefine HAVE_ALLOCA_H 1 5 | #cmakedefine HAVE_DLFCN_H 1 6 | #cmakedefine HAVE_INTTYPES_H 1 7 | #cmakedefine HAVE_MEMORY_H 1 8 | #cmakedefine HAVE_PROTOTYPES 1 9 | #cmakedefine HAVE_STDARG_PROTOTYPES 1 10 | #cmakedefine HAVE_STDINT_H 1 11 | #cmakedefine HAVE_STDLIB_H 1 12 | #cmakedefine HAVE_STRINGS_H 1 13 | #cmakedefine HAVE_STRING_H 1 14 | #cmakedefine HAVE_SYS_STAT_H 1 15 | #cmakedefine HAVE_SYS_TIMES_H 1 16 | #cmakedefine HAVE_SYS_TIME_H 1 17 | #cmakedefine HAVE_SYS_TYPES_H 1 18 | #cmakedefine HAVE_UNISTD_H 1 19 | #cmakedefine LT_OBJDIR 1 20 | #cmakedefine PACKAGE 1 21 | #cmakedefine PACKAGE_BUGREPORT 1 22 | #cmakedefine PACKAGE_NAME 1 23 | #cmakedefine PACKAGE_STRING 1 24 | #cmakedefine PACKAGE_TARNAME 1 25 | #cmakedefine PACKAGE_VERSION 1 26 | #cmakedefine SIZEOF_INT 1 27 | #cmakedefine SIZEOF_LONG 1 28 | #cmakedefine SIZEOF_SHORT 1 29 | #cmakedefine STACK_DIRECTION 1 30 | #cmakedefine STDC_HEADERS 1 31 | #cmakedefine TIME_WITH_SYS_TIME 1 32 | #cmakedefine USE_COMBINATION_EXPLOSION_CHECK 1 33 | #cmakedefine USE_CRNL_AS_LINE_TERMINATOR 1 34 | #cmakedefine VERSION 1 35 | -------------------------------------------------------------------------------- /CMakeModules/BaseConfig.cmake: -------------------------------------------------------------------------------- 1 | if (CMAKE_COMPILER_IS_GNUCC) 2 | 3 | set(RSN_BASE_C_FLAGS "-Wall 
-fno-strict-aliasing") 4 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${RSN_BASE_C_FLAGS} -DPROJECT_VERSION=\"${PROJECT_VERSION}\"") 5 | set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${RSN_BASE_C_FLAGS} -ggdb") 6 | set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${RSN_BASE_C_FLAGS}") 7 | 8 | if(APPLE) 9 | # Newer versions of OSX will spew a bunch of warnings about deprecated ssl functions, 10 | # this should be addressed at some point in time, but for now, just ignore them. 11 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_BSD_SOURCE -Wno-deprecated-declarations") 12 | elseif (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") 13 | # XXX Should I set POSIX_C_SOURCE? 14 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_BSD_SOURCE") 15 | elseif(UNIX) 16 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_BSD_SOURCE -D_POSIX_C_SOURCE=200112") 17 | endif(APPLE) 18 | 19 | endif(CMAKE_COMPILER_IS_GNUCC) 20 | 21 | if (EVHTP_DISABLE_EVTHR) 22 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DEVHTP_DISABLE_EVTHR") 23 | endif(EVHTP_DISABLE_EVTHR) 24 | 25 | if (EVHTP_DISABLE_SSL) 26 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DEVHTP_DISABLE_SSL") 27 | endif(EVHTP_DISABLE_SSL) 28 | 29 | if (NOT CMAKE_BUILD_TYPE) 30 | set(CMAKE_BUILD_TYPE Release) 31 | endif(NOT CMAKE_BUILD_TYPE) 32 | -------------------------------------------------------------------------------- /CMakeModules/FindLibEvent.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find the LibEvent event notification library 2 | # Once done this will define 3 | # 4 | # LIBEVENT_FOUND - System has LibEvent 5 | # LIBEVENT_INCLUDE_DIR - the LibEvent include directory 6 | # LIBEVENT_LIBRARIES - The libraries needed to use LibEvent 7 | 8 | FIND_PATH(LIBEVENT_INCLUDE_DIR NAMES event.h) 9 | FIND_LIBRARY(LIBEVENT_LIBRARY NAMES event) 10 | FIND_LIBRARY(LIBEVENT_CORE_LIBRARY NAMES event_core) 11 | FIND_LIBRARY(LIBEVENT_PTHREADS_LIBRARY NAMES event_pthreads) 12 | FIND_LIBRARY(LIBEVENT_EXTRA_LIBRARY NAMES event_extra) 13 | 14 | 
if (NOT EVHTP_DISABLE_SSL) 15 | FIND_LIBRARY(LIBEVENT_OPENSSL_LIBRARY NAMES event_openssl) 16 | endif() 17 | 18 | 19 | INCLUDE(FindPackageHandleStandardArgs) 20 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEvent DEFAULT_MSG LIBEVENT_LIBRARY LIBEVENT_INCLUDE_DIR) 21 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventPthreads DEFAULT_MSG LIBEVENT_PTHREADS_LIBRARY LIBEVENT_INCLUDE_DIR) 22 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventCore DEFAULT_MSG LIBEVENT_CORE_LIBRARY LIBEVENT_INCLUDE_DIR) 23 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventExtra DEFAULT_MSG LIBEVENT_EXTRA_LIBRARY LIBEVENT_INCLUDE_DIR) 24 | 25 | if (NOT EVHTP_DISABLE_SSL) 26 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibEventOpenssl DEFAULT_MSG LIBEVENT_OPENSSL_LIBRARY LIBEVENT_INCLUDE_DIR) 27 | endif() 28 | 29 | MARK_AS_ADVANCED(LIBEVENT_INCLUDE_DIR LIBEVENT_LIBRARY LIBEVENT_PTHREADS_LIBRARY LIBEVENT_OPENSSL_LIBRARY LIBEVENT_CORE_LIBRARY LIBEVENT_EXTRA_LIBRARY) 30 | -------------------------------------------------------------------------------- /oniguruma/COPYING: -------------------------------------------------------------------------------- 1 | Oniguruma LICENSE 2 | ----------------- 3 | 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | -------------------------------------------------------------------------------- /examples/test_vhost.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | void 9 | testcb(evhtp_request_t * req, void * a) { 10 | evbuffer_add_reference(req->buffer_out, "foobar", 6, NULL, NULL); 11 | evhtp_send_reply(req, EVHTP_RES_OK); 12 | } 13 | 14 | int 15 | main(int argc, char ** argv) { 16 | evbase_t * evbase = event_base_new(); 17 | evhtp_t * evhtp = evhtp_new(evbase, NULL); 18 | evhtp_t * v1 = evhtp_new(evbase, NULL); 19 | evhtp_t * v2 = evhtp_new(evbase, NULL); 20 | 21 | evhtp_set_cb(v1, "/host1", NULL, "host1.com"); 22 | evhtp_set_cb(v2, "/localhost", testcb, "localhost"); 23 | 24 | evhtp_add_vhost(evhtp, "host1.com", v1); 25 | evhtp_add_vhost(evhtp, "localhost", v2); 26 | 27 | evhtp_add_alias(v2, "127.0.0.1"); 28 | evhtp_add_alias(v2, "localhost"); 29 | evhtp_add_alias(v2, "localhost:8081"); 30 | 31 | #if 0 32 | scfg1.pemfile = "./server.pem"; 33 | scfg1.privfile = "./server.pem"; 34 | scfg2.pemfile = "./server1.pem"; 35 | scfg2.pemfile = 
"./server1.pem"; 36 | 37 | evhtp_ssl_init(evhtp, &scfg1); 38 | evhtp_ssl_init(v1, &scfg2); 39 | evhtp_ssl_init(v2, &scfg2); 40 | #endif 41 | 42 | evhtp_bind_socket(evhtp, "0.0.0.0", 8081, 1024); 43 | 44 | event_base_loop(evbase, 0); 45 | 46 | evhtp_unbind_socket(evhtp); 47 | evhtp_free(v2); 48 | evhtp_free(v1); 49 | evhtp_free(evhtp); 50 | event_base_free(evbase); 51 | 52 | return 0; 53 | } 54 | 55 | -------------------------------------------------------------------------------- /oniguruma/sample/simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | * simple.c 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | extern int main(int argc, char* argv[]) 9 | { 10 | int r; 11 | unsigned char *start, *range, *end; 12 | regex_t* reg; 13 | OnigErrorInfo einfo; 14 | OnigRegion *region; 15 | 16 | static UChar* pattern = (UChar* )"a(.*)b|[e-f]+"; 17 | static UChar* str = (UChar* )"zzzzaffffffffb"; 18 | 19 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 20 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 21 | if (r != ONIG_NORMAL) { 22 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 23 | onig_error_code_to_str(s, r, &einfo); 24 | fprintf(stderr, "ERROR: %s\n", s); 25 | return -1; 26 | } 27 | 28 | region = onig_region_new(); 29 | 30 | end = str + strlen((char* )str); 31 | start = str; 32 | range = end; 33 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 34 | if (r >= 0) { 35 | int i; 36 | 37 | fprintf(stderr, "match at %d\n", r); 38 | for (i = 0; i < region->num_regs; i++) { 39 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 40 | } 41 | } 42 | else if (r == ONIG_MISMATCH) { 43 | fprintf(stderr, "search fail\n"); 44 | } 45 | else { /* error */ 46 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 47 | onig_error_code_to_str(s, r); 48 | fprintf(stderr, "ERROR: %s\n", s); 49 | return -1; 50 | } 51 | 52 | onig_region_free(region, 1 /* 1:free self, 0:free 
contents only */); 53 | onig_free(reg); 54 | onig_end(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /oniguruma/onig-config.in: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (C) 2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp) 3 | 4 | ONIG_VERSION=@PACKAGE_VERSION@ 5 | 6 | show_usage() 7 | { 8 | cat < 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | static void 10 | _test_cb_1(evthr_t * thr, void * cmdarg, void * shared) { 11 | printf("START _test_cb_1 (%u)\n", (unsigned int)pthread_self()); 12 | sleep(1); 13 | printf("END _test_cb_1 (%u)\n", (unsigned int)pthread_self()); 14 | } 15 | 16 | int 17 | main(int argc, char ** argv) { 18 | evthr_pool_t * pool = NULL; 19 | int i = 0; 20 | 21 | pool = evthr_pool_new(8, NULL, NULL); 22 | 23 | evthr_pool_start(pool); 24 | 25 | while (1) { 26 | if (i++ >= 5) { 27 | break; 28 | } 29 | 30 | printf("Iter %d\n", i); 31 | 32 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 33 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 34 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 35 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 36 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 37 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 38 | 39 | sleep(2); 40 | } 41 | 42 | evthr_pool_stop(pool); 43 | evthr_pool_free(pool); 44 | 45 | pool = evthr_pool_new(2, NULL, NULL); 46 | i = 0; 47 | 48 | evthr_pool_set_max_backlog(pool, 1); 49 | evthr_pool_start(pool); 50 | 51 | while (1) { 52 | if (i++ >= 5) { 53 | break; 54 | } 55 | 56 | printf("Iter %d\n", i); 57 | 58 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 59 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 60 | printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); 61 | } 62 | 63 | evthr_pool_stop(pool); 64 | 
evthr_pool_free(pool); 65 | 66 | return 0; 67 | } /* main */ 68 | 69 | -------------------------------------------------------------------------------- /evthr/README: -------------------------------------------------------------------------------- 1 | Libevthr is an API which manages threads and thread-pools in an event based 2 | manner. This API requires libevent with threading support. 3 | 4 | Libevthr works a bit differently than most thread management systems. Instead of 5 | conditional signalling and some type of per-thread queue, Libevthr uses a 6 | deferral type mechanism. That is, a thread is always running, abstracted to a 7 | point where you "defer" your function *into* a thread. 8 | 9 | For example you can start up a single thread with a backlog of 10 (a backlog 10 | being the max number of outstanding callbacks to run within the thread), and 11 | execute a function you would like to run inside the thread one or many times. 12 | The act of deferrals is non-blocking. 13 | 14 | Example Code for evthrs: 15 | 16 | evthr_t * thr = evthr_new(NULL, NULL); 17 | evthr_set_max_backlog(thr, 10); 18 | if (evthr_start(thr) < 0) { 19 | exit(1); 20 | } 21 | 22 | evthr_defer(thr, my_cb_1, NULL); 23 | evthr_defer(thr, my_cb_2, NULL); 24 | evthr_defer(thr, my_cb_3, NULL); 25 | 26 | sleep(n_seconds); 27 | 28 | evthr_stop(thr); 29 | 30 | Libevthr also has the ability to create pools using the same methods that a 31 | single evthr has. For example, if you would like to create 10 threads, each 32 | with a backlog of 5: 33 | 34 | evthr_pool_t * thr_pool = evthr_pool_new(10, NULL, NULL); 35 | evthr_pool_set_max_backlog(thr_pool, 5); 36 | if (evthr_pool_start(thr_pool) < 0) { 37 | exit(1); 38 | } 39 | 40 | evthr_pool_defer(thr_pool, my_cb_1, NULL); 41 | evthr_pool_defer(thr_pool, my_cb_2, NULL); 42 | evthr_pool_defer(thr_pool, my_cb_3, NULL); 43 | 44 | Your callback functions which you defer must be of type "evthr_cb", or 45 | "void cb_name(evthr_t * thr, void * arg, void * shared)". 
In this case, the "arg" variable is 46 | the data you passed as the third argument to either evthr_pool_defer, or 47 | evthr_defer. The "shared" variable is the data that was either the second 48 | variable in evthr_new(), or the third variable in evthr_pool_new(). 49 | 50 | The gist of this is to allow a global dataset, along with deferred specific 51 | data. 52 | 53 | See test.c for a quick example. 54 | -------------------------------------------------------------------------------- /oniguruma/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 2 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 3 | 4 | INCLUDE (CheckFunctionExists) 5 | INCLUDE (CheckIncludeFiles) 6 | INCLUDE (CheckTypeSize) 7 | 8 | CHECK_FUNCTION_EXISTS(alloca C_ALLOCA) 9 | CHECK_FUNCTION_EXISTS(memcmp HAVE_MEMCMP) 10 | 11 | CHECK_INCLUDE_FILES(alloca.h HAVE_ALLOCA_H) 12 | CHECK_INCLUDE_FILES(strings.h HAVE_STRINGS_H) 13 | CHECK_INCLUDE_FILES(string.h HAVE_STRING_H) 14 | CHECK_INCLUDE_FILES(stdlib.h HAVE_STDLIB_H) 15 | CHECK_INCLUDE_FILES(sys/time.h HAVE_SYS_TIME_H) 16 | CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H) 17 | CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) 18 | CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H) 19 | CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_PROTOTYPES) 20 | 21 | CHECK_TYPE_SIZE("int" SIZEOF_INT) 22 | CHECK_TYPE_SIZE("long" SIZEOF_LONG) 23 | CHECK_TYPE_SIZE("short" SIZEOF_SHORT) 24 | 25 | CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) 26 | 27 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 28 | 29 | set(SOURCES regint.h regparse.h regenc.h st.h 30 | regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c 31 | regenc.c regsyntax.c regtrav.c regversion.c st.c 32 | regposix.c regposerr.c 33 | enc/unicode.c enc/ascii.c enc/utf8.c 34 | enc/utf16_be.c enc/utf16_le.c 35 | enc/utf32_be.c enc/utf32_le.c 36 | enc/euc_jp.c enc/sjis.c 
enc/iso8859_1.c 37 | enc/iso8859_2.c enc/iso8859_3.c 38 | enc/iso8859_4.c enc/iso8859_5.c 39 | enc/iso8859_6.c enc/iso8859_7.c 40 | enc/iso8859_8.c enc/iso8859_9.c 41 | enc/iso8859_10.c enc/iso8859_11.c 42 | enc/iso8859_13.c enc/iso8859_14.c 43 | enc/iso8859_15.c enc/iso8859_16.c 44 | enc/euc_tw.c enc/euc_kr.c enc/big5.c 45 | enc/gb18030.c enc/koi8_r.c enc/cp1251.c) 46 | 47 | 48 | add_library(libonig STATIC ${SOURCES}) 49 | set_target_properties(libonig PROPERTIES OUTPUT_NAME "libonig") 50 | -------------------------------------------------------------------------------- /oniguruma/sample/syntax.c: -------------------------------------------------------------------------------- 1 | /* 2 | * syntax.c 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | extern int exec(OnigSyntaxType* syntax, 9 | char* apattern, char* astr) 10 | { 11 | int r; 12 | unsigned char *start, *range, *end; 13 | regex_t* reg; 14 | OnigErrorInfo einfo; 15 | OnigRegion *region; 16 | UChar* pattern = (UChar* )apattern; 17 | UChar* str = (UChar* )astr; 18 | 19 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 20 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); 21 | if (r != ONIG_NORMAL) { 22 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 23 | onig_error_code_to_str(s, r, &einfo); 24 | fprintf(stderr, "ERROR: %s\n", s); 25 | return -1; 26 | } 27 | 28 | region = onig_region_new(); 29 | 30 | end = str + strlen((char* )str); 31 | start = str; 32 | range = end; 33 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 34 | if (r >= 0) { 35 | int i; 36 | 37 | fprintf(stderr, "match at %d\n", r); 38 | for (i = 0; i < region->num_regs; i++) { 39 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 40 | } 41 | } 42 | else if (r == ONIG_MISMATCH) { 43 | fprintf(stderr, "search fail\n"); 44 | } 45 | else { /* error */ 46 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 47 | onig_error_code_to_str(s, r); 48 | fprintf(stderr, "ERROR: %s\n", s); 49 | 
return -1; 50 | } 51 | 52 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 53 | onig_free(reg); 54 | onig_end(); 55 | return 0; 56 | } 57 | 58 | extern int main(int argc, char* argv[]) 59 | { 60 | int r; 61 | 62 | r = exec(ONIG_SYNTAX_PERL, 63 | "\\p{XDigit}\\P{XDigit}\\p{^XDigit}\\P{^XDigit}\\p{XDigit}", 64 | "bgh3a"); 65 | 66 | r = exec(ONIG_SYNTAX_JAVA, 67 | "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); 68 | 69 | r = exec(ONIG_SYNTAX_ASIS, 70 | "abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$", 71 | "abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$"); 72 | onig_end(); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /evthr/evthr.h: -------------------------------------------------------------------------------- 1 | #ifndef _GNU_SOURCE 2 | #define _GNU_SOURCE 1 3 | #endif 4 | #ifndef __EVTHR_H__ 5 | #define __EVTHR_H__ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | enum evthr_res { 18 | EVTHR_RES_OK = 0, 19 | EVTHR_RES_BACKLOG, 20 | EVTHR_RES_RETRY, 21 | EVTHR_RES_NOCB, 22 | EVTHR_RES_FATAL 23 | }; 24 | 25 | struct evthr_pool; 26 | struct evthr; 27 | 28 | typedef struct event_base evbase_t; 29 | typedef struct event ev_t; 30 | 31 | typedef struct evthr_pool evthr_pool_t; 32 | typedef struct evthr evthr_t; 33 | typedef enum evthr_res evthr_res; 34 | 35 | typedef void (*evthr_cb)(evthr_t * thr, void * cmd_arg, void * shared); 36 | typedef void (*evthr_init_cb)(evthr_t * thr, void * shared); 37 | 38 | evthr_t * evthr_new(evthr_init_cb init_cb, void * arg); 39 | evbase_t * evthr_get_base(evthr_t * thr); 40 | void evthr_set_aux(evthr_t * thr, void * aux); 41 | void * evthr_get_aux(evthr_t * thr); 42 | int evthr_start(evthr_t * evthr); 43 | evthr_res evthr_stop(evthr_t * evthr); 44 | evthr_res evthr_defer(evthr_t * evthr, evthr_cb cb, void * arg); 45 | void 
evthr_free(evthr_t * evthr); 46 | void evthr_inc_backlog(evthr_t * evthr); 47 | void evthr_dec_backlog(evthr_t * evthr); 48 | int evthr_get_backlog(evthr_t * evthr); 49 | void evthr_set_max_backlog(evthr_t * evthr, int max); 50 | int evthr_set_backlog(evthr_t *, int); 51 | 52 | evthr_pool_t * evthr_pool_new(int nthreads, evthr_init_cb init_cb, void * shared); 53 | int evthr_pool_start(evthr_pool_t * pool); 54 | evthr_res evthr_pool_stop(evthr_pool_t * pool); 55 | evthr_res evthr_pool_defer(evthr_pool_t * pool, evthr_cb cb, void * arg); 56 | void evthr_pool_free(evthr_pool_t * pool); 57 | void evthr_pool_set_max_backlog(evthr_pool_t * evthr, int max); 58 | int evthr_pool_set_backlog(evthr_pool_t *, int); 59 | 60 | #ifdef __cplusplus 61 | } 62 | #endif 63 | 64 | #endif /* __EVTHR_H__ */ 65 | 66 | -------------------------------------------------------------------------------- /oniguruma/sample/names.c: -------------------------------------------------------------------------------- 1 | /* 2 | * names.c -- example of group name callback. 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | static int 9 | name_callback(const UChar* name, const UChar* name_end, 10 | int ngroup_num, int* group_nums, 11 | regex_t* reg, void* arg) 12 | { 13 | int i, gn, ref; 14 | char* s; 15 | OnigRegion *region = (OnigRegion* )arg; 16 | 17 | for (i = 0; i < ngroup_num; i++) { 18 | gn = group_nums[i]; 19 | ref = onig_name_to_backref_number(reg, name, name_end, region); 20 | s = (ref == gn ? 
"*" : ""); 21 | fprintf(stderr, "%s (%d): ", name, gn); 22 | fprintf(stderr, "(%d-%d) %s\n", region->beg[gn], region->end[gn], s); 23 | } 24 | return 0; /* 0: continue */ 25 | } 26 | 27 | extern int main(int argc, char* argv[]) 28 | { 29 | int r; 30 | unsigned char *start, *range, *end; 31 | regex_t* reg; 32 | OnigErrorInfo einfo; 33 | OnigRegion *region; 34 | 35 | static UChar* pattern = (UChar* )"(?a*)(?b*)(?c*)"; 36 | static UChar* str = (UChar* )"aaabbbbcc"; 37 | 38 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 39 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 40 | if (r != ONIG_NORMAL) { 41 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 42 | onig_error_code_to_str(s, r, &einfo); 43 | fprintf(stderr, "ERROR: %s\n", s); 44 | return -1; 45 | } 46 | 47 | fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg)); 48 | 49 | region = onig_region_new(); 50 | 51 | end = str + strlen((char* )str); 52 | start = str; 53 | range = end; 54 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 55 | if (r >= 0) { 56 | fprintf(stderr, "match at %d\n\n", r); 57 | r = onig_foreach_name(reg, name_callback, (void* )region); 58 | } 59 | else if (r == ONIG_MISMATCH) { 60 | fprintf(stderr, "search fail\n"); 61 | } 62 | else { /* error */ 63 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 64 | onig_error_code_to_str(s, r); 65 | return -1; 66 | } 67 | 68 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 69 | onig_free(reg); 70 | onig_end(); 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /oniguruma/regversion.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regversion.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All 
rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "config.h" 31 | #include "oniguruma.h" 32 | #include 33 | 34 | extern const char* 35 | onig_version(void) 36 | { 37 | static char s[12]; 38 | 39 | sprintf(s, "%d.%d.%d", 40 | ONIGURUMA_VERSION_MAJOR, 41 | ONIGURUMA_VERSION_MINOR, 42 | ONIGURUMA_VERSION_TEENY); 43 | return s; 44 | } 45 | 46 | extern const char* 47 | onig_copyright(void) 48 | { 49 | static char s[58]; 50 | 51 | sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", 52 | ONIGURUMA_VERSION_MAJOR, 53 | ONIGURUMA_VERSION_MINOR, 54 | ONIGURUMA_VERSION_TEENY); 55 | return s; 56 | } 57 | -------------------------------------------------------------------------------- /examples/test_client.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | static void 10 | request_cb(evhtp_request_t * req, void * arg) { 11 | printf("hi %zu\n", evbuffer_get_length(req->buffer_in)); 12 | } 13 | 14 | static evhtp_res 15 | print_data(evhtp_request_t * req, evbuf_t * buf, void * arg) { 16 | printf("Got %zu bytes\n", evbuffer_get_length(buf)); 17 | 18 | return EVHTP_RES_OK; 19 | } 20 | 21 | static evhtp_res 22 | print_new_chunk_len(evhtp_request_t * req, uint64_t len, void * arg) { 23 | printf("started new chunk, %" PRIu64 " bytes\n", len); 24 | 25 | return EVHTP_RES_OK; 26 | } 27 | 28 | static evhtp_res 29 | print_chunk_complete(evhtp_request_t * req, void * arg) { 30 | printf("ended a single chunk\n"); 31 | 32 | return EVHTP_RES_OK; 33 | } 34 | 35 | static evhtp_res 36 | print_chunks_complete(evhtp_request_t * req, void * arg) { 37 | printf("all chunks read\n"); 38 | 39 | return EVHTP_RES_OK; 40 | } 41 | 42 | int 43 | main(int argc, char ** argv) { 44 | evbase_t * evbase; 45 | evhtp_connection_t * conn; 46 | evhtp_request_t * request; 47 | 48 | evbase = event_base_new(); 49 | conn = evhtp_connection_new(evbase, "75.126.169.52", 80); 50 | request = 
evhtp_request_new(request_cb, evbase); 51 | 52 | evhtp_set_hook(&request->hooks, evhtp_hook_on_read, print_data, evbase); 53 | evhtp_set_hook(&request->hooks, evhtp_hook_on_new_chunk, print_new_chunk_len, NULL); 54 | evhtp_set_hook(&request->hooks, evhtp_hook_on_chunk_complete, print_chunk_complete, NULL); 55 | evhtp_set_hook(&request->hooks, evhtp_hook_on_chunks_complete, print_chunks_complete, NULL); 56 | 57 | evhtp_headers_add_header(request->headers_out, 58 | evhtp_header_new("Host", "ieatfood.net", 0, 0)); 59 | evhtp_headers_add_header(request->headers_out, 60 | evhtp_header_new("User-Agent", "libevhtp", 0, 0)); 61 | evhtp_headers_add_header(request->headers_out, 62 | evhtp_header_new("Connection", "close", 0, 0)); 63 | 64 | evhtp_make_request(conn, request, htp_method_GET, "/"); 65 | 66 | event_base_loop(evbase, 0); 67 | event_base_free(evbase); 68 | 69 | return 0; 70 | } 71 | 72 | -------------------------------------------------------------------------------- /oniguruma/win32/config.h: -------------------------------------------------------------------------------- 1 | #define STDC_HEADERS 1 2 | #define HAVE_SYS_TYPES_H 1 3 | #define HAVE_SYS_STAT_H 1 4 | #define HAVE_STDLIB_H 1 5 | #define HAVE_STRING_H 1 6 | #define HAVE_MEMORY_H 1 7 | #define HAVE_FLOAT_H 1 8 | #define HAVE_OFF_T 1 9 | #define SIZEOF_INT 4 10 | #define SIZEOF_SHORT 2 11 | #define SIZEOF_LONG 4 12 | #define SIZEOF_LONG_LONG 0 13 | #define SIZEOF___INT64 8 14 | #define SIZEOF_OFF_T 4 15 | #define SIZEOF_VOIDP 4 16 | #define SIZEOF_FLOAT 4 17 | #define SIZEOF_DOUBLE 8 18 | #define HAVE_PROTOTYPES 1 19 | #define TOKEN_PASTE(x,y) x##y 20 | #define HAVE_STDARG_PROTOTYPES 1 21 | #ifndef NORETURN 22 | #if _MSC_VER > 1100 23 | #define NORETURN(x) __declspec(noreturn) x 24 | #else 25 | #define NORETURN(x) x 26 | #endif 27 | #endif 28 | #define HAVE_DECL_SYS_NERR 1 29 | #define STDC_HEADERS 1 30 | #define HAVE_STDLIB_H 1 31 | #define HAVE_STRING_H 1 32 | #define HAVE_LIMITS_H 1 33 | #define 
HAVE_FCNTL_H 1 34 | #define HAVE_SYS_UTIME_H 1 35 | #define HAVE_MEMORY_H 1 36 | #define uid_t int 37 | #define gid_t int 38 | #define HAVE_STRUCT_STAT_ST_RDEV 1 39 | #define HAVE_ST_RDEV 1 40 | #define GETGROUPS_T int 41 | #define RETSIGTYPE void 42 | #define HAVE_ALLOCA 1 43 | #define HAVE_DUP2 1 44 | #define HAVE_MEMCMP 1 45 | #define HAVE_MEMMOVE 1 46 | #define HAVE_MKDIR 1 47 | #define HAVE_STRCASECMP 1 48 | #define HAVE_STRNCASECMP 1 49 | #define HAVE_STRERROR 1 50 | #define HAVE_STRFTIME 1 51 | #define HAVE_STRCHR 1 52 | #define HAVE_STRSTR 1 53 | #define HAVE_STRTOD 1 54 | #define HAVE_STRTOL 1 55 | #define HAVE_STRTOUL 1 56 | #define HAVE_FLOCK 1 57 | #define HAVE_VSNPRINTF 1 58 | #define HAVE_FINITE 1 59 | #define HAVE_FMOD 1 60 | #define HAVE_FREXP 1 61 | #define HAVE_HYPOT 1 62 | #define HAVE_MODF 1 63 | #define HAVE_WAITPID 1 64 | #define HAVE_CHSIZE 1 65 | #define HAVE_TIMES 1 66 | #define HAVE__SETJMP 1 67 | #define HAVE_TELLDIR 1 68 | #define HAVE_SEEKDIR 1 69 | #define HAVE_MKTIME 1 70 | #define HAVE_COSH 1 71 | #define HAVE_SINH 1 72 | #define HAVE_TANH 1 73 | #define HAVE_EXECVE 1 74 | #define HAVE_TZNAME 1 75 | #define HAVE_DAYLIGHT 1 76 | #define SETPGRP_VOID 1 77 | #define inline __inline 78 | #define NEED_IO_SEEK_BETWEEN_RW 1 79 | #define RSHIFT(x,y) ((x)>>(int)y) 80 | #define FILE_COUNT _cnt 81 | #define FILE_READPTR _ptr 82 | #define DEFAULT_KCODE KCODE_NONE 83 | #define DLEXT ".so" 84 | #define DLEXT2 ".dll" 85 | -------------------------------------------------------------------------------- /oniguruma/sample/sql.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sql.c 3 | */ 4 | #include 5 | #include 6 | #include "oniguruma.h" 7 | 8 | extern int main(int argc, char* argv[]) 9 | { 10 | static OnigSyntaxType SQLSyntax; 11 | 12 | int r; 13 | unsigned char *start, *range, *end; 14 | regex_t* reg; 15 | OnigErrorInfo einfo; 16 | OnigRegion *region; 17 | 18 | static UChar* pattern = (UChar* 
)"\\_%\\\\__zz"; 19 | static UChar* str = (UChar* )"a_abcabcabc\\ppzz"; 20 | 21 | onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS); 22 | onig_set_syntax_op2 (&SQLSyntax, 0); 23 | onig_set_syntax_behavior(&SQLSyntax, 0); 24 | onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE); 25 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\'); 26 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_'); 27 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME, 28 | ONIG_INEFFECTIVE_META_CHAR); 29 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME, 30 | ONIG_INEFFECTIVE_META_CHAR); 31 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME, 32 | ONIG_INEFFECTIVE_META_CHAR); 33 | onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME, 34 | (OnigCodePoint )'%'); 35 | 36 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 37 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo); 38 | if (r != ONIG_NORMAL) { 39 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 40 | onig_error_code_to_str(s, r, &einfo); 41 | fprintf(stderr, "ERROR: %s\n", s); 42 | return -1; 43 | } 44 | 45 | region = onig_region_new(); 46 | 47 | end = str + strlen((char* )str); 48 | start = str; 49 | range = end; 50 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 51 | if (r >= 0) { 52 | int i; 53 | 54 | fprintf(stderr, "match at %d\n", r); 55 | for (i = 0; i < region->num_regs; i++) { 56 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 57 | } 58 | } 59 | else if (r == ONIG_MISMATCH) { 60 | fprintf(stderr, "search fail\n"); 61 | } 62 | else { /* error */ 63 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 64 | onig_error_code_to_str(s, r); 65 | fprintf(stderr, "ERROR: %s\n", s); 66 | return -1; 67 | } 68 | 69 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 70 | onig_free(reg); 71 | onig_end(); 72 | return 0; 73 | } 74 | 
-------------------------------------------------------------------------------- /oniguruma/enc/ascii.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | ascii.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2006 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static int 33 | ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) 34 | { 35 | if (code < 128) 36 | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 37 | else 38 | return FALSE; 39 | } 40 | 41 | OnigEncodingType OnigEncodingASCII = { 42 | onigenc_single_byte_mbc_enc_len, 43 | "US-ASCII", /* name */ 44 | 1, /* max byte length */ 45 | 1, /* min byte length */ 46 | onigenc_is_mbc_newline_0x0a, 47 | onigenc_single_byte_mbc_to_code, 48 | onigenc_single_byte_code_to_mbclen, 49 | onigenc_single_byte_code_to_mbc, 50 | onigenc_ascii_mbc_case_fold, 51 | onigenc_ascii_apply_all_case_fold, 52 | onigenc_ascii_get_case_fold_codes_by_str, 53 | onigenc_minimum_property_name_to_ctype, 54 | ascii_is_code_ctype, 55 | onigenc_not_support_get_ctype_code_range, 56 | onigenc_single_byte_left_adjust_char_head, 57 | onigenc_always_true_is_allowed_reverse_match 58 | }; 59 | -------------------------------------------------------------------------------- /oniguruma/sample/posix.c: -------------------------------------------------------------------------------- 1 | /* 2 | * posix.c 3 | */ 4 | #include 5 | #include "onigposix.h" 6 | 7 | typedef unsigned char UChar; 8 | 9 | static int x(regex_t* reg, unsigned char* pattern, unsigned char* str) 10 | { 11 | int r, i; 12 | char buf[200]; 13 | regmatch_t pmatch[20]; 14 | 15 | r = regexec(reg, (char* )str, reg->re_nsub + 1, pmatch, 0); 16 | if (r != 0 && r != REG_NOMATCH) { 17 | regerror(r, reg, buf, sizeof(buf)); 18 | fprintf(stderr, "ERROR: %s\n", buf); 19 | return -1; 20 | } 21 | 22 | if (r == REG_NOMATCH) { 23 | fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str); 24 | } 25 | else { 26 | fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str); 27 | for (i = 0; i <= (int )reg->re_nsub; i++) { 28 | fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo); 29 | } 30 | } 31 | return 0; 32 | } 33 | 34 | extern int main(int argc, char* argv[]) 35 | { 36 | int r; 37 | char buf[200]; 38 | 
regex_t reg; 39 | UChar* pattern; 40 | 41 | /* default syntax (ONIG_SYNTAX_RUBY) */ 42 | pattern = (UChar* )"^a+b{2,7}[c-f]?$|uuu"; 43 | r = regcomp(®, (char* )pattern, REG_EXTENDED); 44 | if (r) { 45 | regerror(r, ®, buf, sizeof(buf)); 46 | fprintf(stderr, "ERROR: %s\n", buf); 47 | return -1; 48 | } 49 | x(®, pattern, (UChar* )"aaabbbbd"); 50 | 51 | /* POSIX Basic RE (REG_EXTENDED is not specified.) */ 52 | pattern = (UChar* )"^a+b{2,7}[c-f]?|uuu"; 53 | r = regcomp(®, (char* )pattern, 0); 54 | if (r) { 55 | regerror(r, ®, buf, sizeof(buf)); 56 | fprintf(stderr, "ERROR: %s\n", buf); 57 | return -1; 58 | } 59 | x(®, pattern, (UChar* )"a+b{2,7}d?|uuu"); 60 | 61 | /* POSIX Basic RE (REG_EXTENDED is not specified.) */ 62 | pattern = (UChar* )"^a*b\\{2,7\\}\\([c-f]\\)$"; 63 | r = regcomp(®, (char* )pattern, 0); 64 | if (r) { 65 | regerror(r, ®, buf, sizeof(buf)); 66 | fprintf(stderr, "ERROR: %s\n", buf); 67 | return -1; 68 | } 69 | x(®, pattern, (UChar* )"aaaabbbbbbd"); 70 | 71 | /* POSIX Extended RE */ 72 | onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED); 73 | pattern = (UChar* )"^a+b{2,7}[c-f]?)$|uuu"; 74 | r = regcomp(®, (char* )pattern, REG_EXTENDED); 75 | if (r) { 76 | regerror(r, ®, buf, sizeof(buf)); 77 | fprintf(stderr, "ERROR: %s\n", buf); 78 | return -1; 79 | } 80 | x(®, pattern, (UChar* )"aaabbbbd)"); 81 | 82 | pattern = (UChar* )"^b."; 83 | r = regcomp(®, (char* )pattern, REG_EXTENDED | REG_NEWLINE); 84 | if (r) { 85 | regerror(r, ®, buf, sizeof(buf)); 86 | fprintf(stderr, "ERROR: %s\n", buf); 87 | return -1; 88 | } 89 | x(®, pattern, (UChar* )"a\nb\n"); 90 | 91 | regfree(®); 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /oniguruma/regtrav.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regtrav.c - Oniguruma (regular expression library) 3 | 
**********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2004 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regint.h" 31 | 32 | #ifdef USE_CAPTURE_HISTORY 33 | 34 | static int 35 | capture_tree_traverse(OnigCaptureTreeNode* node, int at, 36 | int(*callback_func)(int,int,int,int,int,void*), 37 | int level, void* arg) 38 | { 39 | int r, i; 40 | 41 | if (node == (OnigCaptureTreeNode* )0) 42 | return 0; 43 | 44 | if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) { 45 | r = (*callback_func)(node->group, node->beg, node->end, 46 | level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg); 47 | if (r != 0) return r; 48 | } 49 | 50 | for (i = 0; i < node->num_childs; i++) { 51 | r = capture_tree_traverse(node->childs[i], at, 52 | callback_func, level + 1, arg); 53 | if (r != 0) return r; 54 | } 55 | 56 | if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) { 57 | r = (*callback_func)(node->group, node->beg, node->end, 58 | level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg); 59 | if (r != 0) return r; 60 | } 61 | 62 | return 0; 63 | } 64 | #endif /* USE_CAPTURE_HISTORY */ 65 | 66 | extern int 67 | onig_capture_tree_traverse(OnigRegion* region, int at, 68 | int(*callback_func)(int,int,int,int,int,void*), void* arg) 69 | { 70 | #ifdef USE_CAPTURE_HISTORY 71 | return capture_tree_traverse(region->history_root, at, 72 | callback_func, 0, arg); 73 | #else 74 | return ONIG_NO_SUPPORT_CONFIG; 75 | #endif 76 | } 77 | -------------------------------------------------------------------------------- /oniguruma/Makefile.am: -------------------------------------------------------------------------------- 1 | ## Makefile.am for Oniguruma 2 | encdir = $(top_srcdir)/enc 3 | sampledir = $(top_srcdir)/sample 4 | libname = libonig.la 5 | 6 | #AM_CFLAGS = -DNOT_RUBY 7 | AM_CFLAGS = 8 | INCLUDES = -I$(top_srcdir) -I$(includedir) 9 | 10 | SUBDIRS = . 
sample 11 | 12 | include_HEADERS = oniguruma.h oniggnu.h onigposix.h 13 | lib_LTLIBRARIES = $(libname) 14 | 15 | libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ 16 | regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ 17 | regenc.c regsyntax.c regtrav.c regversion.c st.c \ 18 | regposix.c regposerr.c \ 19 | $(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \ 20 | $(encdir)/utf16_be.c $(encdir)/utf16_le.c \ 21 | $(encdir)/utf32_be.c $(encdir)/utf32_le.c \ 22 | $(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/iso8859_1.c \ 23 | $(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \ 24 | $(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \ 25 | $(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \ 26 | $(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \ 27 | $(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \ 28 | $(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \ 29 | $(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \ 30 | $(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \ 31 | $(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c 32 | 33 | libonig_la_LDFLAGS = -version-info $(LTVERSION) 34 | 35 | EXTRA_DIST = HISTORY README.ja index.html index_ja.html \ 36 | doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \ 37 | win32/Makefile win32/config.h win32/testc.c \ 38 | $(encdir)/koi8.c $(encdir)/mktable.c \ 39 | $(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \ 40 | $(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \ 41 | $(sampledir)/syntax.c 42 | 43 | bin_SCRIPTS = onig-config 44 | 45 | onig-config: onig-config.in 46 | 47 | dll: 48 | $(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \ 49 | $(LIBS) 50 | strip libonig.dll 51 | 52 | # Ruby TEST 53 | rtest: 54 | $(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb 55 | 56 | # character-types-table source generator 57 | mktable: $(encdir)/mktable.c $(srcdir)/regenc.h 58 | $(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c 59 | 60 | 61 | # TEST 62 | TESTS = testc testp testcu 63 | 64 | 
check_PROGRAMS = testc testp testcu 65 | 66 | atest: testc testp testcu 67 | @echo "[Oniguruma API, ASCII/EUC-JP check]" 68 | @$(top_builddir)/testc | grep RESULT 69 | @echo "[POSIX API, ASCII/EUC-JP check]" 70 | @$(top_builddir)/testp | grep RESULT 71 | @echo "[Oniguruma API, UTF-16 check]" 72 | @$(top_builddir)/testcu | grep RESULT 73 | 74 | testc_SOURCES = testc.c 75 | testc_LDADD = libonig.la 76 | 77 | testp_SOURCES = testc.c 78 | testp_LDADD = libonig.la 79 | testp_CFLAGS = -DPOSIX_TEST 80 | 81 | testcu_SOURCES = testu.c 82 | testcu_LDADD = libonig.la 83 | 84 | 85 | #testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb 86 | # ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@ 87 | 88 | #testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb 89 | # ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@ 90 | 91 | #win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb 92 | # ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@ 93 | 94 | ## END OF FILE 95 | -------------------------------------------------------------------------------- /oniguruma/sample/listcap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * listcap.c 3 | * 4 | * capture history (?@...) sample. 
5 | */ 6 | #include 7 | #include 8 | #include "oniguruma.h" 9 | 10 | static int 11 | node_callback(int group, int beg, int end, int level, int at, void* arg) 12 | { 13 | int i; 14 | 15 | if (at != ONIG_TRAVERSE_CALLBACK_AT_FIRST) 16 | return -1; /* error */ 17 | 18 | /* indent */ 19 | for (i = 0; i < level * 2; i++) 20 | fputc(' ', stderr); 21 | 22 | fprintf(stderr, "%d: (%d-%d)\n", group, beg, end); 23 | return 0; 24 | } 25 | 26 | extern int ex(unsigned char* str, unsigned char* pattern, 27 | OnigSyntaxType* syntax) 28 | { 29 | int r; 30 | unsigned char *start, *range, *end; 31 | regex_t* reg; 32 | OnigErrorInfo einfo; 33 | OnigRegion *region; 34 | 35 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 36 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); 37 | if (r != ONIG_NORMAL) { 38 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 39 | onig_error_code_to_str(s, r, &einfo); 40 | fprintf(stderr, "ERROR: %s\n", s); 41 | return -1; 42 | } 43 | 44 | fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg)); 45 | fprintf(stderr, "number of capture histories: %d\n", 46 | onig_number_of_capture_histories(reg)); 47 | 48 | region = onig_region_new(); 49 | 50 | end = str + strlen((char* )str); 51 | start = str; 52 | range = end; 53 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 54 | if (r >= 0) { 55 | int i; 56 | 57 | fprintf(stderr, "match at %d\n", r); 58 | for (i = 0; i < region->num_regs; i++) { 59 | fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); 60 | } 61 | fprintf(stderr, "\n"); 62 | 63 | r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST, 64 | node_callback, (void* )0); 65 | } 66 | else if (r == ONIG_MISMATCH) { 67 | fprintf(stderr, "search fail\n"); 68 | } 69 | else { /* error */ 70 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 71 | onig_error_code_to_str(s, r); 72 | return -1; 73 | } 74 | 75 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 76 | 
onig_free(reg); 77 | return 0; 78 | } 79 | 80 | 81 | extern int main(int argc, char* argv[]) 82 | { 83 | int r; 84 | OnigSyntaxType syn; 85 | 86 | static UChar* str1 = (UChar* )"((())())"; 87 | static UChar* pattern1 88 | = (UChar* )"\\g

<p>(?@<p>\\(\\g<s>\\)){0}(?@<s>(?:\\g<p>

)*|){0}"; 89 | 90 | static UChar* str2 = (UChar* )"x00x00x00"; 91 | static UChar* pattern2 = (UChar* )"(?@x(?@\\d+))+"; 92 | 93 | static UChar* str3 = (UChar* )"0123"; 94 | static UChar* pattern3 = (UChar* )"(?@.)(?@.)(?@.)(?@.)"; 95 | 96 | /* enable capture hostory */ 97 | onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT); 98 | onig_set_syntax_op2(&syn, 99 | onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY); 100 | 101 | r = ex(str1, pattern1, &syn); 102 | r = ex(str2, pattern2, &syn); 103 | r = ex(str3, pattern3, &syn); 104 | 105 | onig_end(); 106 | return 0; 107 | } 108 | -------------------------------------------------------------------------------- /oniguruma/oniggnu.h: -------------------------------------------------------------------------------- 1 | #ifndef ONIGGNU_H 2 | #define ONIGGNU_H 3 | /********************************************************************** 4 | oniggnu.h - Oniguruma (regular expression library) 5 | **********************************************************************/ 6 | /*- 7 | * Copyright (c) 2002-2005 K.Kosako 8 | * All rights reserved. 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 1. Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 15 | * 2. Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 | * SUCH DAMAGE. 30 | */ 31 | 32 | #include "oniguruma.h" 33 | 34 | #ifdef __cplusplus 35 | extern "C" { 36 | #endif 37 | 38 | #define RE_MBCTYPE_ASCII 0 39 | #define RE_MBCTYPE_EUC 1 40 | #define RE_MBCTYPE_SJIS 2 41 | #define RE_MBCTYPE_UTF8 3 42 | 43 | /* GNU regex options */ 44 | #ifndef RE_NREGS 45 | #define RE_NREGS ONIG_NREGION 46 | #endif 47 | 48 | #define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE 49 | #define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND 50 | #define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE 51 | #define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE 52 | #define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST 53 | #define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) 54 | #define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY 55 | #define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE 56 | #define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP 57 | #define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP 58 | 59 | 60 | ONIG_EXTERN 61 | void re_mbcinit P_((int)); 62 | ONIG_EXTERN 63 | int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); 64 | ONIG_EXTERN 65 | int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); 66 | ONIG_EXTERN 67 | void re_free_pattern P_((struct re_pattern_buffer*)); 68 | ONIG_EXTERN 69 | int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); 70 | ONIG_EXTERN 71 
| int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); 72 | ONIG_EXTERN 73 | int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); 74 | ONIG_EXTERN 75 | void re_set_casetable P_((const char*)); 76 | ONIG_EXTERN 77 | void re_free_registers P_((struct re_registers*)); 78 | ONIG_EXTERN 79 | int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ 80 | 81 | #ifdef __cplusplus 82 | } 83 | #endif 84 | 85 | #endif /* ONIGGNU_H */ 86 | -------------------------------------------------------------------------------- /oniguruma/regposerr.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regposerr.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "config.h" 31 | #include "onigposix.h" 32 | 33 | #ifdef HAVE_STRING_H 34 | # include 35 | #else 36 | # include 37 | #endif 38 | 39 | #if defined(__GNUC__) 40 | # define ARG_UNUSED __attribute__ ((unused)) 41 | #else 42 | # define ARG_UNUSED 43 | #endif 44 | 45 | static char* ESTRING[] = { 46 | NULL, 47 | "failed to match", /* REG_NOMATCH */ 48 | "Invalid regular expression", /* REG_BADPAT */ 49 | "invalid collating element referenced", /* REG_ECOLLATE */ 50 | "invalid character class type referenced", /* REG_ECTYPE */ 51 | "bad backslash-escape sequence", /* REG_EESCAPE */ 52 | "invalid back reference number", /* REG_ESUBREG */ 53 | "imbalanced [ and ]", /* REG_EBRACK */ 54 | "imbalanced ( and )", /* REG_EPAREN */ 55 | "imbalanced { and }", /* REG_EBRACE */ 56 | "invalid repeat range {n,m}", /* REG_BADBR */ 57 | "invalid range", /* REG_ERANGE */ 58 | "Out of memory", /* REG_ESPACE */ 59 | "? 
* + not preceded by valid regular expression", /* REG_BADRPT */ 60 | 61 | /* Extended errors */ 62 | "internal error", /* REG_EONIG_INTERNAL */ 63 | "invalid wide char value", /* REG_EONIG_BADWC */ 64 | "invalid argument", /* REG_EONIG_BADARG */ 65 | "multi-thread error" /* REG_EONIG_THREAD */ 66 | }; 67 | 68 | #include 69 | 70 | 71 | extern size_t 72 | regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, 73 | size_t size) 74 | { 75 | char* s; 76 | char tbuf[35]; 77 | size_t len; 78 | 79 | if (posix_ecode > 0 80 | && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) { 81 | s = ESTRING[posix_ecode]; 82 | } 83 | else if (posix_ecode == 0) { 84 | s = ""; 85 | } 86 | else { 87 | sprintf(tbuf, "undefined error code (%d)", posix_ecode); 88 | s = tbuf; 89 | } 90 | 91 | len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */ 92 | 93 | if (buf != NULL && size > 0) { 94 | strncpy(buf, s, size - 1); 95 | buf[size - 1] = '\0'; 96 | } 97 | return len; 98 | } 99 | -------------------------------------------------------------------------------- /oniguruma/sample/crnl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * crnl.c 2007/05/30 K.Kosako 3 | * 4 | * !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!! 5 | * 6 | * USE_CRNL_AS_LINE_TERMINATOR config test program. 
7 | */ 8 | #include 9 | #include 10 | #include "oniguruma.h" 11 | 12 | static int nfail = 0; 13 | 14 | static void result(int no, int from, int to, 15 | int expected_from, int expected_to) 16 | { 17 | fprintf(stderr, "%3d: ", no); 18 | if (from == expected_from && to == expected_to) { 19 | fprintf(stderr, "Success\n"); 20 | } 21 | else { 22 | fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n", 23 | expected_from, expected_to, from, to); 24 | 25 | nfail++; 26 | } 27 | } 28 | 29 | static int 30 | x(int no, char* pattern_arg, char* str_arg, 31 | int expected_from, int expected_to) 32 | { 33 | int r; 34 | unsigned char *start, *range, *end; 35 | regex_t* reg; 36 | OnigErrorInfo einfo; 37 | OnigRegion *region; 38 | UChar *pattern, *str; 39 | 40 | pattern = (UChar* )pattern_arg; 41 | str = (UChar* )str_arg; 42 | 43 | r = onig_new(®, pattern, pattern + strlen((char* )pattern), 44 | ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); 45 | if (r != ONIG_NORMAL) { 46 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 47 | onig_error_code_to_str(s, r, &einfo); 48 | fprintf(stderr, "ERROR: %s\n", s); 49 | return -1; 50 | } 51 | 52 | region = onig_region_new(); 53 | 54 | end = str + strlen((char* )str); 55 | start = str; 56 | range = end; 57 | r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); 58 | if (r >= 0 || r == ONIG_MISMATCH) { 59 | result(no, region->beg[0], region->end[0], expected_from, expected_to); 60 | } 61 | else if (r == ONIG_MISMATCH) { 62 | result(no, r, -1, expected_from, expected_to); 63 | } 64 | else { /* error */ 65 | char s[ONIG_MAX_ERROR_MESSAGE_LEN]; 66 | onig_error_code_to_str(s, r); 67 | fprintf(stderr, "ERROR: %s\n", s); 68 | return -1; 69 | } 70 | 71 | onig_region_free(region, 1 /* 1:free self, 0:free contents only */); 72 | onig_free(reg); 73 | return 0; 74 | } 75 | 76 | static int 77 | f(int no, char* pattern_arg, char* str_arg) 78 | { 79 | return x(no, pattern_arg, str_arg, -1, -1); 80 | } 81 | 82 | extern int 
main(int argc, char* argv[]) 83 | { 84 | x( 1, "", "\r\n", 0, 0); 85 | x( 2, ".", "\r\n", 0, 1); 86 | f( 3, "..", "\r\n"); 87 | x( 4, "^", "\r\n", 0, 0); 88 | x( 5, "\\n^", "\r\nf", 1, 2); 89 | x( 6, "\\n^a", "\r\na", 1, 3); 90 | x( 7, "$", "\r\n", 0, 0); 91 | x( 8, "T$", "T\r\n", 0, 1); 92 | x( 9, "T$", "T\raT\r\n", 3, 4); 93 | x(10, "\\z", "\r\n", 2, 2); 94 | f(11, "a\\z", "a\r\n"); 95 | x(12, "\\Z", "\r\n", 0, 0); 96 | x(13, "\\Z", "\r\na", 3, 3); 97 | x(14, "\\Z", "\r\n\r\n\n", 4, 4); 98 | x(15, "\\Z", "\r\n\r\nX", 5, 5); 99 | x(16, "a\\Z", "a\r\n", 0, 1); 100 | x(17, "aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15); 101 | x(18, "a|$", "b\r\n", 1, 1); 102 | x(19, "$|b", "\rb", 1, 2); 103 | x(20, "a$|ab$", "\r\nab\r\n", 2, 4); 104 | 105 | x(21, "a|\\Z", "b\r\n", 1, 1); 106 | x(22, "\\Z|b", "\rb", 1, 2); 107 | x(23, "a\\Z|ab\\Z", "\r\nab\r\n", 2, 4); 108 | x(24, "(?=a$).", "a\r\n", 0, 1); 109 | f(25, "(?=a$).", "a\r"); 110 | x(26, "(?!a$)..", "a\r", 0, 2); 111 | x(27, "(?<=a$).\\n", "a\r\n", 1, 3); 112 | f(28, "(? 0) { 120 | fprintf(stderr, "\n"); 121 | fprintf(stderr, "!!! You have to enable USE_CRNL_AS_LINE_TERMINATOR\n"); 122 | fprintf(stderr, "!!! 
in regenc.h for this test program.\n"); 123 | fprintf(stderr, "\n"); 124 | } 125 | 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /examples/test_proxy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | int 12 | make_request(evbase_t * evbase, 13 | evthr_t * evthr, 14 | const char * const host, 15 | const short port, 16 | const char * const path, 17 | evhtp_headers_t * headers, 18 | evhtp_callback_cb cb, 19 | void * arg) { 20 | evhtp_connection_t * conn; 21 | evhtp_request_t * request; 22 | 23 | conn = evhtp_connection_new(evbase, host, port); 24 | conn->thread = evthr; 25 | request = evhtp_request_new(cb, arg); 26 | 27 | evhtp_headers_add_header(request->headers_out, 28 | evhtp_header_new("Host", "localhost", 0, 0)); 29 | evhtp_headers_add_header(request->headers_out, 30 | evhtp_header_new("User-Agent", "libevhtp", 0, 0)); 31 | evhtp_headers_add_header(request->headers_out, 32 | evhtp_header_new("Connection", "close", 0, 0)); 33 | 34 | evhtp_headers_add_headers(request->headers_out, headers); 35 | 36 | printf("Making backend request...\n"); 37 | evhtp_make_request(conn, request, htp_method_GET, path); 38 | printf("Ok.\n"); 39 | 40 | return 0; 41 | } 42 | 43 | static void 44 | backend_cb(evhtp_request_t * backend_req, void * arg) { 45 | evhtp_request_t * frontend_req = (evhtp_request_t *)arg; 46 | 47 | evbuffer_prepend_buffer(frontend_req->buffer_out, backend_req->buffer_in); 48 | evhtp_headers_add_headers(frontend_req->headers_out, backend_req->headers_in); 49 | 50 | /* 51 | * char body[1024] = { '\0' }; 52 | * ev_ssize_t len = evbuffer_copyout(frontend_req->buffer_out, body, sizeof(body)); 53 | * printf("Backend %zu: %s\n", len, body); 54 | */ 55 | 56 | evhtp_send_reply(frontend_req, EVHTP_RES_OK); 57 | evhtp_request_resume(frontend_req); 58 | } 59 | 60 
| static void 61 | frontend_cb(evhtp_request_t * req, void * arg) { 62 | int * aux; 63 | int thr; 64 | 65 | aux = (int *)evthr_get_aux(req->conn->thread); 66 | thr = *aux; 67 | 68 | printf(" Received frontend request on thread %d... ", thr); 69 | 70 | /* Pause the frontend request while we run the backend requests. */ 71 | evhtp_request_pause(req); 72 | 73 | make_request(evthr_get_base(req->conn->thread), 74 | req->conn->thread, 75 | "127.0.0.1", 80, 76 | req->uri->path->full, 77 | req->headers_in, backend_cb, req); 78 | 79 | printf("Ok.\n"); 80 | } 81 | 82 | /* Terminate gracefully on SIGTERM */ 83 | void 84 | sigterm_cb(int fd, short event, void * arg) { 85 | evbase_t * evbase = (evbase_t *)arg; 86 | struct timeval tv = { .tv_usec = 100000, .tv_sec = 0 }; /* 100 ms */ 87 | 88 | event_base_loopexit(evbase, &tv); 89 | } 90 | 91 | void 92 | init_thread_cb(evhtp_t * htp, evthr_t * thr, void * arg) { 93 | static int aux = 0; 94 | 95 | printf("Spinning up a thread: %d\n", ++aux); 96 | evthr_set_aux(thr, &aux); 97 | } 98 | 99 | int 100 | main(int argc, char ** argv) { 101 | struct event *ev_sigterm; 102 | evbase_t * evbase = event_base_new(); 103 | evhtp_t * evhtp = evhtp_new(evbase, NULL); 104 | 105 | evhtp_set_gencb(evhtp, frontend_cb, NULL); 106 | 107 | #if 0 108 | #ifndef EVHTP_DISABLE_SSL 109 | evhtp_ssl_cfg_t scfg1 = { 0 }; 110 | 111 | scfg1.pemfile = "./server.pem"; 112 | scfg1.privfile = "./server.pem"; 113 | 114 | evhtp_ssl_init(evhtp, &scfg1); 115 | #endif 116 | #endif 117 | 118 | evhtp_use_threads(evhtp, init_thread_cb, 8, NULL); 119 | #ifndef WIN32 120 | ev_sigterm = evsignal_new(evbase, SIGTERM, sigterm_cb, evbase); 121 | evsignal_add(ev_sigterm, NULL); 122 | #endif 123 | evhtp_bind_socket(evhtp, "0.0.0.0", 8081, 1024); 124 | event_base_loop(evbase, 0); 125 | 126 | printf("Clean exit\n"); 127 | return 0; 128 | } 129 | 130 | -------------------------------------------------------------------------------- /htparse/htparse.h: 
--------------------------------------------------------------------------------
#ifndef __HTPARSE_H__
#define __HTPARSE_H__

/* The declarations below use size_t and uint64_t; include their headers so
 * this header can be included first in any translation unit. */
#include <stddef.h>
#include <stdint.h>

struct htparser;

/* Kind of message a parser is initialised for (see htparser_init). */
enum htp_type {
    htp_type_request = 0,
    htp_type_response
};

/* URI scheme values reported by htparser_get_scheme. */
enum htp_scheme {
    htp_scheme_none = 0,
    htp_scheme_ftp,
    htp_scheme_http,
    htp_scheme_https,
    htp_scheme_nfs,
    htp_scheme_unknown
};

/* Request method values reported by htparser_get_method. */
enum htp_method {
    htp_method_GET = 0,
    htp_method_HEAD,
    htp_method_POST,
    htp_method_PUT,
    htp_method_DELETE,
    htp_method_MKCOL,
    htp_method_COPY,
    htp_method_MOVE,
    htp_method_OPTIONS,
    htp_method_PROPFIND,
    htp_method_PROPPATCH,
    htp_method_LOCK,
    htp_method_UNLOCK,
    htp_method_TRACE,
    htp_method_CONNECT, /* RFC 2616 */
    htp_method_PATCH,   /* RFC 5789 */
    htp_method_UNKNOWN
};

/* Error values reported by htparser_get_error. */
enum htpparse_error {
    htparse_error_none = 0,
    htparse_error_too_big,
    htparse_error_inval_method,
    htparse_error_inval_reqline,
    htparse_error_inval_schema,
    htparse_error_inval_proto,
    htparse_error_inval_ver,
    htparse_error_inval_hdr,
    htparse_error_inval_chunk_sz,
    htparse_error_inval_chunk,
    htparse_error_inval_state,
    htparse_error_user,
    htparse_error_status,
    htparse_error_generic
};

typedef struct htparser      htparser;
typedef struct htparse_hooks htparse_hooks;

typedef enum htp_scheme      htp_scheme;
typedef enum htp_method      htp_method;
typedef enum htp_type        htp_type;
typedef enum htpparse_error  htpparse_error;

/* Hook that receives only the parser. */
typedef int (*htparse_hook)(htparser *);
/* Hook that additionally receives a data pointer and its length. */
typedef int (*htparse_data_hook)(htparser *, const char *, size_t);


/* Table of callbacks handed to htparser_run. */
struct htparse_hooks {
    htparse_hook      on_msg_begin;
    htparse_data_hook method;
    htparse_data_hook scheme;             /* called if scheme is found */
    htparse_data_hook host;               /* called if a host was in the request scheme */
    htparse_data_hook port;               /* called if a port was in the request scheme */
    htparse_data_hook path;               /* only the path of the uri */
    htparse_data_hook args;               /* only the arguments of the uri */
    htparse_data_hook uri;                /* the entire uri including path/args */
    htparse_hook      on_hdrs_begin;
    htparse_data_hook hdr_key;
    htparse_data_hook hdr_val;
    htparse_data_hook hostname;
    htparse_hook      on_hdrs_complete;
    htparse_hook      on_new_chunk;       /* called after parsed chunk octet */
    htparse_hook      on_chunk_complete;  /* called after single parsed chunk */
    htparse_hook      on_chunks_complete; /* called after all parsed chunks processed */
    htparse_data_hook body;
    htparse_hook      on_msg_complete;
};


size_t         htparser_run(htparser *, htparse_hooks *, const char *, size_t);
int            htparser_should_keep_alive(htparser * p);
htp_scheme     htparser_get_scheme(htparser *);
htp_method     htparser_get_method(htparser *);
const char   * htparser_get_methodstr(htparser *);
const char   * htparser_get_methodstr_m(htp_method);
void           htparser_set_major(htparser *, unsigned char);
void           htparser_set_minor(htparser *, unsigned char);
unsigned char  htparser_get_major(htparser *);
unsigned char  htparser_get_minor(htparser *);
unsigned char  htparser_get_multipart(htparser *);
unsigned int   htparser_get_status(htparser *);
uint64_t       htparser_get_content_length(htparser *);
uint64_t       htparser_get_content_pending(htparser *);
uint64_t       htparser_get_total_bytes_read(htparser *);
htpparse_error htparser_get_error(htparser *);
const char   * htparser_get_strerror(htparser *);
void         * htparser_get_userdata(htparser *);
void           htparser_set_userdata(htparser *, void *);
void           htparser_init(htparser *, htp_type);
htparser     * htparser_new(void);

#endif

--------------------------------------------------------------------------------
/oniguruma/enc/euc_tw.c:
-------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | euc_tw.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_EUCTW[] = { 33 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 49 | }; 50 | 51 | static int 52 | euctw_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_EUCTW[*p]; 55 | } 56 | 57 | static OnigCodePoint 58 | euctw_mbc_to_code(const UChar* p, const UChar* end) 59 | { 60 | return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end); 61 | } 62 | 63 | static int 64 | euctw_code_to_mbc(OnigCodePoint code, UChar *buf) 65 | { 66 | return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf); 67 | } 68 | 69 | static int 70 | euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, 71 | UChar* lower) 72 | { 73 | return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag, 74 | pp, end, lower); 75 | } 76 | 77 | static int 78 | euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype) 79 | { 80 | return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype); 81 | } 82 | 83 | #define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) 84 | 85 | static UChar* 86 | euctw_left_adjust_char_head(const UChar* start, const UChar* s) 87 | { 88 | /* Assumed in this encoding, 89 | mb-trail bytes 
don't mix with single bytes. 90 | */ 91 | const UChar *p; 92 | int len; 93 | 94 | if (s <= start) return (UChar* )s; 95 | p = s; 96 | 97 | while (!euctw_islead(*p) && p > start) p--; 98 | len = enclen(ONIG_ENCODING_EUC_TW, p); 99 | if (p + len > s) return (UChar* )p; 100 | p += len; 101 | return (UChar* )(p + ((s - p) & ~1)); 102 | } 103 | 104 | static int 105 | euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 106 | { 107 | const UChar c = *s; 108 | if (c <= 0x7e) return TRUE; 109 | else return FALSE; 110 | } 111 | 112 | OnigEncodingType OnigEncodingEUC_TW = { 113 | euctw_mbc_enc_len, 114 | "EUC-TW", /* name */ 115 | 4, /* max enc length */ 116 | 1, /* min enc length */ 117 | onigenc_is_mbc_newline_0x0a, 118 | euctw_mbc_to_code, 119 | onigenc_mb4_code_to_mbclen, 120 | euctw_code_to_mbc, 121 | euctw_mbc_case_fold, 122 | onigenc_ascii_apply_all_case_fold, 123 | onigenc_ascii_get_case_fold_codes_by_str, 124 | onigenc_minimum_property_name_to_ctype, 125 | euctw_is_code_ctype, 126 | onigenc_not_support_get_ctype_code_range, 127 | euctw_left_adjust_char_head, 128 | euctw_is_allowed_reverse_match 129 | }; 130 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_6.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_6.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. 
Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ 33 | ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 34 | 35 | static const unsigned short EncISO_8859_6_CtypeTable[256] = { 36 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 37 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 38 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 39 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 40 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 41 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 42 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 43 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 44 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 45 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 46 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 47 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 48 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 49 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 50 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 51 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 52 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56 | 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, 57 | 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, 58 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 59 | 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, 60 | 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 61 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 
0x30a2, 0x30a2, 0x30a2, 0x30a2, 62 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 63 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 64 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 65 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 66 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 67 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 68 | }; 69 | 70 | static int 71 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 72 | { 73 | if (code < 256) 74 | return ENC_IS_ISO_8859_6_CTYPE(code, ctype); 75 | else 76 | return FALSE; 77 | } 78 | 79 | OnigEncodingType OnigEncodingISO_8859_6 = { 80 | onigenc_single_byte_mbc_enc_len, 81 | "ISO-8859-6", /* name */ 82 | 1, /* max enc length */ 83 | 1, /* min enc length */ 84 | onigenc_is_mbc_newline_0x0a, 85 | onigenc_single_byte_mbc_to_code, 86 | onigenc_single_byte_code_to_mbclen, 87 | onigenc_single_byte_code_to_mbc, 88 | onigenc_ascii_mbc_case_fold, 89 | onigenc_ascii_apply_all_case_fold, 90 | onigenc_ascii_get_case_fold_codes_by_str, 91 | onigenc_minimum_property_name_to_ctype, 92 | is_code_ctype, 93 | onigenc_not_support_get_ctype_code_range, 94 | onigenc_single_byte_left_adjust_char_head, 95 | onigenc_always_true_is_allowed_reverse_match 96 | }; 97 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_8.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_8.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ 33 | ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 34 | 35 | static const unsigned short EncISO_8859_8_CtypeTable[256] = { 36 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 37 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 38 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 39 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 40 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 41 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 42 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 43 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 44 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 45 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 46 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 47 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 48 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 49 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 50 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 51 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 52 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56 | 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 57 | 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, 58 | 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, 59 | 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, 60 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 61 | 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 62 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 63 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 64 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 65 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 66 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 67 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 68 | }; 69 | 70 | static int 71 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 72 | { 73 | if (code < 256) 74 | return ENC_IS_ISO_8859_8_CTYPE(code, ctype); 75 | else 76 | return FALSE; 77 | } 78 | 79 | OnigEncodingType OnigEncodingISO_8859_8 = { 80 | onigenc_single_byte_mbc_enc_len, 81 | "ISO-8859-8", /* name */ 82 | 1, /* max enc length */ 83 | 1, /* min enc length */ 84 | onigenc_is_mbc_newline_0x0a, 85 | onigenc_single_byte_mbc_to_code, 86 | onigenc_single_byte_code_to_mbclen, 87 | onigenc_single_byte_code_to_mbc, 88 | onigenc_ascii_mbc_case_fold, 89 | onigenc_ascii_apply_all_case_fold, 90 | onigenc_ascii_get_case_fold_codes_by_str, 91 | onigenc_minimum_property_name_to_ctype, 92 | is_code_ctype, 93 | onigenc_not_support_get_ctype_code_range, 94 | onigenc_single_byte_left_adjust_char_head, 95 | onigenc_always_true_is_allowed_reverse_match 96 | }; 97 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_11.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_11.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ 33 | ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 34 | 35 | static const unsigned short EncISO_8859_11_CtypeTable[256] = { 36 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 37 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 38 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 39 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 40 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 41 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 42 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 43 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 44 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 45 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 46 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 47 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 48 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 49 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 50 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 51 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 52 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 53 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 54 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 55 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 56 | 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 57 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 58 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 59 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 60 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 61 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 
0x30a2, 0x30a2, 0x30a2, 0x30a2, 62 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 63 | 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2, 64 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 65 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 66 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 67 | 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000 68 | }; 69 | 70 | static int 71 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 72 | { 73 | if (code < 256) 74 | return ENC_IS_ISO_8859_11_CTYPE(code, ctype); 75 | else 76 | return FALSE; 77 | } 78 | 79 | OnigEncodingType OnigEncodingISO_8859_11 = { 80 | onigenc_single_byte_mbc_enc_len, 81 | "ISO-8859-11", /* name */ 82 | 1, /* max enc length */ 83 | 1, /* min enc length */ 84 | onigenc_is_mbc_newline_0x0a, 85 | onigenc_single_byte_mbc_to_code, 86 | onigenc_single_byte_code_to_mbclen, 87 | onigenc_single_byte_code_to_mbc, 88 | onigenc_ascii_mbc_case_fold, 89 | onigenc_ascii_apply_all_case_fold, 90 | onigenc_ascii_get_case_fold_codes_by_str, 91 | onigenc_minimum_property_name_to_ctype, 92 | is_code_ctype, 93 | onigenc_not_support_get_ctype_code_range, 94 | onigenc_single_byte_left_adjust_char_head, 95 | onigenc_always_true_is_allowed_reverse_match 96 | }; 97 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # Libevhtp 2 | ***** 3 | 4 | This document describes details on using the evhtp API. This document is 5 | probably not very awesome, it's best to look at test.c to see advanced usage. 6 | 7 | ## Required Dependencies 8 | * [gcc](http://gcc.gnu.org/) 9 | * [Libevent2](http://libevent.org) 10 | 11 | ## Optional Dependencies 12 | * [OpenSSL](http://openssl.org) 13 | * pthreads 14 | 15 | ## Building 16 | * cd build 17 | * cmake .. 
18 | * make 19 | * make examples 20 | 21 | ## Overview 22 | *** 23 | 24 | Libevhtp was created as a replacement API for Libevent's current HTTP API. The reality of libevent's http interface is that it was created as a JIT server, meaning the developer never thought of it being used for creating a full-fledged HTTP service. In fact, I am under the impression that the libevent http API was designed almost as an example of what you can do with libevent. It's not Apache in a box, but more and more developers are attempting to use it as such. 25 | 26 | ### Libevent's HTTP pitfalls 27 | *** 28 | 29 | * It was not designed to be a fully functional HTTP server. 30 | * The code is messy, abstractions are almost non-existent, and feature-creep has made long-term maintainability very hard. 31 | * The parsing code is slow and requires data to be buffered before a full parse can be completed. This results in extraneous memory usage and lots of string comparison functions. 32 | * There is no method for a user to access various parts of the request processing cycle. For example if the "Content-Length" header has a value of 50000, your callback is not executed until all 50000 bytes have been read. 33 | * Callback URIs are matched exactly; meaning if you set a callback for "/foo/", requests for "/foo/bar/" are ignored. 34 | * Creating an HTTPS server is hard, it requires a bunch of work to be done on the underlying bufferevents. 35 | * As far as I know, streaming data back to a client is hard, if not impossible without messing with underlying bufferevents. 36 | * It's confusing to work with, this is probably due to the lack of proper documentation. 37 | 38 | Libevhtp attempts to address these problems along with a wide variety of cool mechanisms allowing a developer to have complete control over their server operations.
This is not to say the API cannot be used in a very simplistic manner - a developer can easily create a backwards compatible version of libevent's HTTP server to libevhtp. 39 | 40 | ### A bit about the architecture of libevhtp 41 | *** 42 | 43 | #### Bootstrapping 44 | 45 | 1. Create a parent evhtp_t structure. 46 | 2. Assign callbacks to the parent for specific URIs or posix-regex based URI's 47 | 3. Optionally assign per-connection hooks (see hooks) to the callbacks. 48 | 4. Optionally assign pre-accept and post-accept callbacks for incoming connections. 49 | 5. Optionally enable built-in threadpool for connection handling (lock-free, and non-blocking). 50 | 6. Optionally morph your server to HTTPS. 51 | 7. Start the evhtp listener. 52 | 53 | #### Request handling. 54 | 55 | 1. Optionally deal with pre-accept and post-accept callbacks if they exist, allowing for a connection to be rejected if the function deems it as unacceptable. 56 | 2. Optionally assign per-request hooks (see hooks) for a request (the most optimal place for setting these hooks is on a post-accept callback). 57 | 3. Deal with either per-connection or per-request hook callbacks if they exist. 58 | 4. Once the request has been fully processed, inform evhtp to send a reply. 59 | 60 | ##### A very basic example with no optional conditions. 61 | 62 | #include 63 | #include 64 | 65 | void 66 | testcb(evhtp_request_t * req, void * a) { 67 | evbuffer_add_reference(req->buffer_out, "foobar", 6, NULL, NULL); 68 | evhtp_send_reply(req, EVHTP_RES_OK); 69 | } 70 | 71 | int 72 | main(int argc, char ** argv) { 73 | evbase_t * evbase = event_base_new(); 74 | evhtp_t * htp = evhtp_new(evbase, NULL); 75 | 76 | evhtp_set_cb(htp, "/test", testcb, NULL); 77 | evhtp_bind_socket(htp, "0.0.0.0", 8080, 1024); 78 | event_base_loop(evbase, 0); 79 | return 0; 80 | } 81 | 82 | 83 | ## Is evhtp thread-safe? 84 | 85 | For simple usage with evhtp_use_threads(), yes. 
But for more extreme cases: 86 | sorta, you are bound to the thread mechanisms of libevent itself. 87 | 88 | But with proper design around libevhtp, thread issues can be out-of-sight, 89 | out-of-mind. 90 | 91 | What do you mean by this "proper design" statement? 92 | 93 | Refer to the code in ./examples/thread_design.c. The comments go into great detail 94 | about the hows and whys of proper design using libevhtp's threading model. 95 | 96 | This example uses redis, mainly because most people who have asked me "is evhtp 97 | thread-safe" were attempting to do *other things* before sending a response to a 98 | request. And on more than one occasion, those *other things* were communicating 99 | with redis. 100 | 101 | 102 | ## For Windows MinGW 103 | 104 | cmake -G "MSYS Makefiles" -DCMAKE_INCLUDE_PATH=/mingw/include -DCMAKE_LIBRARY_PATH=/mingw/lib -DCMAKE_INSTALL_PREFIX=/mingw . 105 | 106 | make 107 | -------------------------------------------------------------------------------- /oniguruma/reggnu.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | reggnu.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution.
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regint.h" 31 | 32 | #ifndef ONIGGNU_H 33 | #include "oniggnu.h" 34 | #endif 35 | 36 | extern void 37 | re_free_registers(OnigRegion* r) 38 | { 39 | /* 0: don't free self */ 40 | onig_region_free(r, 0); 41 | } 42 | 43 | extern int 44 | re_adjust_startpos(regex_t* reg, const char* string, int size, 45 | int startpos, int range) 46 | { 47 | if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { 48 | UChar *p; 49 | UChar *s = (UChar* )string + startpos; 50 | 51 | if (range > 0) { 52 | p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); 53 | } 54 | else { 55 | p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); 56 | } 57 | return p - (UChar* )string; 58 | } 59 | 60 | return startpos; 61 | } 62 | 63 | extern int 64 | re_match(regex_t* reg, const char* str, int size, int pos, 65 | struct re_registers* regs) 66 | { 67 | return onig_match(reg, (UChar* )str, (UChar* )(str + size), 68 | (UChar* )(str + pos), regs, ONIG_OPTION_NONE); 69 | } 70 | 71 | extern int 72 | re_search(regex_t* bufp, const char* string, int size, int startpos, int range, 73 | struct re_registers* regs) 74 | { 75 | 
return onig_search(bufp, (UChar* )string, (UChar* )(string + size), 76 | (UChar* )(string + startpos), 77 | (UChar* )(string + startpos + range), 78 | regs, ONIG_OPTION_NONE); 79 | } 80 | 81 | extern int 82 | re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) 83 | { 84 | int r; 85 | OnigErrorInfo einfo; 86 | 87 | r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); 88 | if (r != ONIG_NORMAL) { 89 | if (IS_NOT_NULL(ebuf)) 90 | (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); 91 | } 92 | 93 | return r; 94 | } 95 | 96 | #ifdef USE_RECOMPILE_API 97 | extern int 98 | re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) 99 | { 100 | int r; 101 | OnigErrorInfo einfo; 102 | OnigEncoding enc; 103 | 104 | /* I think encoding and options should be arguments of this function. 105 | But this is adapted to present re.c. (2002/11/29) 106 | */ 107 | enc = OnigEncDefaultCharEncoding; 108 | 109 | r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), 110 | reg->options, enc, OnigDefaultSyntax, &einfo); 111 | if (r != ONIG_NORMAL) { 112 | if (IS_NOT_NULL(ebuf)) 113 | (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); 114 | } 115 | return r; 116 | } 117 | #endif 118 | 119 | extern void 120 | re_free_pattern(regex_t* reg) 121 | { 122 | onig_free(reg); 123 | } 124 | 125 | extern int 126 | re_alloc_pattern(regex_t** reg) 127 | { 128 | *reg = (regex_t* )xmalloc(sizeof(regex_t)); 129 | if (IS_NULL(*reg)) return ONIGERR_MEMORY; 130 | 131 | return onig_reg_init(*reg, ONIG_OPTION_DEFAULT, 132 | ONIGENC_CASE_FOLD_DEFAULT, 133 | OnigEncDefaultCharEncoding, 134 | OnigDefaultSyntax); 135 | } 136 | 137 | extern void 138 | re_set_casetable(const char* table) 139 | { 140 | onigenc_set_default_caseconv_table((UChar* )table); 141 | } 142 | 143 | extern void 144 | re_mbcinit(int mb_code) 145 | { 146 | OnigEncoding enc; 147 | 148 | switch (mb_code) { 149 | case RE_MBCTYPE_ASCII: 150 | enc = 
ONIG_ENCODING_ASCII; 151 | break; 152 | case RE_MBCTYPE_EUC: 153 | enc = ONIG_ENCODING_EUC_JP; 154 | break; 155 | case RE_MBCTYPE_SJIS: 156 | enc = ONIG_ENCODING_SJIS; 157 | break; 158 | case RE_MBCTYPE_UTF8: 159 | enc = ONIG_ENCODING_UTF8; 160 | break; 161 | default: 162 | return ; 163 | break; 164 | } 165 | 166 | onigenc_set_default_encoding(enc); 167 | } 168 | -------------------------------------------------------------------------------- /oniguruma/enc/euc_kr.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | euc_kr.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_EUCKR[] = { 33 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 49 | }; 50 | 51 | static int 52 | euckr_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_EUCKR[*p]; 55 | } 56 | 57 | static OnigCodePoint 58 | euckr_mbc_to_code(const UChar* p, const UChar* end) 59 | { 60 | return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end); 61 | } 62 | 63 | static int 64 | euckr_code_to_mbc(OnigCodePoint code, UChar *buf) 65 | { 66 | return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf); 67 | } 68 | 69 | static int 70 | euckr_mbc_case_fold(OnigCaseFoldType flag, 
const UChar** pp, const UChar* end, 71 | UChar* lower) 72 | { 73 | return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag, 74 | pp, end, lower); 75 | } 76 | 77 | #if 0 78 | static int 79 | euckr_is_mbc_ambiguous(OnigCaseFoldType flag, 80 | const UChar** pp, const UChar* end) 81 | { 82 | return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end); 83 | } 84 | #endif 85 | 86 | static int 87 | euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype) 88 | { 89 | return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype); 90 | } 91 | 92 | #define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff) 93 | 94 | static UChar* 95 | euckr_left_adjust_char_head(const UChar* start, const UChar* s) 96 | { 97 | /* Assumed in this encoding, 98 | mb-trail bytes don't mix with single bytes. 99 | */ 100 | const UChar *p; 101 | int len; 102 | 103 | if (s <= start) return (UChar* )s; 104 | p = s; 105 | 106 | while (!euckr_islead(*p) && p > start) p--; 107 | len = enclen(ONIG_ENCODING_EUC_KR, p); 108 | if (p + len > s) return (UChar* )p; 109 | p += len; 110 | return (UChar* )(p + ((s - p) & ~1)); 111 | } 112 | 113 | static int 114 | euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 115 | { 116 | const UChar c = *s; 117 | if (c <= 0x7e) return TRUE; 118 | else return FALSE; 119 | } 120 | 121 | OnigEncodingType OnigEncodingEUC_KR = { 122 | euckr_mbc_enc_len, 123 | "EUC-KR", /* name */ 124 | 2, /* max enc length */ 125 | 1, /* min enc length */ 126 | onigenc_is_mbc_newline_0x0a, 127 | euckr_mbc_to_code, 128 | onigenc_mb2_code_to_mbclen, 129 | euckr_code_to_mbc, 130 | euckr_mbc_case_fold, 131 | onigenc_ascii_apply_all_case_fold, 132 | onigenc_ascii_get_case_fold_codes_by_str, 133 | onigenc_minimum_property_name_to_ctype, 134 | euckr_is_code_ctype, 135 | onigenc_not_support_get_ctype_code_range, 136 | euckr_left_adjust_char_head, 137 | euckr_is_allowed_reverse_match 138 | }; 139 | 140 | /* Same with OnigEncodingEUC_KR except the name */ 141 | 
OnigEncodingType OnigEncodingEUC_CN = { 142 | euckr_mbc_enc_len, 143 | "EUC-CN", /* name */ 144 | 2, /* max enc length */ 145 | 1, /* min enc length */ 146 | onigenc_is_mbc_newline_0x0a, 147 | euckr_mbc_to_code, 148 | onigenc_mb2_code_to_mbclen, 149 | euckr_code_to_mbc, 150 | euckr_mbc_case_fold, 151 | onigenc_ascii_apply_all_case_fold, 152 | onigenc_ascii_get_case_fold_codes_by_str, 153 | onigenc_minimum_property_name_to_ctype, 154 | euckr_is_code_ctype, 155 | onigenc_not_support_get_ctype_code_range, 156 | euckr_left_adjust_char_head, 157 | euckr_is_allowed_reverse_match 158 | }; 159 | -------------------------------------------------------------------------------- /oniguruma/enc/utf32_be.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf32_be.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static int 33 | utf32be_mbc_enc_len(const UChar* p ARG_UNUSED) 34 | { 35 | return 4; 36 | } 37 | 38 | static int 39 | utf32be_is_mbc_newline(const UChar* p, const UChar* end) 40 | { 41 | if (p + 3 < end) { 42 | if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) 43 | return 1; 44 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 45 | if (( 46 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 47 | *(p+3) == 0x0d || 48 | #endif 49 | *(p+3) == 0x85) 50 | && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) 51 | return 1; 52 | if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) 53 | && *(p+1) == 0 && *p == 0) 54 | return 1; 55 | #endif 56 | } 57 | return 0; 58 | } 59 | 60 | static OnigCodePoint 61 | utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 62 | { 63 | return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); 64 | } 65 | 66 | static int 67 | utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED) 68 | { 69 | return 4; 70 | } 71 | 72 | static int 73 | utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) 74 | { 75 | UChar* p = buf; 76 | 77 | *p++ = (UChar )((code & 0xff000000) >>24); 78 | *p++ = (UChar )((code & 0xff0000) >>16); 79 | *p++ = (UChar )((code & 0xff00) >> 8); 80 | *p++ = (UChar ) (code & 0xff); 81 | return 4; 82 | } 83 | 84 | static int 85 | utf32be_mbc_case_fold(OnigCaseFoldType flag, 86 | const UChar** pp, const UChar* end, UChar* fold) 87 | { 
88 | const UChar* p = *pp; 89 | 90 | if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) { 91 | *fold++ = 0; 92 | *fold++ = 0; 93 | 94 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 95 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 96 | if (*(p+3) == 0x49) { 97 | *fold++ = 0x01; 98 | *fold = 0x31; 99 | (*pp) += 4; 100 | return 4; 101 | } 102 | } 103 | #endif 104 | 105 | *fold++ = 0; 106 | *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3)); 107 | *pp += 4; 108 | return 4; 109 | } 110 | else 111 | return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end, 112 | fold); 113 | } 114 | 115 | #if 0 116 | static int 117 | utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 118 | { 119 | const UChar* p = *pp; 120 | 121 | (*pp) += 4; 122 | 123 | if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { 124 | int c, v; 125 | 126 | p += 3; 127 | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 128 | return TRUE; 129 | } 130 | 131 | c = *p; 132 | v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, 133 | (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 134 | if ((v | BIT_CTYPE_LOWER) != 0) { 135 | /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 136 | if (c >= 0xaa && c <= 0xba) 137 | return FALSE; 138 | else 139 | return TRUE; 140 | } 141 | return (v != 0 ? 
TRUE : FALSE); 142 | } 143 | 144 | return FALSE; 145 | } 146 | #endif 147 | 148 | static UChar* 149 | utf32be_left_adjust_char_head(const UChar* start, const UChar* s) 150 | { 151 | int rem; 152 | 153 | if (s <= start) return (UChar* )s; 154 | 155 | rem = (s - start) % 4; 156 | return (UChar* )(s - rem); 157 | } 158 | 159 | static int 160 | utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, 161 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 162 | { 163 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE, 164 | flag, p, end, items); 165 | } 166 | 167 | OnigEncodingType OnigEncodingUTF32_BE = { 168 | utf32be_mbc_enc_len, 169 | "UTF-32BE", /* name */ 170 | 4, /* max byte length */ 171 | 4, /* min byte length */ 172 | utf32be_is_mbc_newline, 173 | utf32be_mbc_to_code, 174 | utf32be_code_to_mbclen, 175 | utf32be_code_to_mbc, 176 | utf32be_mbc_case_fold, 177 | onigenc_unicode_apply_all_case_fold, 178 | utf32be_get_case_fold_codes_by_str, 179 | onigenc_unicode_property_name_to_ctype, 180 | onigenc_unicode_is_code_ctype, 181 | onigenc_utf16_32_get_ctype_code_range, 182 | utf32be_left_adjust_char_head, 183 | onigenc_always_false_is_allowed_reverse_match 184 | }; 185 | -------------------------------------------------------------------------------- /oniguruma/enc/utf32_le.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf32_le.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static int 33 | utf32le_mbc_enc_len(const UChar* p ARG_UNUSED) 34 | { 35 | return 4; 36 | } 37 | 38 | static int 39 | utf32le_is_mbc_newline(const UChar* p, const UChar* end) 40 | { 41 | if (p + 3 < end) { 42 | if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) 43 | return 1; 44 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 45 | if (( 46 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 47 | *p == 0x0d || 48 | #endif 49 | *p == 0x85) 50 | && *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00) 51 | return 1; 52 | if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) 53 | && *(p+2) == 0x00 && *(p+3) == 0x00) 54 | return 1; 55 | #endif 56 | } 57 | return 0; 58 | } 59 | 60 | static OnigCodePoint 61 | utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 62 | { 63 | return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); 64 | } 65 | 66 | static int 67 | utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED) 68 | { 69 | return 4; 70 | } 71 | 72 | static int 73 | utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) 74 | { 75 | UChar* p = buf; 76 | 77 | *p++ = (UChar ) (code & 0xff); 78 | *p++ = (UChar )((code & 0xff00) >> 8); 79 | *p++ = (UChar )((code & 0xff0000) >>16); 80 | *p++ = (UChar )((code & 0xff000000) >>24); 81 | return 4; 82 | } 83 | 84 | static int 85 | utf32le_mbc_case_fold(OnigCaseFoldType flag, 86 | const UChar** pp, const UChar* end, UChar* fold) 87 | { 88 | const UChar* p = *pp; 89 | 90 | if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { 91 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 92 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 93 | if (*p == 0x49) { 94 | *fold++ = 0x31; 95 | *fold++ = 0x01; 96 | } 97 | } 98 | else { 99 | #endif 100 | *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 101 | *fold++ = 0; 102 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 103 | } 104 | #endif 105 | 106 | *fold++ = 0; 107 | *fold = 0; 108 | *pp += 4; 109 | return 4; 110 | } 
111 | else 112 | return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end, 113 | fold); 114 | } 115 | 116 | #if 0 117 | static int 118 | utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 119 | { 120 | const UChar* p = *pp; 121 | 122 | (*pp) += 4; 123 | 124 | if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { 125 | int c, v; 126 | 127 | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 128 | return TRUE; 129 | } 130 | 131 | c = *p; 132 | v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, 133 | (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 134 | if ((v | BIT_CTYPE_LOWER) != 0) { 135 | /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 136 | if (c >= 0xaa && c <= 0xba) 137 | return FALSE; 138 | else 139 | return TRUE; 140 | } 141 | return (v != 0 ? TRUE : FALSE); 142 | } 143 | 144 | return FALSE; 145 | } 146 | #endif 147 | 148 | static UChar* 149 | utf32le_left_adjust_char_head(const UChar* start, const UChar* s) 150 | { 151 | int rem; 152 | 153 | if (s <= start) return (UChar* )s; 154 | 155 | rem = (s - start) % 4; 156 | return (UChar* )(s - rem); 157 | } 158 | 159 | static int 160 | utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, 161 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 162 | { 163 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE, 164 | flag, p, end, items); 165 | } 166 | 167 | OnigEncodingType OnigEncodingUTF32_LE = { 168 | utf32le_mbc_enc_len, 169 | "UTF-32LE", /* name */ 170 | 4, /* max byte length */ 171 | 4, /* min byte length */ 172 | utf32le_is_mbc_newline, 173 | utf32le_mbc_to_code, 174 | utf32le_code_to_mbclen, 175 | utf32le_code_to_mbc, 176 | utf32le_mbc_case_fold, 177 | onigenc_unicode_apply_all_case_fold, 178 | utf32le_get_case_fold_codes_by_str, 179 | onigenc_unicode_property_name_to_ctype, 180 | onigenc_unicode_is_code_ctype, 181 | onigenc_utf16_32_get_ctype_code_range, 182 | utf32le_left_adjust_char_head, 
183 | onigenc_always_false_is_allowed_reverse_match 184 | }; 185 | -------------------------------------------------------------------------------- /oniguruma/enc/big5.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | big5.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_BIG5[] = { 33 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 49 | }; 50 | 51 | static int 52 | big5_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_BIG5[*p]; 55 | } 56 | 57 | static OnigCodePoint 58 | big5_mbc_to_code(const UChar* p, const UChar* end) 59 | { 60 | return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end); 61 | } 62 | 63 | static int 64 | big5_code_to_mbc(OnigCodePoint code, UChar *buf) 65 | { 66 | return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf); 67 | } 68 | 69 | static int 70 | big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, 71 | UChar* lower) 72 | { 73 | return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag, 74 | pp, end, lower); 75 | } 76 | 77 | #if 0 78 | static int 79 | big5_is_mbc_ambiguous(OnigCaseFoldType flag, 80 | const UChar** pp, const UChar* end) 81 | { 82 | return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end); 83 | } 84 | #endif 85 | 86 | static int 87 | big5_is_code_ctype(OnigCodePoint code, unsigned int ctype) 88 | { 89 | return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype); 90 | } 91 | 92 | static 
const char BIG5_CAN_BE_TRAIL_TABLE[256] = { 93 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 98 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 100 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 101 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 102 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 103 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 104 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 105 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 106 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 107 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 108 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 109 | }; 110 | 111 | #define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1) 112 | #define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)] 113 | 114 | static UChar* 115 | big5_left_adjust_char_head(const UChar* start, const UChar* s) 116 | { 117 | const UChar *p; 118 | int len; 119 | 120 | if (s <= start) return (UChar* )s; 121 | p = s; 122 | 123 | if (BIG5_ISMB_TRAIL(*p)) { 124 | while (p > start) { 125 | if (! BIG5_ISMB_FIRST(*--p)) { 126 | p++; 127 | break; 128 | } 129 | } 130 | } 131 | len = enclen(ONIG_ENCODING_BIG5, p); 132 | if (p + len > s) return (UChar* )p; 133 | p += len; 134 | return (UChar* )(p + ((s - p) & ~1)); 135 | } 136 | 137 | static int 138 | big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 139 | { 140 | const UChar c = *s; 141 | 142 | return (BIG5_ISMB_TRAIL(c) ? 
FALSE : TRUE); 143 | } 144 | 145 | OnigEncodingType OnigEncodingBIG5 = { 146 | big5_mbc_enc_len, 147 | "Big5", /* name */ 148 | 2, /* max enc length */ 149 | 1, /* min enc length */ 150 | onigenc_is_mbc_newline_0x0a, 151 | big5_mbc_to_code, 152 | onigenc_mb2_code_to_mbclen, 153 | big5_code_to_mbc, 154 | big5_mbc_case_fold, 155 | onigenc_ascii_apply_all_case_fold, 156 | onigenc_ascii_get_case_fold_codes_by_str, 157 | onigenc_minimum_property_name_to_ctype, 158 | big5_is_code_ctype, 159 | onigenc_not_support_get_ctype_code_range, 160 | big5_left_adjust_char_head, 161 | big5_is_allowed_reverse_match 162 | }; 163 | -------------------------------------------------------------------------------- /oniguruma/onigposix.h: -------------------------------------------------------------------------------- 1 | #ifndef ONIGPOSIX_H 2 | #define ONIGPOSIX_H 3 | /********************************************************************** 4 | onigposix.h - Oniguruma (regular expression library) 5 | **********************************************************************/ 6 | /*- 7 | * Copyright (c) 2002-2005 K.Kosako 8 | * All rights reserved. 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 1. Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 15 | * 2. Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 | * SUCH DAMAGE. 30 | */ 31 | #include 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | /* options */ 38 | #define REG_ICASE (1<<0) 39 | #define REG_NEWLINE (1<<1) 40 | #define REG_NOTBOL (1<<2) 41 | #define REG_NOTEOL (1<<3) 42 | #define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */ 43 | #define REG_NOSUB (1<<5) 44 | 45 | /* POSIX error codes */ 46 | #define REG_NOMATCH 1 47 | #define REG_BADPAT 2 48 | #define REG_ECOLLATE 3 49 | #define REG_ECTYPE 4 50 | #define REG_EESCAPE 5 51 | #define REG_ESUBREG 6 52 | #define REG_EBRACK 7 53 | #define REG_EPAREN 8 54 | #define REG_EBRACE 9 55 | #define REG_BADBR 10 56 | #define REG_ERANGE 11 57 | #define REG_ESPACE 12 58 | #define REG_BADRPT 13 59 | 60 | /* extended error codes */ 61 | #define REG_EONIG_INTERNAL 14 62 | #define REG_EONIG_BADWC 15 63 | #define REG_EONIG_BADARG 16 64 | #define REG_EONIG_THREAD 17 65 | 66 | /* character encodings (for reg_set_encoding()) */ 67 | #define REG_POSIX_ENCODING_ASCII 0 68 | #define REG_POSIX_ENCODING_EUC_JP 1 69 | #define REG_POSIX_ENCODING_SJIS 2 70 | #define REG_POSIX_ENCODING_UTF8 3 71 | #define REG_POSIX_ENCODING_UTF16_BE 4 72 | #define REG_POSIX_ENCODING_UTF16_LE 5 73 | 74 | 75 | typedef int regoff_t; 76 | 77 | typedef struct { 78 | regoff_t rm_so; 79 | regoff_t rm_eo; 80 | } regmatch_t; 81 | 82 | /* POSIX regex_t */ 83 | typedef struct { 84 | void* onig; /* Oniguruma regex_t* */ 85 | size_t re_nsub; 86 | int 
comp_options; 87 | } regex_t; 88 | 89 | 90 | #ifndef P_ 91 | #if defined(__STDC__) || defined(_WIN32) 92 | # define P_(args) args 93 | #else 94 | # define P_(args) () 95 | #endif 96 | #endif 97 | 98 | #ifndef ONIG_EXTERN 99 | #if defined(_WIN32) && !defined(__GNUC__) 100 | #if defined(EXPORT) 101 | #define ONIG_EXTERN extern __declspec(dllexport) 102 | #else 103 | #define ONIG_EXTERN extern __declspec(dllimport) 104 | #endif 105 | #endif 106 | #endif 107 | 108 | #ifndef ONIG_EXTERN 109 | #define ONIG_EXTERN extern 110 | #endif 111 | 112 | #ifndef ONIGURUMA_H 113 | typedef unsigned int OnigOptionType; 114 | 115 | /* syntax */ 116 | typedef struct { 117 | unsigned int op; 118 | unsigned int op2; 119 | unsigned int behavior; 120 | OnigOptionType options; /* default option */ 121 | } OnigSyntaxType; 122 | 123 | ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; 124 | ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; 125 | ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; 126 | ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; 127 | ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; 128 | ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; 129 | ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; 130 | ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; 131 | 132 | /* predefined syntaxes (see regsyntax.c) */ 133 | #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) 134 | #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) 135 | #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) 136 | #define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) 137 | #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) 138 | #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) 139 | #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) 140 | #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) 141 | /* default syntax */ 142 | #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax 143 | 144 | ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; 145 | 146 | ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); 147 | ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, 
OnigSyntaxType* from)); 148 | ONIG_EXTERN const char* onig_version P_((void)); 149 | ONIG_EXTERN const char* onig_copyright P_((void)); 150 | 151 | #endif /* ONIGURUMA_H */ 152 | 153 | 154 | ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); 155 | ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); 156 | ONIG_EXTERN void regfree P_((regex_t* reg)); 157 | ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); 158 | 159 | /* extended API */ 160 | ONIG_EXTERN void reg_set_encoding P_((int enc)); 161 | ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); 162 | ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); 163 | ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); 164 | 165 | #ifdef __cplusplus 166 | } 167 | #endif 168 | 169 | #endif /* ONIGPOSIX_H */ 170 | -------------------------------------------------------------------------------- /oniguruma/README: -------------------------------------------------------------------------------- 1 | README 2007/05/31 2 | 3 | Oniguruma ---- (C) K.Kosako 4 | 5 | http://www.geocities.jp/kosako3/oniguruma/ 6 | 7 | Oniguruma is a regular expressions library. 8 | The characteristics of this library is that different character encoding 9 | for every regular expression object can be specified. 
10 | 11 | Supported character encodings: 12 | 13 | ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, 14 | EUC-JP, EUC-TW, EUC-KR, EUC-CN, 15 | Shift_JIS, Big5, GB18030, KOI8-R, CP1251, 16 | ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, 17 | ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, 18 | ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 19 | 20 | * GB18030: contributed by KUBO Takehiro 21 | * CP1251: contributed by Byte 22 | ------------------------------------------------------------ 23 | 24 | License 25 | 26 | BSD license. 27 | 28 | 29 | Install 30 | 31 | Case 1: Unix and Cygwin platform 32 | 33 | 1. ./configure 34 | 2. make 35 | 3. make install 36 | 37 | * uninstall 38 | 39 | make uninstall 40 | 41 | * test (ASCII/EUC-JP) 42 | 43 | make atest 44 | 45 | * configuration check 46 | 47 | onig-config --cflags 48 | onig-config --libs 49 | onig-config --prefix 50 | onig-config --exec-prefix 51 | 52 | 53 | 54 | Case 2: Win32 platform (VC++) 55 | 56 | 1. copy win32\Makefile Makefile 57 | 2. copy win32\config.h config.h 58 | 3. nmake 59 | 60 | onig_s.lib: static link library 61 | onig.dll: dynamic link library 62 | 63 | * test (ASCII/Shift_JIS) 64 | 4. copy win32\testc.c testc.c 65 | 5. nmake ctest 66 | 67 | 68 | 69 | Regular Expressions 70 | 71 | See doc/RE (or doc/RE.ja for Japanese). 72 | 73 | 74 | Usage 75 | 76 | Include oniguruma.h in your program. (Oniguruma API) 77 | See doc/API for Oniguruma API. 78 | 79 | If you want to disable UChar type (== unsigned char) definition 80 | in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then 81 | include oniguruma.h. 82 | 83 | If you want to disable regex_t type definition in oniguruma.h, 84 | define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h. 
85 | 86 | Example of the compiling/linking command line in Unix or Cygwin, 87 | (prefix == /usr/local case) 88 | 89 | cc sample.c -L/usr/local/lib -lonig 90 | 91 | 92 | If you want to use static link library(onig_s.lib) in Win32, 93 | add option -DONIG_EXTERN=extern to C compiler. 94 | 95 | 96 | 97 | Sample Programs 98 | 99 | sample/simple.c example of the minimum (Oniguruma API) 100 | sample/names.c example of the named group callback. 101 | sample/encode.c example of some encodings. 102 | sample/listcap.c example of the capture history. 103 | sample/posix.c POSIX API sample. 104 | sample/sql.c example of the variable meta characters. 105 | (SQL-like pattern matching) 106 | 107 | Test Programs 108 | sample/syntax.c Perl, Java and ASIS syntax test. 109 | sample/crnl.c --enable-crnl-as-line-terminator test 110 | 111 | 112 | Source Files 113 | 114 | oniguruma.h Oniguruma API header file. (public) 115 | onig-config.in configuration check program template. 116 | 117 | regenc.h character encodings framework header file. 118 | regint.h internal definitions 119 | regparse.h internal definitions for regparse.c and regcomp.c 120 | regcomp.c compiling and optimization functions 121 | regenc.c character encodings framework. 122 | regerror.c error message function 123 | regext.c extended API functions. (deluxe version API) 124 | regexec.c search and match functions 125 | regparse.c parsing functions. 126 | regsyntax.c pattern syntax functions and built-in syntax definitions. 127 | regtrav.c capture history tree data traverse functions. 128 | regversion.c version info function. 129 | st.h hash table functions header file 130 | st.c hash table functions 131 | 132 | oniggnu.h GNU regex API header file. (public) 133 | reggnu.c GNU regex API functions 134 | 135 | onigposix.h POSIX API header file. (public) 136 | regposerr.c POSIX error message function. 137 | regposix.c POSIX API functions. 138 | 139 | enc/mktable.c character type table generator. 140 | enc/ascii.c ASCII encoding. 
141 | enc/euc_jp.c EUC-JP encoding. 142 | enc/euc_tw.c EUC-TW encoding. 143 | enc/euc_kr.c EUC-KR, EUC-CN encoding. 144 | enc/sjis.c Shift_JIS encoding. 145 | enc/big5.c Big5 encoding. 146 | enc/gb18030.c GB18030 encoding. 147 | enc/koi8.c KOI8 encoding. 148 | enc/koi8_r.c KOI8-R encoding. 149 | enc/cp1251.c CP1251 encoding. 150 | enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) 151 | enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) 152 | enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) 153 | enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4) 154 | enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic) 155 | enc/iso8859_6.c ISO-8859-6 encoding. (Arabic) 156 | enc/iso8859_7.c ISO-8859-7 encoding. (Greek) 157 | enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew) 158 | enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish) 159 | enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic) 160 | enc/iso8859_11.c ISO-8859-11 encoding. (Thai) 161 | enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim) 162 | enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic) 163 | enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro) 164 | enc/iso8859_16.c ISO-8859-16 encoding. 165 | (Latin-10 or South-Eastern European with Euro) 166 | enc/utf8.c UTF-8 encoding. 167 | enc/utf16_be.c UTF-16BE encoding. 168 | enc/utf16_le.c UTF-16LE encoding. 169 | enc/utf32_be.c UTF-32BE encoding. 170 | enc/utf32_le.c UTF-32LE encoding. 171 | enc/unicode.c Unicode information data. 172 | 173 | win32/Makefile Makefile for Win32 (VC++) 174 | win32/config.h config.h for Win32 175 | 176 | 177 | 178 | ToDo 179 | 180 | ? case fold flag: Katakana <-> Hiragana. 181 | ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) 182 | ?? \X (== \PM\pM*) 183 | ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. 184 | ?? transmission stopper. (return ONIG_STOP from match_at()) 185 | 186 | and I'm thankful to Akinori MUSHA. 
187 | 188 | 189 | Mail Address: K.Kosako 190 | -------------------------------------------------------------------------------- /oniguruma/regext.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | regext.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regint.h" 31 | 32 | static void 33 | conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) 34 | { 35 | while (s < end) { 36 | *conv++ = '\0'; 37 | *conv++ = '\0'; 38 | *conv++ = '\0'; 39 | *conv++ = *s++; 40 | } 41 | } 42 | 43 | static void 44 | conv_ext0le32(const UChar* s, const UChar* end, UChar* conv) 45 | { 46 | while (s < end) { 47 | *conv++ = *s++; 48 | *conv++ = '\0'; 49 | *conv++ = '\0'; 50 | *conv++ = '\0'; 51 | } 52 | } 53 | 54 | static void 55 | conv_ext0be(const UChar* s, const UChar* end, UChar* conv) 56 | { 57 | while (s < end) { 58 | *conv++ = '\0'; 59 | *conv++ = *s++; 60 | } 61 | } 62 | 63 | static void 64 | conv_ext0le(const UChar* s, const UChar* end, UChar* conv) 65 | { 66 | while (s < end) { 67 | *conv++ = *s++; 68 | *conv++ = '\0'; 69 | } 70 | } 71 | 72 | static void 73 | conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv) 74 | { 75 | while (s < end) { 76 | *conv++ = s[3]; 77 | *conv++ = s[2]; 78 | *conv++ = s[1]; 79 | *conv++ = s[0]; 80 | s += 4; 81 | } 82 | } 83 | 84 | static void 85 | conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv) 86 | { 87 | while (s < end) { 88 | *conv++ = s[1]; 89 | *conv++ = s[0]; 90 | s += 2; 91 | } 92 | } 93 | 94 | static int 95 | conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, 96 | UChar** conv, UChar** conv_end) 97 | { 98 | int len = end - s; 99 | 100 | if (to == ONIG_ENCODING_UTF16_BE) { 101 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 102 | *conv = (UChar* )xmalloc(len * 2); 103 | CHECK_NULL_RETURN_MEMERR(*conv); 104 | *conv_end = *conv + (len * 2); 105 | conv_ext0be(s, end, *conv); 106 | return 0; 107 | } 108 | else if (from == ONIG_ENCODING_UTF16_LE) { 109 | swap16: 110 | *conv = (UChar* )xmalloc(len); 111 | CHECK_NULL_RETURN_MEMERR(*conv); 112 | *conv_end = *conv + len; 113 | conv_swap2bytes(s, end, *conv); 114 | return 0; 115 | } 116 | } 117 | else if (to == ONIG_ENCODING_UTF16_LE) { 
118 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 119 | *conv = (UChar* )xmalloc(len * 2); 120 | CHECK_NULL_RETURN_MEMERR(*conv); 121 | *conv_end = *conv + (len * 2); 122 | conv_ext0le(s, end, *conv); 123 | return 0; 124 | } 125 | else if (from == ONIG_ENCODING_UTF16_BE) { 126 | goto swap16; 127 | } 128 | } 129 | if (to == ONIG_ENCODING_UTF32_BE) { 130 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 131 | *conv = (UChar* )xmalloc(len * 4); 132 | CHECK_NULL_RETURN_MEMERR(*conv); 133 | *conv_end = *conv + (len * 4); 134 | conv_ext0be32(s, end, *conv); 135 | return 0; 136 | } 137 | else if (from == ONIG_ENCODING_UTF32_LE) { 138 | swap32: 139 | *conv = (UChar* )xmalloc(len); 140 | CHECK_NULL_RETURN_MEMERR(*conv); 141 | *conv_end = *conv + len; 142 | conv_swap4bytes(s, end, *conv); 143 | return 0; 144 | } 145 | } 146 | else if (to == ONIG_ENCODING_UTF32_LE) { 147 | if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { 148 | *conv = (UChar* )xmalloc(len * 4); 149 | CHECK_NULL_RETURN_MEMERR(*conv); 150 | *conv_end = *conv + (len * 4); 151 | conv_ext0le32(s, end, *conv); 152 | return 0; 153 | } 154 | else if (from == ONIG_ENCODING_UTF32_BE) { 155 | goto swap32; 156 | } 157 | } 158 | 159 | return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; 160 | } 161 | 162 | extern int 163 | onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 164 | OnigCompileInfo* ci, OnigErrorInfo* einfo) 165 | { 166 | int r; 167 | UChar *cpat, *cpat_end; 168 | 169 | if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 170 | 171 | if (ci->pattern_enc != ci->target_enc) { 172 | r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, 173 | &cpat, &cpat_end); 174 | if (r) return r; 175 | } 176 | else { 177 | cpat = (UChar* )pattern; 178 | cpat_end = (UChar* )pattern_end; 179 | } 180 | 181 | *reg = (regex_t* )xmalloc(sizeof(regex_t)); 182 | if (IS_NULL(*reg)) { 183 | r = ONIGERR_MEMORY; 184 | goto 
err2; 185 | } 186 | 187 | r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc, 188 | ci->syntax); 189 | if (r) goto err; 190 | 191 | r = onig_compile(*reg, cpat, cpat_end, einfo); 192 | if (r) { 193 | err: 194 | onig_free(*reg); 195 | *reg = NULL; 196 | } 197 | 198 | err2: 199 | if (cpat != pattern) xfree(cpat); 200 | 201 | return r; 202 | } 203 | 204 | #ifdef USE_RECOMPILE_API 205 | extern int 206 | onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 207 | OnigCompileInfo* ci, OnigErrorInfo* einfo) 208 | { 209 | int r; 210 | regex_t *new_reg; 211 | 212 | r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo); 213 | if (r) return r; 214 | if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 215 | onig_transfer(reg, new_reg); 216 | } 217 | else { 218 | onig_chain_link_add(reg, new_reg); 219 | } 220 | return 0; 221 | } 222 | #endif 223 | -------------------------------------------------------------------------------- /oniguruma/enc/utf16_be.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf16_be.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_UTF16[] = { 33 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 35 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 36 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 37 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 38 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 39 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 40 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 41 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 42 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 43 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 49 | }; 50 | 51 | static int 52 | utf16be_mbc_enc_len(const UChar* p) 53 | { 54 | return EncLen_UTF16[*p]; 55 | } 56 | 57 | static int 58 | utf16be_is_mbc_newline(const UChar* p, const UChar* end) 59 | { 60 | if (p + 1 < end) { 61 | if (*(p+1) == 
0x0a && *p == 0x00) 62 | return 1; 63 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 64 | if (( 65 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 66 | *(p+1) == 0x0d || 67 | #endif 68 | *(p+1) == 0x85) && *p == 0x00) 69 | return 1; 70 | if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) 71 | return 1; 72 | #endif 73 | } 74 | return 0; 75 | } 76 | 77 | static OnigCodePoint 78 | utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 79 | { 80 | OnigCodePoint code; 81 | 82 | if (UTF16_IS_SURROGATE_FIRST(*p)) { 83 | code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16) 84 | + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8) 85 | + p[3]; 86 | } 87 | else { 88 | code = p[0] * 256 + p[1]; 89 | } 90 | return code; 91 | } 92 | 93 | static int 94 | utf16be_code_to_mbclen(OnigCodePoint code) 95 | { 96 | return (code > 0xffff ? 4 : 2); 97 | } 98 | 99 | static int 100 | utf16be_code_to_mbc(OnigCodePoint code, UChar *buf) 101 | { 102 | UChar* p = buf; 103 | 104 | if (code > 0xffff) { 105 | unsigned int plane, high; 106 | 107 | plane = (code >> 16) - 1; 108 | *p++ = (plane >> 2) + 0xd8; 109 | high = (code & 0xff00) >> 8; 110 | *p++ = ((plane & 0x03) << 6) + (high >> 2); 111 | *p++ = (high & 0x03) + 0xdc; 112 | *p = (UChar )(code & 0xff); 113 | return 4; 114 | } 115 | else { 116 | *p++ = (UChar )((code & 0xff00) >> 8); 117 | *p++ = (UChar )(code & 0xff); 118 | return 2; 119 | } 120 | } 121 | 122 | static int 123 | utf16be_mbc_case_fold(OnigCaseFoldType flag, 124 | const UChar** pp, const UChar* end, UChar* fold) 125 | { 126 | const UChar* p = *pp; 127 | 128 | if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) { 129 | p++; 130 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 131 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 132 | if (*p == 0x49) { 133 | *fold++ = 0x01; 134 | *fold = 0x31; 135 | (*pp) += 2; 136 | return 2; 137 | } 138 | } 139 | #endif 140 | 141 | *fold++ = 0; 142 | *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 143 | *pp += 2; 144 | return 2; 145 | } 146 | 
else 147 | return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag, 148 | pp, end, fold); 149 | } 150 | 151 | #if 0 152 | static int 153 | utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 154 | { 155 | const UChar* p = *pp; 156 | 157 | (*pp) += EncLen_UTF16[*p]; 158 | 159 | if (*p == 0) { 160 | int c, v; 161 | 162 | p++; 163 | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 164 | return TRUE; 165 | } 166 | 167 | c = *p; 168 | v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, 169 | (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 170 | 171 | if ((v | BIT_CTYPE_LOWER) != 0) { 172 | /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 173 | if (c >= 0xaa && c <= 0xba) 174 | return FALSE; 175 | else 176 | return TRUE; 177 | } 178 | return (v != 0 ? TRUE : FALSE); 179 | } 180 | 181 | return FALSE; 182 | } 183 | #endif 184 | 185 | static UChar* 186 | utf16be_left_adjust_char_head(const UChar* start, const UChar* s) 187 | { 188 | if (s <= start) return (UChar* )s; 189 | 190 | if ((s - start) % 2 == 1) { 191 | s--; 192 | } 193 | 194 | if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) 195 | s -= 2; 196 | 197 | return (UChar* )s; 198 | } 199 | 200 | static int 201 | utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, 202 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 203 | { 204 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE, 205 | flag, p, end, items); 206 | } 207 | 208 | OnigEncodingType OnigEncodingUTF16_BE = { 209 | utf16be_mbc_enc_len, 210 | "UTF-16BE", /* name */ 211 | 4, /* max byte length */ 212 | 2, /* min byte length */ 213 | utf16be_is_mbc_newline, 214 | utf16be_mbc_to_code, 215 | utf16be_code_to_mbclen, 216 | utf16be_code_to_mbc, 217 | utf16be_mbc_case_fold, 218 | onigenc_unicode_apply_all_case_fold, 219 | utf16be_get_case_fold_codes_by_str, 220 | onigenc_unicode_property_name_to_ctype, 221 | onigenc_unicode_is_code_ctype, 222 | 
onigenc_utf16_32_get_ctype_code_range, 223 | utf16be_left_adjust_char_head, 224 | onigenc_always_false_is_allowed_reverse_match 225 | }; 226 | -------------------------------------------------------------------------------- /oniguruma/enc/utf16_le.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | utf16_le.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | static const int EncLen_UTF16[] = { 33 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 35 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 36 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 37 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 38 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 39 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 40 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 41 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 42 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 43 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 45 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 49 | }; 50 | 51 | static int 52 | utf16le_code_to_mbclen(OnigCodePoint code) 53 | { 54 | return (code > 0xffff ? 
4 : 2); 55 | } 56 | 57 | static int 58 | utf16le_mbc_enc_len(const UChar* p) 59 | { 60 | return EncLen_UTF16[*(p+1)]; 61 | } 62 | 63 | static int 64 | utf16le_is_mbc_newline(const UChar* p, const UChar* end) 65 | { 66 | if (p + 1 < end) { 67 | if (*p == 0x0a && *(p+1) == 0x00) 68 | return 1; 69 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 70 | if (( 71 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 72 | *p == 0x0d || 73 | #endif 74 | *p == 0x85) && *(p+1) == 0x00) 75 | return 1; 76 | if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) 77 | return 1; 78 | #endif 79 | } 80 | return 0; 81 | } 82 | 83 | static OnigCodePoint 84 | utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 85 | { 86 | OnigCodePoint code; 87 | UChar c0 = *p; 88 | UChar c1 = *(p+1); 89 | 90 | if (UTF16_IS_SURROGATE_FIRST(c1)) { 91 | code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) 92 | + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) 93 | + p[2]; 94 | } 95 | else { 96 | code = c1 * 256 + p[0]; 97 | } 98 | return code; 99 | } 100 | 101 | static int 102 | utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) 103 | { 104 | UChar* p = buf; 105 | 106 | if (code > 0xffff) { 107 | unsigned int plane, high; 108 | 109 | plane = (code >> 16) - 1; 110 | high = (code & 0xff00) >> 8; 111 | 112 | *p++ = ((plane & 0x03) << 6) + (high >> 2); 113 | *p++ = (plane >> 2) + 0xd8; 114 | *p++ = (UChar )(code & 0xff); 115 | *p = (high & 0x03) + 0xdc; 116 | return 4; 117 | } 118 | else { 119 | *p++ = (UChar )(code & 0xff); 120 | *p++ = (UChar )((code & 0xff00) >> 8); 121 | return 2; 122 | } 123 | } 124 | 125 | static int 126 | utf16le_mbc_case_fold(OnigCaseFoldType flag, 127 | const UChar** pp, const UChar* end, UChar* fold) 128 | { 129 | const UChar* p = *pp; 130 | 131 | if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) { 132 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 133 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 134 | if (*p == 0x49) { 135 | *fold++ = 0x31; 136 | *fold = 0x01; 137 | (*pp) += 2; 138 | 
return 2; 139 | } 140 | } 141 | #endif 142 | 143 | *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 144 | *fold = 0; 145 | *pp += 2; 146 | return 2; 147 | } 148 | else 149 | return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end, 150 | fold); 151 | } 152 | 153 | #if 0 154 | static int 155 | utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, 156 | const UChar* end) 157 | { 158 | const UChar* p = *pp; 159 | 160 | (*pp) += EncLen_UTF16[*(p+1)]; 161 | 162 | if (*(p+1) == 0) { 163 | int c, v; 164 | 165 | if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 166 | return TRUE; 167 | } 168 | 169 | c = *p; 170 | v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, 171 | (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 172 | if ((v | BIT_CTYPE_LOWER) != 0) { 173 | /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ 174 | if (c >= 0xaa && c <= 0xba) 175 | return FALSE; 176 | else 177 | return TRUE; 178 | } 179 | return (v != 0 ? TRUE : FALSE); 180 | } 181 | 182 | return FALSE; 183 | } 184 | #endif 185 | 186 | static UChar* 187 | utf16le_left_adjust_char_head(const UChar* start, const UChar* s) 188 | { 189 | if (s <= start) return (UChar* )s; 190 | 191 | if ((s - start) % 2 == 1) { 192 | s--; 193 | } 194 | 195 | if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) 196 | s -= 2; 197 | 198 | return (UChar* )s; 199 | } 200 | 201 | static int 202 | utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, 203 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 204 | { 205 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE, 206 | flag, p, end, items); 207 | } 208 | 209 | OnigEncodingType OnigEncodingUTF16_LE = { 210 | utf16le_mbc_enc_len, 211 | "UTF-16LE", /* name */ 212 | 4, /* max byte length */ 213 | 2, /* min byte length */ 214 | utf16le_is_mbc_newline, 215 | utf16le_mbc_to_code, 216 | utf16le_code_to_mbclen, 217 | utf16le_code_to_mbc, 218 | utf16le_mbc_case_fold, 219 | 
onigenc_unicode_apply_all_case_fold, 220 | utf16le_get_case_fold_codes_by_str, 221 | onigenc_unicode_property_name_to_ctype, 222 | onigenc_unicode_is_code_ctype, 223 | onigenc_utf16_32_get_ctype_code_range, 224 | utf16le_left_adjust_char_head, 225 | onigenc_always_false_is_allowed_reverse_match 226 | }; 227 | -------------------------------------------------------------------------------- /oniguruma/win32/Makefile: -------------------------------------------------------------------------------- 1 | # Oniguruma Makefile for Win32 2 | 3 | product_name = oniguruma 4 | 5 | CPPFLAGS = 6 | CFLAGS = -O2 -nologo /W3 7 | LDFLAGS = 8 | LOADLIBES = 9 | ARLIB = lib 10 | ARLIB_FLAGS = -nologo 11 | ARDLL = cl 12 | ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll 13 | LINKFLAGS = -link -incremental:no -pdb:none 14 | 15 | INSTALL = install -c 16 | CP = copy 17 | CC = cl 18 | DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT 19 | RUBYDIR = .. 20 | 21 | subdirs = 22 | 23 | libbase = onig 24 | libname = $(libbase)_s.lib 25 | dllname = $(libbase).dll 26 | dlllib = $(libbase).lib 27 | 28 | onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h 29 | posixheaders = onigposix.h 30 | headers = $(posixheaders) $(onigheaders) 31 | 32 | onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \ 33 | regexec.obj regenc.obj regsyntax.obj regtrav.obj \ 34 | regversion.obj st.obj 35 | posixobjs = regposix.obj regposerr.obj 36 | libobjs = $(onigobjs) $(posixobjs) 37 | 38 | jp_objs = $(encdir)\euc_jp.obj $(encdir)\sjis.obj 39 | iso8859_objs = $(encdir)\iso8859_1.obj $(encdir)\iso8859_2.obj \ 40 | $(encdir)\iso8859_3.obj $(encdir)\iso8859_4.obj \ 41 | $(encdir)\iso8859_5.obj $(encdir)\iso8859_6.obj \ 42 | $(encdir)\iso8859_7.obj $(encdir)\iso8859_8.obj \ 43 | $(encdir)\iso8859_9.obj $(encdir)\iso8859_10.obj \ 44 | $(encdir)\iso8859_11.obj $(encdir)\iso8859_13.obj \ 45 | $(encdir)\iso8859_14.obj $(encdir)\iso8859_15.obj \ 46 | $(encdir)\iso8859_16.obj 47 | 48 | encobjs = $(encdir)\ascii.obj 
$(encdir)\utf8.obj \ 49 | $(encdir)\unicode.obj \ 50 | $(encdir)\utf16_be.obj $(encdir)\utf16_le.obj \ 51 | $(encdir)\utf32_be.obj $(encdir)\utf32_le.obj \ 52 | $(jp_objs) $(iso8859_objs) \ 53 | $(encdir)\euc_tw.obj $(encdir)\euc_kr.obj $(encdir)\big5.obj \ 54 | $(encdir)\gb18030.obj \ 55 | $(encdir)\koi8_r.obj \ 56 | $(encdir)\cp1251.obj # $(encdir)\koi8.obj 57 | 58 | onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \ 59 | regsyntax.c regtrav.c regversion.c reggnu.c st.c 60 | posixsources = regposix.c regposerr.c 61 | libsources = $(posixsources) $(onigsources) 62 | rubysources = $(onigsources) 63 | 64 | encdir = enc 65 | patchfiles = re.c.168.patch re.c.181.patch 66 | distfiles = README COPYING HISTORY \ 67 | Makefile.in configure.in config.h.in configure \ 68 | $(headers) $(libsources) $(patchfiles) \ 69 | test.rb testconv.rb 70 | testc = testc 71 | testp = testp 72 | 73 | makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' 74 | 75 | .SUFFIXES: 76 | .SUFFIXES: .obj .c .h .ps .dvi .info .texinfo 77 | 78 | .c.obj: 79 | $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. 
/Fo$@ /c $< 80 | 81 | # targets 82 | default: all 83 | 84 | setup: 85 | $(CP) win32\config.h config.h 86 | $(CP) win32\testc.c testc.c 87 | 88 | 89 | all: $(libname) $(dllname) 90 | 91 | $(libname): $(libobjs) $(encobjs) 92 | $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs) 93 | 94 | $(dllname): $(libobjs) $(encobjs) 95 | $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) 96 | 97 | regparse.obj: regparse.c $(onigheaders) config.h st.h 98 | regext.obj: regext.c $(onigheaders) config.h 99 | regtrav.obj: regtrav.c $(onigheaders) config.h 100 | regcomp.obj: regcomp.c $(onigheaders) config.h 101 | regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h 102 | reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h 103 | regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h 104 | regenc.obj: regenc.c regenc.h oniguruma.h config.h 105 | regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h 106 | regversion.obj: regversion.c oniguruma.h config.h 107 | regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h 108 | regposerr.obj: regposerr.c $(posixheaders) config.h 109 | st.obj: st.c regint.h oniguruma.h config.h st.h 110 | 111 | $(encdir)\ascii.obj: $(encdir)\ascii.c regenc.h config.h 112 | $(encdir)\unicode.obj: $(encdir)\unicode.c regenc.h config.h 113 | $(encdir)\utf8.obj: $(encdir)\utf8.c regenc.h config.h 114 | $(encdir)\utf16_be.obj: $(encdir)\utf16_be.c regenc.h config.h 115 | $(encdir)\utf16_le.obj: $(encdir)\utf16_le.c regenc.h config.h 116 | $(encdir)\utf32_be.obj: $(encdir)\utf32_be.c regenc.h config.h 117 | $(encdir)\utf32_le.obj: $(encdir)\utf32_le.c regenc.h config.h 118 | $(encdir)\euc_jp.obj: $(encdir)\euc_jp.c regenc.h config.h 119 | $(encdir)\euc_tw.obj: $(encdir)\euc_tw.c regenc.h config.h 120 | $(encdir)\euc_kr.obj: $(encdir)\euc_kr.c regenc.h config.h 121 | $(encdir)\sjis.obj: $(encdir)\sjis.c regenc.h config.h 122 | $(encdir)\iso8859_1.obj: $(encdir)\iso8859_1.c regenc.h config.h 123 | 
$(encdir)\iso8859_2.obj: $(encdir)\iso8859_2.c regenc.h config.h 124 | $(encdir)\iso8859_3.obj: $(encdir)\iso8859_3.c regenc.h config.h 125 | $(encdir)\iso8859_4.obj: $(encdir)\iso8859_4.c regenc.h config.h 126 | $(encdir)\iso8859_5.obj: $(encdir)\iso8859_5.c regenc.h config.h 127 | $(encdir)\iso8859_6.obj: $(encdir)\iso8859_6.c regenc.h config.h 128 | $(encdir)\iso8859_7.obj: $(encdir)\iso8859_7.c regenc.h config.h 129 | $(encdir)\iso8859_8.obj: $(encdir)\iso8859_8.c regenc.h config.h 130 | $(encdir)\iso8859_9.obj: $(encdir)\iso8859_9.c regenc.h config.h 131 | $(encdir)\iso8859_10.obj: $(encdir)\iso8859_10.c regenc.h config.h 132 | $(encdir)\iso8859_11.obj: $(encdir)\iso8859_11.c regenc.h config.h 133 | $(encdir)\iso8859_13.obj: $(encdir)\iso8859_13.c regenc.h config.h 134 | $(encdir)\iso8859_14.obj: $(encdir)\iso8859_14.c regenc.h config.h 135 | $(encdir)\iso8859_15.obj: $(encdir)\iso8859_15.c regenc.h config.h 136 | $(encdir)\iso8859_16.obj: $(encdir)\iso8859_16.c regenc.h config.h 137 | $(encdir)\koi8.obj: $(encdir)\koi8.c regenc.h config.h 138 | $(encdir)\koi8_r.obj: $(encdir)\koi8_r.c regenc.h config.h 139 | $(encdir)\cp1251.obj: $(encdir)\cp1251.c regenc.h config.h 140 | $(encdir)\big5.obj: $(encdir)\big5.c regenc.h config.h 141 | $(encdir)\gb18030.obj: $(encdir)\gb18030.c regenc.h config.h 142 | 143 | 144 | # Ruby test 145 | rtest: 146 | $(RUBYDIR)\win32\ruby -w -Ke test.rb 147 | 148 | # C library test 149 | ctest: $(testc) 150 | .\$(testc) 151 | 152 | # POSIX C library test 153 | ptest: $(testp) 154 | .\$(testp) 155 | 156 | $(testc): $(testc).c $(libname) 157 | $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname) 158 | 159 | $(testp): $(testc).c $(dlllib) 160 | $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib) 161 | 162 | #$(testc)u.c: test.rb testconvu.rb 163 | # ruby -Ke testconvu.rb test.rb > $@ 164 | 165 | $(testc)u: $(testc)u.c $(libname) 166 | $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) 167 | 168 | 
clean: 169 | del *.obj $(encdir)\*.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj 170 | 171 | 172 | # backup file suffix 173 | SORIG = ruby_orig 174 | 175 | # ruby 1.9 source update 176 | 19: 177 | $(CP) regerror.c $(RUBYDIR) 178 | $(CP) regparse.c $(RUBYDIR) 179 | $(CP) regcomp.c $(RUBYDIR) 180 | $(CP) regexec.c $(RUBYDIR) 181 | $(CP) regenc.c $(RUBYDIR) 182 | $(CP) regint.h $(RUBYDIR) 183 | $(CP) regparse.h $(RUBYDIR) 184 | $(CP) regenc.h $(RUBYDIR) 185 | $(CP) oniguruma.h $(RUBYDIR) 186 | $(CP) enc\ascii.c $(RUBYDIR) 187 | $(CP) enc\utf8.c $(RUBYDIR) 188 | $(CP) enc\euc_jp.c $(RUBYDIR) 189 | $(CP) enc\sjis.c $(RUBYDIR) 190 | $(CP) enc\unicode.c $(RUBYDIR) 191 | 192 | 193 | samples: all 194 | $(CC) $(CFLAGS) -I. -o simple sample\simple.c $(dlllib) 195 | $(CC) $(CFLAGS) -I. -o posix sample\posix.c $(dlllib) 196 | $(CC) $(CFLAGS) -I. -o names sample\names.c $(dlllib) 197 | $(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib) 198 | $(CC) $(CFLAGS) -I. -o sql sample\sql.c $(dlllib) 199 | $(CC) $(CFLAGS) -I. -o encode sample\encode.c $(dlllib) 200 | $(CC) $(CFLAGS) -I. -o syntax sample\syntax.c $(dlllib) 201 | -------------------------------------------------------------------------------- /oniguruma/enc/euc_jp.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | euc_jp.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2008 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. 
Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regint.h" 31 | 32 | #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) 33 | 34 | static const int EncLen_EUCJP[] = { 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 44 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 46 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 48 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 49 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 51 | }; 52 | 53 | static int 54 | mbc_enc_len(const UChar* p) 55 | { 56 | return EncLen_EUCJP[*p]; 57 | } 58 | 59 | static OnigCodePoint 60 | mbc_to_code(const UChar* p, const UChar* end) 61 | { 62 | int c, i, len; 63 | OnigCodePoint n; 64 | 65 | len = enclen(ONIG_ENCODING_EUC_JP, p); 66 | n = (OnigCodePoint )*p++; 67 | if (len == 1) return n; 68 | 69 | for (i = 1; i < len; i++) { 70 | if (p >= end) break; 71 | c = *p++; 72 | n <<= 8; n += c; 73 | } 74 | return n; 75 | } 76 | 77 | static int 78 | code_to_mbclen(OnigCodePoint code) 79 | { 80 | if (ONIGENC_IS_CODE_ASCII(code)) return 1; 81 | else if ((code & 0xff0000) != 0) return 3; 82 | else if ((code & 0xff00) != 0) return 2; 83 | else 84 | return ONIGERR_INVALID_CODE_POINT_VALUE; 85 | } 86 | 87 | #if 0 88 | static int 89 | code_to_mbc_first(OnigCodePoint code) 90 | { 91 | int first; 92 | 93 | if ((code & 0xff0000) != 0) { 94 | first = (code >> 16) & 0xff; 95 | } 96 | else if ((code & 0xff00) != 0) { 97 | first = (code >> 8) & 
0xff; 98 | } 99 | else { 100 | return (int )code; 101 | } 102 | return first; 103 | } 104 | #endif 105 | 106 | static int 107 | code_to_mbc(OnigCodePoint code, UChar *buf) 108 | { 109 | UChar *p = buf; 110 | 111 | if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); 112 | if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); 113 | *p++ = (UChar )(code & 0xff); 114 | 115 | #if 1 116 | if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) 117 | return ONIGERR_INVALID_CODE_POINT_VALUE; 118 | #endif 119 | return p - buf; 120 | } 121 | 122 | static int 123 | mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 124 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 125 | { 126 | int len; 127 | const UChar* p = *pp; 128 | 129 | if (ONIGENC_IS_MBC_ASCII(p)) { 130 | *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 131 | (*pp)++; 132 | return 1; 133 | } 134 | else { 135 | int i; 136 | 137 | len = enclen(ONIG_ENCODING_EUC_JP, p); 138 | for (i = 0; i < len; i++) { 139 | *lower++ = *p++; 140 | } 141 | (*pp) += len; 142 | return len; /* return byte length of converted char to lower */ 143 | } 144 | } 145 | 146 | static UChar* 147 | left_adjust_char_head(const UChar* start, const UChar* s) 148 | { 149 | /* In this encoding 150 | mb-trail bytes doesn't mix with single bytes. 
151 | */ 152 | const UChar *p; 153 | int len; 154 | 155 | if (s <= start) return (UChar* )s; 156 | p = s; 157 | 158 | while (!eucjp_islead(*p) && p > start) p--; 159 | len = enclen(ONIG_ENCODING_EUC_JP, p); 160 | if (p + len > s) return (UChar* )p; 161 | p += len; 162 | return (UChar* )(p + ((s - p) & ~1)); 163 | } 164 | 165 | static int 166 | is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) 167 | { 168 | const UChar c = *s; 169 | if (c <= 0x7e || c == 0x8e || c == 0x8f) 170 | return TRUE; 171 | else 172 | return FALSE; 173 | } 174 | 175 | 176 | static int PropertyInited = 0; 177 | static const OnigCodePoint** PropertyList; 178 | static int PropertyListNum; 179 | static int PropertyListSize; 180 | static hash_table_type* PropertyNameTable; 181 | 182 | static const OnigCodePoint CR_Hiragana[] = { 183 | 1, 184 | 0xa4a1, 0xa4f3 185 | }; /* CR_Hiragana */ 186 | 187 | static const OnigCodePoint CR_Katakana[] = { 188 | 3, 189 | 0xa5a1, 0xa5f6, 190 | 0xaaa6, 0xaaaf, 191 | 0xaab1, 0xaadd 192 | }; /* CR_Katakana */ 193 | 194 | static int 195 | init_property_list(void) 196 | { 197 | int r; 198 | 199 | PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); 200 | PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); 201 | PropertyInited = 1; 202 | 203 | end: 204 | return r; 205 | } 206 | 207 | static int 208 | property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) 209 | { 210 | hash_data_type ctype; 211 | 212 | PROPERTY_LIST_INIT_CHECK; 213 | 214 | if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { 215 | return onigenc_minimum_property_name_to_ctype(enc, p, end); 216 | } 217 | 218 | return (int )ctype; 219 | } 220 | 221 | static int 222 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 223 | { 224 | if (ctype <= ONIGENC_MAX_STD_CTYPE) { 225 | if (code < 128) 226 | return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 227 | else { 228 | if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 229 | return (code_to_mbclen(code) > 1 ? 
TRUE : FALSE); 230 | } 231 | } 232 | } 233 | else { 234 | PROPERTY_LIST_INIT_CHECK; 235 | 236 | ctype -= (ONIGENC_MAX_STD_CTYPE + 1); 237 | if (ctype >= (unsigned int )PropertyListNum) 238 | return ONIGERR_TYPE_BUG; 239 | 240 | return onig_is_in_code_range((UChar* )PropertyList[ctype], code); 241 | } 242 | 243 | return FALSE; 244 | } 245 | 246 | static int 247 | get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, 248 | const OnigCodePoint* ranges[]) 249 | { 250 | if (ctype <= ONIGENC_MAX_STD_CTYPE) { 251 | return ONIG_NO_SUPPORT_CONFIG; 252 | } 253 | else { 254 | *sb_out = 0x80; 255 | 256 | PROPERTY_LIST_INIT_CHECK; 257 | 258 | ctype -= (ONIGENC_MAX_STD_CTYPE + 1); 259 | if (ctype >= (OnigCtype )PropertyListNum) 260 | return ONIGERR_TYPE_BUG; 261 | 262 | *ranges = PropertyList[ctype]; 263 | return 0; 264 | } 265 | } 266 | 267 | 268 | OnigEncodingType OnigEncodingEUC_JP = { 269 | mbc_enc_len, 270 | "EUC-JP", /* name */ 271 | 3, /* max enc length */ 272 | 1, /* min enc length */ 273 | onigenc_is_mbc_newline_0x0a, 274 | mbc_to_code, 275 | code_to_mbclen, 276 | code_to_mbc, 277 | mbc_case_fold, 278 | onigenc_ascii_apply_all_case_fold, 279 | onigenc_ascii_get_case_fold_codes_by_str, 280 | property_name_to_ctype, 281 | is_code_ctype, 282 | get_ctype_code_range, 283 | left_adjust_char_head, 284 | is_allowed_reverse_match 285 | }; 286 | -------------------------------------------------------------------------------- /oniguruma/enc/cp1251.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | cp1251.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2006-2007 Byte 6 | * K.Kosako 7 | * All rights reserved. 
8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions 11 | * are met: 12 | * 1. Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in the 16 | * documentation and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 | * SUCH DAMAGE. 
29 | */ 30 | 31 | #include "regenc.h" 32 | 33 | #define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c] 34 | #define ENC_IS_CP1251_CTYPE(code,ctype) \ 35 | ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 36 | 37 | static const UChar EncCP1251_ToLowerCaseTable[256] = { 38 | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 39 | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 40 | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 41 | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 42 | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 43 | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 44 | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 45 | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 46 | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 47 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 48 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 49 | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 50 | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 51 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 52 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 53 | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 54 | '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207', 55 | '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237', 56 | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 57 | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 58 | '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247', 59 | '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', 60 | '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267', 61 | '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277', 62 | '\340', '\341', '\342', '\343', '\344', '\345', 
'\346', '\347', 63 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 64 | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 65 | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 66 | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 67 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 68 | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 69 | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' 70 | }; 71 | 72 | static const unsigned short EncCP1251_CtypeTable[256] = { 73 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 74 | 0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 75 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 76 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 77 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 78 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 79 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 80 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 81 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 82 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 83 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 84 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 85 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 86 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 87 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 88 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 89 | 0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 90 | 0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 91 | 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 92 | 0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 93 | 0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 
0x34a2, 0x01a0, 0x01a0, 94 | 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2, 95 | 0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0, 96 | 0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2, 97 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 98 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 99 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 100 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 101 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 102 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 103 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 104 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 105 | }; 106 | 107 | static int 108 | cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 109 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 110 | { 111 | const UChar* p = *pp; 112 | 113 | *lower = ENC_CP1251_TO_LOWER_CASE(*p); 114 | (*pp)++; 115 | return 1; 116 | } 117 | 118 | static int 119 | cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype) 120 | { 121 | if (code < 256) 122 | return ENC_IS_CP1251_CTYPE(code, ctype); 123 | else 124 | return FALSE; 125 | } 126 | 127 | static const OnigPairCaseFoldCodes CaseFoldMap[] = { 128 | { 0xb8, 0xa8 }, 129 | 130 | { 0xe0, 0xc0 }, 131 | { 0xe1, 0xc1 }, 132 | { 0xe2, 0xc2 }, 133 | { 0xe3, 0xc3 }, 134 | { 0xe4, 0xc4 }, 135 | { 0xe5, 0xc5 }, 136 | { 0xe6, 0xc6 }, 137 | { 0xe7, 0xc7 }, 138 | { 0xe8, 0xc8 }, 139 | { 0xe9, 0xc9 }, 140 | { 0xea, 0xca }, 141 | { 0xeb, 0xcb }, 142 | { 0xec, 0xcc }, 143 | { 0xed, 0xcd }, 144 | { 0xee, 0xce }, 145 | { 0xef, 0xcf }, 146 | 147 | { 0xf0, 0xd0 }, 148 | { 0xf1, 0xd1 }, 149 | { 0xf2, 0xd2 }, 150 | { 0xf3, 0xd3 }, 151 | { 0xf4, 0xd4 }, 152 | { 0xf5, 0xd5 }, 153 | { 0xf6, 0xd6 }, 154 | { 0xf7, 0xd7 }, 155 | { 0xf8, 0xd8 }, 156 | { 0xf9, 0xd9 }, 157 | { 0xfa, 0xda }, 158 | { 0xfb, 0xdb }, 159 | 
{ 0xfc, 0xdc }, 160 | { 0xfd, 0xdd }, 161 | { 0xfe, 0xde }, 162 | { 0xff, 0xdf } 163 | }; 164 | 165 | static int 166 | cp1251_apply_all_case_fold(OnigCaseFoldType flag, 167 | OnigApplyAllCaseFoldFunc f, void* arg) 168 | { 169 | return onigenc_apply_all_case_fold_with_map( 170 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 171 | flag, f, arg); 172 | } 173 | 174 | static int 175 | cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, 176 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 177 | { 178 | return onigenc_get_case_fold_codes_by_str_with_map( 179 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 180 | flag, p, end, items); 181 | } 182 | 183 | OnigEncodingType OnigEncodingCP1251 = { 184 | onigenc_single_byte_mbc_enc_len, 185 | "CP1251", /* name */ 186 | 1, /* max enc length */ 187 | 1, /* min enc length */ 188 | onigenc_is_mbc_newline_0x0a, 189 | onigenc_single_byte_mbc_to_code, 190 | onigenc_single_byte_code_to_mbclen, 191 | onigenc_single_byte_code_to_mbc, 192 | cp1251_mbc_case_fold, 193 | cp1251_apply_all_case_fold, 194 | cp1251_get_case_fold_codes_by_str, 195 | onigenc_minimum_property_name_to_ctype, 196 | cp1251_is_code_ctype, 197 | onigenc_not_support_get_ctype_code_range, 198 | onigenc_single_byte_left_adjust_char_head, 199 | onigenc_always_true_is_allowed_reverse_match 200 | }; 201 | -------------------------------------------------------------------------------- /oniguruma/enc/koi8_r.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | koi8_r.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 
28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] 33 | #define ENC_IS_KOI8_R_CTYPE(code,ctype) \ 34 | ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 35 | 36 | static const UChar EncKOI8_R_ToLowerCaseTable[256] = { 37 | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 38 | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 39 | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 40 | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 41 | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 42 | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 43 | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 44 | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 45 | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 46 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 47 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 48 | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 49 | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 50 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 51 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 52 | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 53 | '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 54 | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 55 | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 56 | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 57 | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 58 | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 59 | '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', 60 | '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 61 | '\300', '\301', '\302', '\303', '\304', '\305', 
'\306', '\307', 62 | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 63 | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 64 | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 65 | '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 66 | '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 67 | '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 68 | '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' 69 | }; 70 | 71 | static const unsigned short EncKOI8_R_CtypeTable[256] = { 72 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 73 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 74 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 75 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 76 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 77 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 78 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 79 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 80 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 81 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 82 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 83 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 84 | 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 85 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 86 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 87 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 88 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 89 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 90 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 91 | 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, 92 | 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 
0x00a0, 0x00a0, 0x00a0, 93 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 94 | 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 95 | 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 96 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 97 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 98 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 99 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 100 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 101 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 102 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 103 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 104 | }; 105 | 106 | static int 107 | koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 108 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 109 | { 110 | const UChar* p = *pp; 111 | 112 | *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); 113 | (*pp)++; 114 | return 1; 115 | } 116 | 117 | #if 0 118 | static int 119 | koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 120 | { 121 | int v; 122 | const UChar* p = *pp; 123 | 124 | (*pp)++; 125 | v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 126 | return (v != 0 ? 
TRUE : FALSE); 127 | } 128 | #endif 129 | 130 | static int 131 | koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) 132 | { 133 | if (code < 256) 134 | return ENC_IS_KOI8_R_CTYPE(code, ctype); 135 | else 136 | return FALSE; 137 | } 138 | 139 | static const OnigPairCaseFoldCodes CaseFoldMap[] = { 140 | { 0xa3, 0xb3 }, 141 | 142 | { 0xc0, 0xe0 }, 143 | { 0xc1, 0xe1 }, 144 | { 0xc2, 0xe2 }, 145 | { 0xc3, 0xe3 }, 146 | { 0xc4, 0xe4 }, 147 | { 0xc5, 0xe5 }, 148 | { 0xc6, 0xe6 }, 149 | { 0xc7, 0xe7 }, 150 | { 0xc8, 0xe8 }, 151 | { 0xc9, 0xe9 }, 152 | { 0xca, 0xea }, 153 | { 0xcb, 0xeb }, 154 | { 0xcc, 0xec }, 155 | { 0xcd, 0xed }, 156 | { 0xce, 0xee }, 157 | { 0xcf, 0xef }, 158 | 159 | { 0xd0, 0xf0 }, 160 | { 0xd1, 0xf1 }, 161 | { 0xd2, 0xf2 }, 162 | { 0xd3, 0xf3 }, 163 | { 0xd4, 0xf4 }, 164 | { 0xd5, 0xf5 }, 165 | { 0xd6, 0xf6 }, 166 | { 0xd7, 0xf7 }, 167 | { 0xd8, 0xf8 }, 168 | { 0xd9, 0xf9 }, 169 | { 0xda, 0xfa }, 170 | { 0xdb, 0xfb }, 171 | { 0xdc, 0xfc }, 172 | { 0xdd, 0xfd }, 173 | { 0xde, 0xfe }, 174 | { 0xdf, 0xff } 175 | }; 176 | 177 | static int 178 | koi8_r_apply_all_case_fold(OnigCaseFoldType flag, 179 | OnigApplyAllCaseFoldFunc f, void* arg) 180 | { 181 | return onigenc_apply_all_case_fold_with_map( 182 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 183 | flag, f, arg); 184 | } 185 | 186 | static int 187 | koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, 188 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 189 | { 190 | return onigenc_get_case_fold_codes_by_str_with_map( 191 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 192 | flag, p, end, items); 193 | } 194 | 195 | OnigEncodingType OnigEncodingKOI8_R = { 196 | onigenc_single_byte_mbc_enc_len, 197 | "KOI8-R", /* name */ 198 | 1, /* max enc length */ 199 | 1, /* min enc length */ 200 | onigenc_is_mbc_newline_0x0a, 201 | onigenc_single_byte_mbc_to_code, 202 | onigenc_single_byte_code_to_mbclen, 203 | 
onigenc_single_byte_code_to_mbc, 204 | koi8_r_mbc_case_fold, 205 | koi8_r_apply_all_case_fold, 206 | koi8_r_get_case_fold_codes_by_str, 207 | onigenc_minimum_property_name_to_ctype, 208 | koi8_r_is_code_ctype, 209 | onigenc_not_support_get_ctype_code_range, 210 | onigenc_single_byte_left_adjust_char_head, 211 | onigenc_always_true_is_allowed_reverse_match 212 | }; 213 | -------------------------------------------------------------------------------- /oniguruma/enc/iso8859_7.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | iso8859_7.c - Oniguruma (regular expression library) 3 | **********************************************************************/ 4 | /*- 5 | * Copyright (c) 2002-2007 K.Kosako 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] 33 | #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ 34 | ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) 35 | 36 | static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { 37 | '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 38 | '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 39 | '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 40 | '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 41 | '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 42 | '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 43 | '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 44 | '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 45 | '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 46 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 47 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 48 | '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 49 | '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 50 | '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 51 | '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 52 | '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 53 | '\200', '\201', '\202', 
'\203', '\204', '\205', '\206', '\207', 54 | '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 55 | '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 56 | '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 57 | '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 58 | '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 59 | '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', 60 | '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', 61 | '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 62 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 63 | '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', 64 | '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', 65 | '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 66 | '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 67 | '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 68 | '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' 69 | }; 70 | 71 | static const unsigned short EncISO_8859_7_CtypeTable[256] = { 72 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 73 | 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 74 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 75 | 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 76 | 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 77 | 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 78 | 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 79 | 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 80 | 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 81 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 82 | 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 83 | 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 84 | 0x41a0, 
0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 85 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 86 | 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 87 | 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 88 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 89 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 90 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 91 | 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 92 | 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, 93 | 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, 94 | 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, 95 | 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, 96 | 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 97 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 98 | 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 99 | 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 100 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 101 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 102 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 103 | 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000 104 | }; 105 | 106 | static int 107 | mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, 108 | const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) 109 | { 110 | const UChar* p = *pp; 111 | 112 | *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); 113 | (*pp)++; 114 | return 1; 115 | } 116 | 117 | #if 0 118 | static int 119 | is_mbc_ambiguous(OnigCaseFoldType flag, 120 | const UChar** pp, const UChar* end) 121 | { 122 | int v; 123 | const UChar* p = *pp; 124 | 125 | (*pp)++; 126 | v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); 127 | if ((v | BIT_CTYPE_LOWER) != 0) { 128 | if (*p == 0xc0 || *p == 
0xe0) 129 | return FALSE; 130 | else 131 | return TRUE; 132 | } 133 | 134 | return (v != 0 ? TRUE : FALSE); 135 | } 136 | #endif 137 | 138 | static int 139 | is_code_ctype(OnigCodePoint code, unsigned int ctype) 140 | { 141 | if (code < 256) 142 | return ENC_IS_ISO_8859_7_CTYPE(code, ctype); 143 | else 144 | return FALSE; 145 | } 146 | 147 | static const OnigPairCaseFoldCodes CaseFoldMap[] = { 148 | { 0xb6, 0xdc }, 149 | { 0xb8, 0xdd }, 150 | { 0xb9, 0xde }, 151 | { 0xba, 0xdf }, 152 | { 0xbc, 0xfc }, 153 | { 0xbe, 0xfd }, 154 | { 0xbf, 0xfe }, 155 | 156 | { 0xc1, 0xe1 }, 157 | { 0xc2, 0xe2 }, 158 | { 0xc3, 0xe3 }, 159 | { 0xc4, 0xe4 }, 160 | { 0xc5, 0xe5 }, 161 | { 0xc6, 0xe6 }, 162 | { 0xc7, 0xe7 }, 163 | { 0xc8, 0xe8 }, 164 | { 0xc9, 0xe9 }, 165 | { 0xca, 0xea }, 166 | { 0xcb, 0xeb }, 167 | { 0xcc, 0xec }, 168 | { 0xcd, 0xed }, 169 | { 0xce, 0xee }, 170 | { 0xcf, 0xef }, 171 | 172 | { 0xd0, 0xf0 }, 173 | { 0xd1, 0xf1 }, 174 | { 0xd2, 0xf2 }, 175 | { 0xd3, 0xf3 }, 176 | { 0xd4, 0xf4 }, 177 | { 0xd5, 0xf5 }, 178 | { 0xd6, 0xf6 }, 179 | { 0xd7, 0xf7 }, 180 | { 0xd8, 0xf8 }, 181 | { 0xd9, 0xf9 }, 182 | { 0xda, 0xfa }, 183 | { 0xdb, 0xfb } 184 | }; 185 | 186 | static int 187 | apply_all_case_fold(OnigCaseFoldType flag, 188 | OnigApplyAllCaseFoldFunc f, void* arg) 189 | { 190 | return onigenc_apply_all_case_fold_with_map( 191 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 192 | flag, f, arg); 193 | } 194 | 195 | static int 196 | get_case_fold_codes_by_str(OnigCaseFoldType flag, 197 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 198 | { 199 | return onigenc_get_case_fold_codes_by_str_with_map( 200 | sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 201 | flag, p, end, items); 202 | } 203 | 204 | 205 | OnigEncodingType OnigEncodingISO_8859_7 = { 206 | onigenc_single_byte_mbc_enc_len, 207 | "ISO-8859-7", /* name */ 208 | 1, /* max enc length */ 209 | 1, /* min enc length */ 210 | 
onigenc_is_mbc_newline_0x0a,
  onigenc_single_byte_mbc_to_code,
  onigenc_single_byte_code_to_mbclen,
  onigenc_single_byte_code_to_mbc,
  mbc_case_fold,
  apply_all_case_fold,
  get_case_fold_codes_by_str,
  onigenc_minimum_property_name_to_ctype,
  is_code_ctype,
  onigenc_not_support_get_ctype_code_range,
  onigenc_single_byte_left_adjust_char_head,
  onigenc_always_true_is_allowed_reverse_match
};
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.8)
project(reason)

set(PROJECT_MAJOR_VERSION 1)
set(PROJECT_MINOR_VERSION 2)
set(PROJECT_PATCH_VERSION 9)

set (PROJECT_VERSION ${PROJECT_MAJOR_VERSION}.${PROJECT_MINOR_VERSION}.${PROJECT_PATCH_VERSION})
set (CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeModules)

INCLUDE (CheckFunctionExists)
INCLUDE (CheckIncludeFiles)
INCLUDE (CheckTypeSize)

# Platform probes; results feed the generated config headers and the
# NO_* fallbacks below.
CHECK_FUNCTION_EXISTS(alloca  C_ALLOCA)
CHECK_FUNCTION_EXISTS(memcmp  HAVE_MEMCMP)
CHECK_FUNCTION_EXISTS(strndup HAVE_STRNDUP)
CHECK_FUNCTION_EXISTS(strnlen HAVE_STRNLEN)

CHECK_INCLUDE_FILES(alloca.h    HAVE_ALLOCA_H)
CHECK_INCLUDE_FILES(strings.h   HAVE_STRINGS_H)
CHECK_INCLUDE_FILES(string.h    HAVE_STRING_H)
CHECK_INCLUDE_FILES(stdlib.h    HAVE_STDLIB_H)
CHECK_INCLUDE_FILES(sys/time.h  HAVE_SYS_TIME_H)
CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H)
CHECK_INCLUDE_FILES(unistd.h    HAVE_UNISTD_H)
CHECK_INCLUDE_FILES(memory.h    HAVE_MEMORY_H)
CHECK_INCLUDE_FILES(stdarg.h    HAVE_STDARG_PROTOTYPES)
CHECK_INCLUDE_FILES(sys/tree.h  HAVE_SYS_TREE)
CHECK_INCLUDE_FILES(sys/queue.h HAVE_SYS_QUEUE)
CHECK_INCLUDE_FILES(sys/un.h    HAVE_SYS_UN)

CHECK_TYPE_SIZE("int"   SIZEOF_INT)
CHECK_TYPE_SIZE("long"  SIZEOF_LONG)
CHECK_TYPE_SIZE("short" SIZEOF_SHORT)

# Ship bundled replacements for BSD headers the platform lacks.
if (NOT HAVE_SYS_TREE)
    CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/compat/sys/tree.h.in ${CMAKE_CURRENT_BINARY_DIR}/compat/sys/tree.h)
endif(NOT HAVE_SYS_TREE)

if (NOT HAVE_SYS_QUEUE)
    CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/compat/sys/queue.h.in ${CMAKE_CURRENT_BINARY_DIR}/compat/sys/queue.h)
endif(NOT HAVE_SYS_QUEUE)

if (NOT HAVE_STRNDUP)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_STRNDUP")
endif(NOT HAVE_STRNDUP)

if (NOT HAVE_STRNLEN)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_STRNLEN")
endif(NOT HAVE_STRNLEN)

if (NOT HAVE_SYS_UN)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_SYS_UN")
endif(NOT HAVE_SYS_UN)

# -DEVHTP_DISABLE_SSL:STRING=ON
OPTION(EVHTP_DISABLE_SSL       "Disable ssl support"      OFF)

# -DEVHTP_DISABLE_EVTHR:STRING=ON
OPTION(EVHTP_DISABLE_EVTHR     "Disable evthread support" OFF)

# -DEVHTP_DISABLE_REGEX:STRING=ON
OPTION(EVHTP_DISABLE_REGEX     "Disable regex support"    OFF)

# -DEVHTP_BUILD_SHARED:STRING=ON
OPTION(EVHTP_BUILD_SHARED      "Build shared library too" OFF)

# -DEVHTP_USE_DEFER_ACCEPT:STRING=ON
OPTION(EVHTP_USE_DEFER_ACCEPT  "Enable TCP_DEFER_ACCEPT"  OFF)

if (EVHTP_USE_DEFER_ACCEPT)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_DEFER_ACCEPT")
endif(EVHTP_USE_DEFER_ACCEPT)

SET(CMAKE_INCLUDE_CURRENT_DIR ON)

include(BaseConfig)

message("Build Type: ${CMAKE_BUILD_TYPE}")
message("Std CFLAGS: ${CMAKE_C_FLAGS}")
message("Dbg CFLAGS: ${CMAKE_C_FLAGS_DEBUG}")
message("Rel CFLAGS: ${CMAKE_C_FLAGS_RELEASE}")

find_package(LibEvent REQUIRED)

if (NOT LIBEVENT_OPENSSL_LIBRARY)
    set (EVHTP_DISABLE_SSL ON)
endif()

if (NOT EVHTP_DISABLE_SSL)
    find_package(OpenSSL)
endif()

find_path(LIBEVENT_INCLUDE_DIR event2/event.h REQUIRED)

if (NOT EVHTP_DISABLE_REGEX)
    find_library(HAS_SYS_ONIG onig)
endif()

if (NOT OPENSSL_FOUND)
    message("Disabling SSL")
    set (EVHTP_DISABLE_SSL        ON)
    set (OPENSSL_CRYPTO_LIBRARY   "")
    set (OPENSSL_INCLUDE_DIR      "")
    set (OPENSSL_LIBRARIES        "")
    set (LIBEVENT_OPENSSL_LIBRARY "")
endif()

# Regex support: link the system libonig when present, otherwise build the
# bundled oniguruma sources into libevhtp.
if (NOT EVHTP_DISABLE_REGEX)
    if (NOT HAS_SYS_ONIG)
        CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/oniguruma/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/oniguruma/config.h)
        set(ONIG_SOURCES
            oniguruma/regerror.c
            oniguruma/regparse.c
            oniguruma/regext.c
            oniguruma/regcomp.c
            oniguruma/regexec.c
            oniguruma/reggnu.c
            oniguruma/regenc.c
            oniguruma/regsyntax.c
            oniguruma/regtrav.c
            oniguruma/regversion.c
            oniguruma/st.c
            oniguruma/regposix.c
            oniguruma/regposerr.c
            oniguruma/enc/unicode.c
            oniguruma/enc/ascii.c
            oniguruma/enc/utf8.c
            oniguruma/enc/utf16_be.c
            oniguruma/enc/utf16_le.c
            oniguruma/enc/utf32_be.c
            oniguruma/enc/utf32_le.c
            oniguruma/enc/euc_jp.c
            oniguruma/enc/sjis.c
            oniguruma/enc/iso8859_1.c
            oniguruma/enc/iso8859_2.c
            oniguruma/enc/iso8859_3.c
            oniguruma/enc/iso8859_4.c
            oniguruma/enc/iso8859_5.c
            oniguruma/enc/iso8859_6.c
            oniguruma/enc/iso8859_7.c
            oniguruma/enc/iso8859_8.c
            oniguruma/enc/iso8859_9.c
            oniguruma/enc/iso8859_10.c
            oniguruma/enc/iso8859_11.c
            oniguruma/enc/iso8859_13.c
            oniguruma/enc/iso8859_14.c
            oniguruma/enc/iso8859_15.c
            oniguruma/enc/iso8859_16.c
            oniguruma/enc/euc_tw.c
            oniguruma/enc/euc_kr.c
            oniguruma/enc/big5.c
            oniguruma/enc/gb18030.c
            oniguruma/enc/koi8_r.c
            oniguruma/enc/cp1251.c)
        set (ONIG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/oniguruma;${CMAKE_CURRENT_SOURCE_DIR}/oniguruma)
    else()
        message("-- Using system libonig")
        set(ONIG_SOURCES "")
        set(ONIG_LIBS ${HAS_SYS_ONIG})
        set(ONIG_INCLUDE_DIR "")
    endif(NOT HAS_SYS_ONIG)
else()
    set(ONIG_SOURCES "")
    set(ONIG_LIBS "")
    set(ONIG_INCLUDE_DIR "")
endif()

include_directories(
    ${CMAKE_CURRENT_BINARY_DIR}/compat
    ${CMAKE_CURRENT_SOURCE_DIR}/htparse
    ${CMAKE_CURRENT_SOURCE_DIR}/evthr
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${ONIG_INCLUDE_DIR}
    ${OPENSSL_INCLUDE_DIR}
    ${LIBEVENT_INCLUDE_DIR}
)

set(LIBEVHTP_EXTERNAL_LIBS
    ${LIBEVENT_LIBRARY}
    ${LIBEVENT_PTHREADS_LIBRARY}
    ${LIBEVENT_OPENSSL_LIBRARY}
    ${OPENSSL_LIBRARIES}
    ${ONIG_LIBS}
)

# Test the variables by name rather than by expansion: an unset *_FOUND
# would otherwise leave a malformed "if(NOT)".
if (NOT LIBEVENT_PTHREADS_FOUND)
    set(EVHTP_DISABLE_EVTHR ON)
endif(NOT LIBEVENT_PTHREADS_FOUND)

if (NOT LIBEVENT_OPENSSL_FOUND)
    set (EVHTP_DISABLE_SSL ON)
endif(NOT LIBEVENT_OPENSSL_FOUND)

set(LIBEVHTP_SOURCES evhtp.c htparse/htparse.c)

if (NOT EVHTP_DISABLE_EVTHR)
    set (LIBEVHTP_EXTERNAL_LIBS ${LIBEVHTP_EXTERNAL_LIBS} pthread)
    set (LIBEVHTP_SOURCES ${LIBEVHTP_SOURCES} evthr/evthr.c)
endif(NOT EVHTP_DISABLE_EVTHR)

# Quoted so that an empty CMAKE_BUILD_TYPE still yields a valid comparison.
if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG")
endif (NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")

IF (WIN32)
    ADD_DEFINITIONS(-DWIN32)
    ADD_DEFINITIONS(-march=i486)
    find_library (LIB_WS32 ws2_32)
    set (SYS_LIBS ${SYS_LIBS} ${LIB_WS32})
ELSE ()
    if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
        find_library (LIB_DL dl)
        set (SYS_LIBS ${LIB_DL})
    endif()

    if (NOT APPLE)
        find_library (LIB_RT rt)
        set (SYS_LIBS ${SYS_LIBS} ${LIB_RT})
    endif()

ENDIF (WIN32)

add_custom_target(examples)

if (EVHTP_BUILD_SHARED)
    set (EVHTP_LIBTYPE SHARED)
else()
    set (EVHTP_LIBTYPE STATIC)
endif()

configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/evhtp-config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/evhtp-config.h)

add_library(libevhtp ${EVHTP_LIBTYPE} ${LIBEVHTP_SOURCES} ${ONIG_SOURCES})

set_target_properties(libevhtp PROPERTIES OUTPUT_NAME "evhtp")
target_link_libraries(libevhtp ${LIBEVHTP_EXTERNAL_LIBS})

# Example programs are only built through the "examples" target.
add_executable(test        EXCLUDE_FROM_ALL examples/test.c)
add_executable(test_basic  EXCLUDE_FROM_ALL examples/test_basic.c)
add_executable(test_vhost  EXCLUDE_FROM_ALL examples/test_vhost.c)
add_executable(test_client EXCLUDE_FROM_ALL examples/test_client.c)
add_executable(test_proxy  EXCLUDE_FROM_ALL examples/test_proxy.c)

target_link_libraries(test        libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS})
target_link_libraries(test_basic  libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS})
target_link_libraries(test_vhost  libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS})
target_link_libraries(test_client libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS})
target_link_libraries(test_proxy  libevhtp ${LIBEVHTP_EXTERNAL_LIBS} ${SYS_LIBS})

add_dependencies(examples test test_basic test_vhost test_client test_proxy)

if (NOT LIB_INSTALL_DIR)
    set (LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib")
endif (NOT LIB_INSTALL_DIR)

if(NOT INCLUDE_INSTALL_DIR)
    set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/include)
endif()

install (TARGETS libevhtp DESTINATION ${LIB_INSTALL_DIR})
install (FILES evhtp.h DESTINATION ${INCLUDE_INSTALL_DIR})
install (FILES htparse/htparse.h DESTINATION ${INCLUDE_INSTALL_DIR})
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/evhtp-config.h DESTINATION ${INCLUDE_INSTALL_DIR})

if (NOT EVHTP_DISABLE_EVTHR)
    install (FILES evthr/evthr.h DESTINATION ${INCLUDE_INSTALL_DIR})
endif()

# oniguruma/onigposix.h

if (NOT EVHTP_DISABLE_REGEX)
    if (NOT HAS_SYS_ONIG)
        install (FILES oniguruma/onigposix.h DESTINATION ${INCLUDE_INSTALL_DIR})
    endif()
endif()

IF (WIN32)
    install (FILES compat/sys/queue.h DESTINATION ${INCLUDE_INSTALL_DIR}/sys)
    install (FILES oniguruma/onigposix.h DESTINATION ${INCLUDE_INSTALL_DIR})
ENDIF (WIN32)
--------------------------------------------------------------------------------
/oniguruma/enc/utf8.c:
--------------------------------------------------------------------------------
/**********************************************************************
  utf8.c -  Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2007  K.Kosako
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | * SUCH DAMAGE. 28 | */ 29 | 30 | #include "regenc.h" 31 | 32 | #define USE_INVALID_CODE_SCHEME 33 | 34 | #ifdef USE_INVALID_CODE_SCHEME 35 | /* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ 36 | #define INVALID_CODE_FE 0xfffffffe 37 | #define INVALID_CODE_FF 0xffffffff 38 | #define VALID_CODE_LIMIT 0x7fffffff 39 | #endif 40 | 41 | #define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) 42 | 43 | static const int EncLen_UTF8[] = { 44 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 50 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 52 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 53 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 54 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 55 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 56 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 57 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 58 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 59 | 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 60 | }; 61 | 62 | static int 63 | mbc_enc_len(const UChar* p) 64 | { 65 | return EncLen_UTF8[*p]; 66 | } 67 | 68 | static int 69 | is_mbc_newline(const UChar* 
p, const UChar* end) 70 | { 71 | if (p < end) { 72 | if (*p == 0x0a) return 1; 73 | 74 | #ifdef USE_UNICODE_ALL_LINE_TERMINATORS 75 | #ifndef USE_CRNL_AS_LINE_TERMINATOR 76 | if (*p == 0x0d) return 1; 77 | #endif 78 | if (p + 1 < end) { 79 | if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ 80 | return 1; 81 | if (p + 2 < end) { 82 | if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) 83 | && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ 84 | return 1; 85 | } 86 | } 87 | #endif 88 | } 89 | 90 | return 0; 91 | } 92 | 93 | static OnigCodePoint 94 | mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 95 | { 96 | int c, len; 97 | OnigCodePoint n; 98 | 99 | len = enclen(ONIG_ENCODING_UTF8, p); 100 | c = *p++; 101 | if (len > 1) { 102 | len--; 103 | n = c & ((1 << (6 - len)) - 1); 104 | while (len--) { 105 | c = *p++; 106 | n = (n << 6) | (c & ((1 << 6) - 1)); 107 | } 108 | return n; 109 | } 110 | else { 111 | #ifdef USE_INVALID_CODE_SCHEME 112 | if (c > 0xfd) { 113 | return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF); 114 | } 115 | #endif 116 | return (OnigCodePoint )c; 117 | } 118 | } 119 | 120 | static int 121 | code_to_mbclen(OnigCodePoint code) 122 | { 123 | if ((code & 0xffffff80) == 0) return 1; 124 | else if ((code & 0xfffff800) == 0) return 2; 125 | else if ((code & 0xffff0000) == 0) return 3; 126 | else if ((code & 0xffe00000) == 0) return 4; 127 | else if ((code & 0xfc000000) == 0) return 5; 128 | else if ((code & 0x80000000) == 0) return 6; 129 | #ifdef USE_INVALID_CODE_SCHEME 130 | else if (code == INVALID_CODE_FE) return 1; 131 | else if (code == INVALID_CODE_FF) return 1; 132 | #endif 133 | else 134 | return ONIGERR_INVALID_CODE_POINT_VALUE; 135 | } 136 | 137 | static int 138 | code_to_mbc(OnigCodePoint code, UChar *buf) 139 | { 140 | #define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) 141 | #define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) 142 | 143 | if ((code & 0xffffff80) == 0) { 144 | *buf = (UChar )code; 145 | 
return 1; 146 | } 147 | else { 148 | UChar *p = buf; 149 | 150 | if ((code & 0xfffff800) == 0) { 151 | *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); 152 | } 153 | else if ((code & 0xffff0000) == 0) { 154 | *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); 155 | *p++ = UTF8_TRAILS(code, 6); 156 | } 157 | else if ((code & 0xffe00000) == 0) { 158 | *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); 159 | *p++ = UTF8_TRAILS(code, 12); 160 | *p++ = UTF8_TRAILS(code, 6); 161 | } 162 | else if ((code & 0xfc000000) == 0) { 163 | *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); 164 | *p++ = UTF8_TRAILS(code, 18); 165 | *p++ = UTF8_TRAILS(code, 12); 166 | *p++ = UTF8_TRAILS(code, 6); 167 | } 168 | else if ((code & 0x80000000) == 0) { 169 | *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); 170 | *p++ = UTF8_TRAILS(code, 24); 171 | *p++ = UTF8_TRAILS(code, 18); 172 | *p++ = UTF8_TRAILS(code, 12); 173 | *p++ = UTF8_TRAILS(code, 6); 174 | } 175 | #ifdef USE_INVALID_CODE_SCHEME 176 | else if (code == INVALID_CODE_FE) { 177 | *p = 0xfe; 178 | return 1; 179 | } 180 | else if (code == INVALID_CODE_FF) { 181 | *p = 0xff; 182 | return 1; 183 | } 184 | #endif 185 | else { 186 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 187 | } 188 | 189 | *p++ = UTF8_TRAIL0(code); 190 | return p - buf; 191 | } 192 | } 193 | 194 | static int 195 | mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, 196 | const UChar* end, UChar* fold) 197 | { 198 | const UChar* p = *pp; 199 | 200 | if (ONIGENC_IS_MBC_ASCII(p)) { 201 | #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI 202 | if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { 203 | if (*p == 0x49) { 204 | *fold++ = 0xc4; 205 | *fold = 0xb1; 206 | (*pp)++; 207 | return 2; 208 | } 209 | } 210 | #endif 211 | 212 | *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 213 | (*pp)++; 214 | return 1; /* return byte length of converted char to lower */ 215 | } 216 | else { 217 | return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag, 218 | pp, end, fold); 219 | } 220 | } 221 | 222 | #if 0 
223 | static int 224 | is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) 225 | { 226 | const UChar* p = *pp; 227 | 228 | if (ONIGENC_IS_MBC_ASCII(p)) { 229 | (*pp)++; 230 | return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 231 | } 232 | else { 233 | (*pp) += enclen(ONIG_ENCODING_UTF8, p); 234 | 235 | if (*p == 0xc3) { 236 | int c = *(p + 1); 237 | if (c >= 0x80) { 238 | if (c <= (UChar )0x9e) { /* upper */ 239 | if (c == (UChar )0x97) return FALSE; 240 | return TRUE; 241 | } 242 | else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ 243 | if (c == (UChar )'\267') return FALSE; 244 | return TRUE; 245 | } 246 | else if (c == (UChar )0x9f && 247 | (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { 248 | return TRUE; 249 | } 250 | } 251 | } 252 | } 253 | 254 | return FALSE; 255 | } 256 | #endif 257 | 258 | 259 | static int 260 | get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, 261 | const OnigCodePoint* ranges[]) 262 | { 263 | *sb_out = 0x80; 264 | return onigenc_unicode_ctype_code_range(ctype, ranges); 265 | } 266 | 267 | 268 | static UChar* 269 | left_adjust_char_head(const UChar* start, const UChar* s) 270 | { 271 | const UChar *p; 272 | 273 | if (s <= start) return (UChar* )s; 274 | p = s; 275 | 276 | while (!utf8_islead(*p) && p > start) p--; 277 | return (UChar* )p; 278 | } 279 | 280 | static int 281 | get_case_fold_codes_by_str(OnigCaseFoldType flag, 282 | const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 283 | { 284 | return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8, 285 | flag, p, end, items); 286 | } 287 | 288 | OnigEncodingType OnigEncodingUTF8 = { 289 | mbc_enc_len, 290 | "UTF-8", /* name */ 291 | 6, /* max byte length */ 292 | 1, /* min byte length */ 293 | is_mbc_newline, 294 | mbc_to_code, 295 | code_to_mbclen, 296 | code_to_mbc, 297 | mbc_case_fold, 298 | onigenc_unicode_apply_all_case_fold, 299 | get_case_fold_codes_by_str, 300 | 
onigenc_unicode_property_name_to_ctype, 301 | onigenc_unicode_is_code_ctype, 302 | get_ctype_code_range, 303 | left_adjust_char_head, 304 | onigenc_always_true_is_allowed_reverse_match 305 | }; 306 | --------------------------------------------------------------------------------