├── .gitignore ├── README.md ├── appveyor.yml ├── bregonig.html ├── bsd_license.txt ├── pack.bat ├── perl_license.txt ├── perl_license_jp.txt ├── pkgfiles.lst ├── src ├── Makefile ├── afxres.h ├── bregexp.h ├── bregonig.cpp ├── bregonig.h ├── bregonig.rc ├── bsplit.cpp ├── btrans.cpp ├── dbgtrace.h ├── mem_vc6.h ├── python │ ├── BregPool.py │ ├── bregonig.py │ ├── sample_bomatch.py │ ├── sample_bosubst.py │ ├── sample_bosubst_utf8.py │ ├── sample_match.py │ ├── sample_match_utf16.py │ ├── sample_split.py │ ├── sample_subst.py │ ├── sample_trans.py │ ├── test_common.py │ ├── test_crnl.py │ ├── test_match.py │ └── test_subst.py ├── resource.h ├── sample │ ├── bregpool.h │ ├── sample.c │ ├── sample_bosubst.cpp │ ├── sample_match.cpp │ ├── sample_split.cpp │ ├── sample_subst.cpp │ └── sample_trans.cpp ├── subst.cpp ├── sv.cpp ├── sv.h └── version.h └── srcfiles.lst /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.a 4 | *.obj 5 | *.dll 6 | *.exp 7 | *.lib 8 | *.pyc 9 | *.bak 10 | *.BAK 11 | *~ 12 | *.swp 13 | *.orig 14 | *.rej 15 | *.map 16 | *.RES 17 | *.res 18 | *.pdb 19 | 20 | *.zip 21 | *.7z 22 | 23 | tags 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://ci.appveyor.com/api/projects/status/plm9o9tkcf27g7hc/branch/master?svg=true)](https://ci.appveyor.com/project/k-takata/bregonig/branch/master) 2 | 3 | # bregonig.dll 4 | 5 | This is the source code package of the bregonig.dll regular expression library. 6 | 7 | Binary packages and documents are available at the following site: 8 | http://k-takata.o.oo7.jp/mysoft/bregonig.html (Japanese) 9 | 10 | Bregonig.dll is a regular expression library compatible with bregexp.dll. 11 | Bregexp.dll was widely used in Japanese Win32 applications, but the regexp 12 | engine was very old.
(It seems to be a modified version of Perl 5.00x.) 13 | On the other hand, bregonig.dll uses Oniguruma (or Onigmo) to support 14 | more powerful regexp patterns. 15 | 16 | ## LICENSE 17 | 18 | You may distribute under the terms of either the GNU General Public 19 | License or the Artistic License. 20 | 21 | ## References 22 | 23 | * bregexp.dll: 24 | http://www.hi-ho.ne.jp/babaq/bregexp.html (Japanese) 25 | 26 | * Oniguruma: 27 | http://github.com/kkos/oniguruma 28 | Compatible with Perl 5.8's regexp patterns. 29 | 30 | * Onigmo (Oniguruma-mod): 31 | https://github.com/k-takata/Onigmo 32 | Compatible with Perl 5.14's regexp patterns. 33 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 1.0.{build} 2 | 3 | environment: 4 | matrix: 5 | - ARCH: x64 6 | PYOPT: -3 7 | - ARCH: x86 8 | PYOPT: -3.4-32 9 | 10 | build_script: 11 | - cmd: |- 12 | "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /%ARCH% /release 13 | git clone -q --branch=bregonig --depth 1 https://github.com/k-takata/Onigmo.git ..\onigmo 14 | cd ..\onigmo 15 | nmake -f win32\Makefile lib 16 | cd %APPVEYOR_BUILD_FOLDER%\src 17 | nmake ONIG_DIR=..\..\onigmo 18 | dir obj%ARCH%\*.dll 19 | \msys64\usr\bin\file obj%ARCH%/*.dll 20 | 21 | test_script: 22 | - cmd: |- 23 | copy obj%ARCH%\bregonig.dll python 24 | cd python 25 | py %PYOPT% test_crnl.py SJIS 26 | py %PYOPT% test_crnl.py UTF-8 27 | py %PYOPT% test_crnl.py UTF-16LE 28 | py %PYOPT% test_match.py SJIS 29 | py %PYOPT% test_match.py UTF-8 30 | py %PYOPT% test_match.py UTF-16LE 31 | py %PYOPT% test_subst.py SJIS 32 | py %PYOPT% test_subst.py UTF-8 33 | py %PYOPT% test_subst.py UTF-16LE 34 | 35 | # vim: ts=2 sw=2 sts=2 et 36 | -------------------------------------------------------------------------------- /bregonig.html: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/k-takata/bregonig/670b229063a33910e717a9f220d89ce8ab46d790/bregonig.html -------------------------------------------------------------------------------- /bsd_license.txt: -------------------------------------------------------------------------------- 1 | Onigmo (Oniguruma-mod) LICENSE 2 | ------------------------------ 3 | 4 | Copyright (c) 2002-2018 K.Kosako 5 | Copyright (c) 2011-2019 K.Takata 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions 10 | are met: 11 | 1. Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | 2. Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 | SUCH DAMAGE. 28 | 29 | 30 | 31 | Oniguruma LICENSE 32 | ----------------- 33 | 34 | Copyright (c) 2002-2018 K.Kosako 35 | All rights reserved. 
36 | 37 | Redistribution and use in source and binary forms, with or without 38 | modification, are permitted provided that the following conditions 39 | are met: 40 | 1. Redistributions of source code must retain the above copyright 41 | notice, this list of conditions and the following disclaimer. 42 | 2. Redistributions in binary form must reproduce the above copyright 43 | notice, this list of conditions and the following disclaimer in the 44 | documentation and/or other materials provided with the distribution. 45 | 46 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 47 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 50 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 | SUCH DAMAGE. 57 | 58 | 59 | 60 | Ruby BSDL 61 | --------- 62 | Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved. 63 | 64 | Redistribution and use in source and binary forms, with or without 65 | modification, are permitted provided that the following conditions 66 | are met: 67 | 1. Redistributions of source code must retain the above copyright 68 | notice, this list of conditions and the following disclaimer. 69 | 2. Redistributions in binary form must reproduce the above copyright 70 | notice, this list of conditions and the following disclaimer in the 71 | documentation and/or other materials provided with the distribution. 
72 | 73 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 | SUCH DAMAGE. 84 | 85 | 86 | 87 | dbgtrace.h and mem_vc6.h LICENSE 88 | -------------------------------- 89 | 90 | /* 91 | * Copyright (C) 2006 K.Takata 92 | * All rights reserved. 93 | * 94 | * Redistribution and use in source and binary forms, with or without 95 | * modification, are permitted provided that the following conditions 96 | * are met: 97 | * 98 | * 1. Redistributions of source code must retain the above copyright 99 | * notice, this list of conditions and the following disclaimer. 100 | * 2. Redistributions in binary form must reproduce the above copyright 101 | * notice, this list of conditions and the following disclaimer in the 102 | * documentation and/or other materials provided with the distribution. 103 | * 104 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 105 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 106 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 107 | * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 108 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 109 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 110 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 111 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 112 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 113 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 114 | * POSSIBILITY OF SUCH DAMAGE. 115 | */ 116 | 117 | -------------------------------------------------------------------------------- /pack.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | if "%1"=="" goto usage 3 | setlocal DISABLEDELAYEDEXPANSION 4 | 5 | :: 7z (official version) or 7-zip32 (undll + common archiver) can be used 6 | if "%SEVENZIP%"=="" set SEVENZIP=7-zip32 7 | 8 | if not exist x64 mkdir x64 9 | 10 | cd src 11 | copy /y objx86\*.lib . 12 | "%SEVENZIP%" a -m0=PPMd ..\src.7z @..\srcfiles.lst 13 | del *.lib 14 | cd .. 15 | 16 | copy /y src\objx86\*.dll . 17 | copy /y src\objx64\bregonig.dll x64 18 | "%SEVENZIP%" a -mx=9 %1 @pkgfiles.lst 19 | del *.dll x64\*.dll src.7z 20 | rd x64 21 | 22 | goto end 23 | 24 | :usage 25 | echo. 26 | echo usage: pack ^<filename^> 27 | echo. 28 | echo ^<filename^>: ex.
bron400.zip 29 | 30 | :end 31 | -------------------------------------------------------------------------------- /perl_license.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | The "Perl Artistic License" 7 | 8 | Preamble 9 | 10 | The intent of this document is to state the conditions under which the Perl 11 | Package may be copied, such that the Copyright Holder maintains some 12 | semblance of artistic control over its development, while giving Perl users 13 | the right to use and distribute Perl in a more-or-less customary fashion, 14 | plus the right to make reasonable modifications. 15 | 16 | Definitions: 17 | 18 | "Package" refers to the collection of Perl-kit files distributed by the 19 | Copyright Holder, and derivatives of that collection of files 20 | created through textual modification. 21 | 22 | "Standard Version" refers to such a Package if it has not been 23 | modified, or has been modified as specified below. 24 | 25 | "Copyright Holder" is whoever is named in the copyright or 26 | copyrights for the Perl package. 27 | 28 | "You" is you, if you're thinking about copying or distributing 29 | this Package. 30 | 31 | "Reasonable copying fee" is whatever you can justify on the 32 | basis of media cost, duplication charges, time of people involved, 33 | and so on. (You will not be required to justify it to the 34 | Copyright Holder, but only to the computing community at large 35 | as a market that must bear the fee.) 36 | 37 | "Freely Available" means that no fee is charged for the item 38 | itself, though there may be fees involved in handling the item. 39 | It also means that recipients of the item may redistribute it 40 | under the same conditions they received it. 41 | 42 | 1. 
You may make and give away verbatim copies of the source form of the 43 | Standard Version of this Perl Package without restriction, provided that you 44 | duplicate all of the original copyright notices and associated disclaimers. 45 | 46 | 2. You may apply bug fixes, portability fixes and other modifications 47 | derived from the Public Domain or from the Copyright Holder. A Package 48 | modified in such a way shall still be considered the Standard Version. 49 | 50 | 3. You may otherwise modify your copy of this Perl Package in any way, 51 | provided that you insert a prominent notice in each changed file stating how 52 | and when you changed that file, and provided that you do at least ONE of the 53 | following: 54 | 55 | a) place your modifications in the Public Domain or otherwise make them 56 | Freely Available, such as by posting said modifications to Usenet or 57 | an equivalent medium, or placing the modifications on a major archive 58 | site such as uunet.uu.net, or by allowing the Copyright Holder to include 59 | your modifications in the Standard Version of the Perl Package. 60 | 61 | b) use the modified Perl Package only within your corporation or 62 | organization. 63 | 64 | c) rename any non-standard executables so the names do not conflict 65 | with standard executables, which must also be provided, and provide 66 | a separate manual page for each non-standard executable that clearly 67 | documents how it differs from the Standard Version. 68 | 69 | d) make other distribution arrangements with the Copyright Holder. 70 | 71 | 4. You may distribute the programs of this Perl Package in object code or 72 | executable form, provided that you do at least ONE of the following: 73 | 74 | a) distribute a Standard Version of the executables and library files, 75 | together with instructions (in the manual page or equivalent) on where 76 | to get the Standard Version. 
77 | 78 | b) accompany the distribution with the machine-readable source of 79 | the Perl Package with your modifications. 80 | 81 | c) accompany any non-standard executables with their corresponding 82 | Standard Version executables, giving the non-standard executables 83 | non-standard names, and clearly documenting the differences in manual 84 | pages (or equivalent), together with instructions on where to get 85 | the Standard Version. 86 | 87 | d) make other distribution arrangements with the Copyright Holder. 88 | 89 | 5. You may charge a reasonable copying fee for any distribution of this 90 | Perl Package. You may charge any fee you choose for support of this Perl 91 | Package. You may not charge a fee for this Perl Package itself. However, 92 | you may distribute this Perl Package in aggregate with other (possibly 93 | commercial) programs as part of a larger (possibly commercial) software 94 | distribution provided that you do not advertise this Perl Package as a 95 | product of your own. 96 | 97 | 6. The scripts and library files supplied as input to or produced as 98 | output from the programs of this Perl Package do not automatically fall 99 | under the copyright of this Perl Package, but belong to whomever generated 100 | them, and may be sold commercially, and may be aggregated with this Perl 101 | Package. 102 | 103 | 7. C subroutines supplied by you and linked into this Perl Package in order 104 | to emulate subroutines and variables of the language defined by this Perl 105 | Package shall not be considered part of this Perl Package, but are the 106 | equivalent of input as in Paragraph 6, provided these subroutines do 107 | not change the language in any way that would cause it to fail the 108 | regression tests for the language. 109 | 110 | 8. The name of the Copyright Holder may not be used to endorse or promote 111 | products derived from this software without specific prior written permission. 112 | 113 | 9. 
This PERL PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR 114 | IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 115 | WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 116 | 117 | The End 118 | 119 | -------------------------------------------------------------------------------- /perl_license_jp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-takata/bregonig/670b229063a33910e717a9f220d89ce8ab46d790/perl_license_jp.txt -------------------------------------------------------------------------------- /pkgfiles.lst: -------------------------------------------------------------------------------- 1 | bregonig.html 2 | bregonig.dll 3 | k2regexp.dll 4 | x64\bregonig.dll 5 | perl_license.txt 6 | perl_license_jp.txt 7 | bsd_license.txt 8 | src.7z 9 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for bregonig.dll 3 | # 4 | # Copyright (C) 2006-2014 K.Takata 5 | # 6 | 7 | #VER1 = 1 8 | USE_LTCG = 1 9 | #USE_MSVCRT = 1 10 | #USE_ONIG_DLL = 1 11 | USE_ONIGMO_6 = 1 12 | 13 | !ifndef TARGET_CPU 14 | !if ("$(CPU)"=="AMD64" && !DEFINED(386)) || DEFINED(AMD64) || "$(PLATFORM)"=="x64" || "$(PLATFORM)"=="X64" 15 | TARGET_CPU = x64 16 | !elseif DEFINED(IA64) 17 | TARGET_CPU = ia64 18 | !else 19 | TARGET_CPU = x86 20 | !endif 21 | !endif 22 | 23 | BASEADDR = 0x60500000 24 | 25 | !ifdef DEBUG 26 | DBGFLG = d 27 | !else 28 | DBGFLG = 29 | !endif 30 | 31 | !ifdef USE_ONIGMO_6 32 | # Onigmo 6.0 or later 33 | ONIG_DIR = ../onigmo-6.0.0 34 | !ifdef USE_ONIG_DLL 35 | ONIG_LIB = $(ONIG_DIR)/build_$(TARGET_CPU)$(DBGFLG)/onigmo.lib 36 | !else 37 | ONIG_LIB = $(ONIG_DIR)/build_$(TARGET_CPU)$(DBGFLG)/onigmo_s.lib 38 | !endif 39 | ONIG_H = $(ONIG_DIR)/onigmo.h 40 | 41 | !else 42 | # Onigmo 5.15 or earlier 43 | ONIG_DIR = ../onig-5.15.0 
44 | !ifdef USE_ONIG_DLL 45 | ONIG_LIB = $(ONIG_DIR)/onig.lib 46 | !else 47 | ONIG_LIB = $(ONIG_DIR)/onig_s.lib 48 | !endif 49 | ONIG_H = $(ONIG_DIR)/oniguruma.h 50 | 51 | !endif 52 | 53 | CPPFLAGS = /O2 /W3 /EHsc /LD /nologo /I$(ONIG_DIR) 54 | !ifdef VER1 55 | CPPFLAGS = $(CPPFLAGS) /DUSE_VTAB /DPERL_5_8_COMPAT /DNAMEGROUP_RIGHTMOST 56 | !endif 57 | !ifdef USE_ONIGMO_6 58 | CPPFLAGS = $(CPPFLAGS) /DUSE_ONIGMO_6 59 | !endif 60 | LD = link 61 | LDFLAGS = /DLL /nologo /MAP /BASE:$(BASEADDR) /merge:.rdata=.text 62 | 63 | !ifdef USE_MSVCRT 64 | CPPFLAGS = $(CPPFLAGS) /MD 65 | !else 66 | !ifdef DEBUG 67 | CPPFLAGS = $(CPPFLAGS) /MTd 68 | !else 69 | CPPFLAGS = $(CPPFLAGS) /MT 70 | !endif 71 | !endif 72 | 73 | !ifndef USE_ONIG_DLL 74 | CPPFLAGS = $(CPPFLAGS) /DONIG_EXTERN=extern 75 | !endif 76 | 77 | # Get the version of cl.exe. 78 | # 1. Write the version to a work file (mscver$(_NMAKE_VER).~). 79 | !if ![(echo _MSC_VER>mscver$(_NMAKE_VER).c) && \ 80 | (for /f %I in ('"$(CC) /EP mscver$(_NMAKE_VER).c 2>nul"') do @echo _MSC_VER=%I> mscver$(_NMAKE_VER).~)] 81 | # 2. Include it. 82 | !include mscver$(_NMAKE_VER).~ 83 | # 3. Clean up. 84 | !if [del mscver$(_NMAKE_VER).~ mscver$(_NMAKE_VER).c] 85 | !endif 86 | !endif 87 | 88 | !if DEFINED(USE_LTCG) && $(USE_LTCG) 89 | # Use LTCG (Link Time Code Generation). 90 | # Check if cl.exe is VC++ 7.0 or newer (_MSC_VER >= 1300).
91 | !if $(_MSC_VER) >= 1300 92 | CPPFLAGS = $(CPPFLAGS) /GL 93 | LDFLAGS = $(LDFLAGS) /LTCG 94 | !endif 95 | !endif 96 | 97 | !if $(_MSC_VER) < 1500 98 | LDFLAGS = $(LDFLAGS) /opt:nowin98 99 | !endif 100 | 101 | !ifdef DEBUG 102 | CPPFLAGS = $(CPPFLAGS) /D_DEBUG /Zi 103 | LDFLAGS = $(LDFLAGS) /debug 104 | RFLAGS = $(RFLAGS) /D_DEBUG 105 | !endif 106 | 107 | OBJDIR = obj$(TARGET_CPU)$(DBGFLG) 108 | WOBJDIR = $(OBJDIR)\unicode 109 | 110 | OBJS = $(OBJDIR)\subst.obj $(OBJDIR)\bsplit.obj $(OBJDIR)\btrans.obj $(OBJDIR)\sv.obj 111 | WOBJS = $(WOBJDIR)\subst.obj $(WOBJDIR)\bsplit.obj $(WOBJDIR)\btrans.obj $(WOBJDIR)\sv.obj 112 | !ifdef VER1 113 | BROBJS = $(OBJDIR)\bregonig.obj $(OBJDIR)\bregonig.res $(OBJS) 114 | !else 115 | BROBJS = $(OBJDIR)\bregonig.obj $(WOBJDIR)\bregonig.obj $(OBJDIR)\bregonig.res $(OBJS) $(WOBJS) 116 | !endif 117 | K2OBJS = $(OBJDIR)\k2regexp.obj $(OBJDIR)\k2regexp.res $(OBJS) 118 | 119 | 120 | all: $(OBJDIR)\bregonig.dll $(OBJDIR)\k2regexp.dll 121 | 122 | 123 | $(OBJDIR)\bregonig.dll: $(WOBJDIR) $(BROBJS) $(ONIG_LIB) 124 | $(LD) $(BROBJS) $(ONIG_LIB) /out:$@ $(LDFLAGS) 125 | 126 | $(OBJDIR)\k2regexp.dll: $(WOBJDIR) $(K2OBJS) $(ONIG_LIB) 127 | $(LD) $(K2OBJS) $(ONIG_LIB) /out:$@ $(LDFLAGS) 128 | 129 | 130 | $(WOBJDIR): 131 | if not exist $(OBJDIR)\nul mkdir $(OBJDIR) 132 | if not exist $(WOBJDIR)\nul mkdir $(WOBJDIR) 133 | 134 | 135 | .cpp{$(OBJDIR)\}.obj:: 136 | $(CPP) $(CPPFLAGS) /Fo$(OBJDIR)\ /c $< 137 | .cpp{$(WOBJDIR)\}.obj:: 138 | $(CPP) $(CPPFLAGS) /DUNICODE /D_UNICODE /Fo$(WOBJDIR)\ /c $< 139 | 140 | .rc{$(OBJDIR)\}.res: 141 | $(RC) $(RFLAGS) /Fo$@ /r $< 142 | 143 | $(OBJDIR)\bregonig.obj: bregonig.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h version.h $(ONIG_H) 144 | 145 | $(WOBJDIR)\bregonig.obj: bregonig.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h version.h $(ONIG_H) 146 | 147 | $(OBJDIR)\bregonig.res: bregonig.rc version.h 148 | 149 | $(OBJDIR)\k2regexp.obj: bregonig.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h version.h $(ONIG_H) 
150 | $(CPP) $(CPPFLAGS) /c /D_K2REGEXP_ /Fo$@ bregonig.cpp 151 | 152 | #$(WOBJDIR)\k2regexp.obj: bregonig.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h version.h $(ONIG_H) 153 | # $(CPP) $(CPPFLAGS) /c /D_K2REGEXP_ /DUNICODE /D_UNICODE /Fo$@ bregonig.cpp 154 | 155 | $(OBJDIR)\k2regexp.res: bregonig.rc version.h 156 | $(RC) $(RFLAGS) /D_K2REGEXP_ /Fo$@ /r bregonig.rc 157 | 158 | 159 | $(OBJDIR)\subst.obj: subst.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h $(ONIG_H) 160 | 161 | $(WOBJDIR)\subst.obj: subst.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h $(ONIG_H) 162 | 163 | $(OBJDIR)\bsplit.obj: bsplit.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h $(ONIG_H) 164 | 165 | $(WOBJDIR)\bsplit.obj: bsplit.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h $(ONIG_H) 166 | 167 | $(OBJDIR)\btrans.obj: btrans.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h sv.h $(ONIG_H) 168 | 169 | $(WOBJDIR)\btrans.obj: btrans.cpp bregexp.h bregonig.h mem_vc6.h dbgtrace.h sv.h $(ONIG_H) 170 | 171 | $(OBJDIR)\sv.obj: sv.cpp sv.h 172 | 173 | $(WOBJDIR)\sv.obj: sv.cpp sv.h 174 | 175 | 176 | clean: 177 | del $(BROBJS) $(OBJDIR)\bregonig.lib $(OBJDIR)\bregonig.dll $(OBJDIR)\bregonig.exp $(OBJDIR)\bregonig.map \ 178 | $(OBJDIR)\k2regexp.obj $(OBJDIR)\k2regexp.res $(OBJDIR)\k2regexp.lib $(OBJDIR)\k2regexp.dll $(OBJDIR)\k2regexp.exp $(OBJDIR)\k2regexp.map 179 | -------------------------------------------------------------------------------- /src/afxres.h: -------------------------------------------------------------------------------- 1 | #include 2 | #define IDC_STATIC (-1) 3 | -------------------------------------------------------------------------------- /src/bregexp.h: -------------------------------------------------------------------------------- 1 | /* bregexp.h 2 | external use header file 3 | 1999.11.22 T.Baba 4 | */ 5 | /* 6 | * 2002.08.24 modified by K2 7 | * 2011.06.17 modified by K.Takata 8 | */ 9 | 10 | #include 11 | 12 | #ifdef _BREGEXP_ 13 | /* for internal use */ 14 | #define BREGEXPAPI 
__declspec(dllexport) 15 | #define BREGCONST 16 | #else 17 | /* for external use */ 18 | #define BREGEXPAPI __declspec(dllimport) 19 | #define BREGCONST const 20 | #endif 21 | 22 | 23 | #ifdef UNICODE 24 | #define BMatch BMatchW 25 | #define BSubst BSubstW 26 | #define BMatchEx BMatchExW 27 | #define BSubstEx BSubstExW 28 | #define BTrans BTransW 29 | #define BSplit BSplitW 30 | #define BRegfree BRegfreeW 31 | #define BRegexpVersion BRegexpVersionW 32 | 33 | #define BoMatch BoMatchW 34 | #define BoSubst BoSubstW 35 | #endif /* UNICODE */ 36 | 37 | #define BREGEXP_MAX_ERROR_MESSAGE_LEN 80 38 | 39 | 40 | typedef struct bregexp { 41 | BREGCONST TCHAR *outp; /* result string start ptr */ 42 | BREGCONST TCHAR *outendp; /* result string end ptr */ 43 | BREGCONST int splitctr; /* split result counter */ 44 | BREGCONST TCHAR **splitp; /* split result pointer ptr */ 45 | INT_PTR rsv1; /* reserved for external use */ 46 | TCHAR *parap; /* parameter start ptr ie. "s/xxxxx/yy/gi" */ 47 | TCHAR *paraendp; /* parameter end ptr */ 48 | TCHAR *transtblp; /* translate table ptr */ 49 | TCHAR **startp; /* match string start ptr */ 50 | TCHAR **endp; /* match string end ptr */ 51 | int nparens; /* number of parentheses */ 52 | } BREGEXP; 53 | 54 | typedef BOOL (__stdcall *BCallBack)(int kind, int value, ptrdiff_t index); 55 | 56 | #if defined(__cplusplus) 57 | extern "C" 58 | { 59 | #endif 60 | 61 | #ifdef _K2REGEXP_ 62 | /* K2Editor */ 63 | BREGEXPAPI 64 | int BMatch(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 65 | int one_shot, 66 | BREGEXP **rxp, TCHAR *msg); 67 | BREGEXPAPI 68 | int BSubst(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 69 | BREGEXP **rxp, TCHAR *msg, BCallBack callback); 70 | #else 71 | /* Original */ 72 | BREGEXPAPI 73 | int BMatch(TCHAR *str, TCHAR *target, TCHAR *targetendp, 74 | BREGEXP **rxp, TCHAR *msg); 75 | BREGEXPAPI 76 | int BSubst(TCHAR *str, TCHAR *target, TCHAR *targetendp, 77 | BREGEXP **rxp, TCHAR *msg); 
78 | 79 | /* Sakura Editor */ 80 | BREGEXPAPI 81 | int BMatchEx(TCHAR *str, TCHAR *targetbegp, TCHAR *target, TCHAR *targetendp, 82 | BREGEXP **rxp, TCHAR *msg); 83 | BREGEXPAPI 84 | int BSubstEx(TCHAR *str, TCHAR *targetbegp, TCHAR *target, TCHAR *targetendp, 85 | BREGEXP **rxp, TCHAR *msg); 86 | #endif 87 | 88 | 89 | BREGEXPAPI 90 | int BTrans(TCHAR *str, TCHAR *target, TCHAR *targetendp, 91 | BREGEXP **rxp, TCHAR *msg); 92 | BREGEXPAPI 93 | int BSplit(TCHAR *str, TCHAR *target, TCHAR *targetendp, 94 | int limit, BREGEXP **rxp, TCHAR *msg); 95 | BREGEXPAPI 96 | void BRegfree(BREGEXP *rx); 97 | 98 | BREGEXPAPI 99 | TCHAR *BRegexpVersion(void); 100 | 101 | 102 | #ifndef _K2REGEXP_ 103 | /* bregonig.dll native APIs */ 104 | 105 | BREGEXPAPI 106 | int BoMatch(const TCHAR *patternp, const TCHAR *optionp, 107 | const TCHAR *strstartp, 108 | const TCHAR *targetstartp, const TCHAR *targetendp, 109 | BOOL one_shot, 110 | BREGEXP **rxp, TCHAR *msg); 111 | 112 | BREGEXPAPI 113 | int BoSubst(const TCHAR *patternp, const TCHAR *substp, const TCHAR *optionp, 114 | const TCHAR *strstartp, 115 | const TCHAR *targetstartp, const TCHAR *targetendp, 116 | BCallBack callback, 117 | BREGEXP **rxp, TCHAR *msg); 118 | 119 | #endif 120 | 121 | 122 | #if defined(__cplusplus) 123 | } 124 | #endif 125 | 126 | -------------------------------------------------------------------------------- /src/bregonig.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * bregonig.cpp 3 | */ 4 | /* 5 | * Copyright (C) 2006-2011 K.Takata 6 | * 7 | * You may distribute under the terms of either the GNU General Public 8 | * License or the Artistic License, as specified in the perl_license.txt file. 
9 | */ 10 | /* 11 | * Note: 12 | * This file is based on the following files: 13 | * Bregexp.dll (main.cc, bsubst.cc) by Tatsuo Baba 14 | * K2Regexp.dll (main.cpp) by Koyabu Kazuya (K2) 15 | * Bregexp.dll for SAKURA (main.cpp) by Karoto 16 | */ 17 | 18 | 19 | #define _CRT_SECURE_NO_DEPRECATE 20 | #define WIN32_LEAN_AND_MEAN 21 | 22 | #define _BREGEXP_ 23 | #define GLOBAL_VALUE_DEFINE 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #ifdef USE_ONIGMO_6 34 | # include 35 | #else 36 | # include 37 | #endif 38 | #include "bregexp.h" 39 | //#include "global.h" 40 | #include "bregonig.h" 41 | #include "version.h" 42 | #include "mem_vc6.h" 43 | #include "dbgtrace.h" 44 | 45 | 46 | using namespace BREGONIG_NS; 47 | 48 | 49 | extern OnigSyntaxType OnigSyntaxPerl_NG_EX; 50 | #ifndef UNICODE 51 | OnigSyntaxType OnigSyntaxPerl_NG_EX = OnigSyntaxPerl; 52 | /* 53 | OnigSyntaxType OnigSyntaxPerl_NG_EX = { 54 | ONIG_SYNTAX_PERL_NG->op, 55 | ONIG_SYNTAX_PERL_NG->op2, 56 | ONIG_SYNTAX_PERL_NG->behavior | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND, 57 | ONIG_SYNTAX_PERL_NG->options, 58 | }; 59 | */ 60 | 61 | 62 | BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) 63 | { 64 | switch (fdwReason) { 65 | case DLL_PROCESS_ATTACH: 66 | /* Enable bregonig.dll extensions. 
*/ 67 | OnigSyntaxPerl_NG_EX.op2 |= 68 | #ifdef ONIG_SYN_OP2_QMARK_TILDE_ABSENT 69 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT | 70 | #endif 71 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | 72 | ONIG_SYN_OP2_CCLASS_SET_OP; 73 | OnigSyntaxPerl_NG_EX.behavior |= 74 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND; 75 | 76 | onig_init(); 77 | break; 78 | 79 | case DLL_PROCESS_DETACH: 80 | if (lpvReserved == NULL) { // called via FreeLibrary() 81 | onig_end(); 82 | } 83 | break; 84 | 85 | case DLL_THREAD_ATTACH: 86 | break; 87 | case DLL_THREAD_DETACH: 88 | break; 89 | } 90 | return TRUE; 91 | } 92 | #endif 93 | 94 | 95 | TCHAR *::BRegexpVersion(void) 96 | { 97 | static TCHAR version[80]; 98 | _sntprintf(version, lengthof(version), 99 | _T("bregonig.dll Ver.%hs with Onigmo %hs"), 100 | BREGONIG_VERSION_STRING, 101 | onig_version()); 102 | version[lengthof(version) - 1] = '\0'; // Ensure NUL termination. 103 | 104 | return version; 105 | } 106 | 107 | 108 | #ifdef _K2REGEXP_ 109 | int ::BMatch(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 110 | int one_shot, 111 | BREGEXP **rxp, TCHAR *msg) 112 | { 113 | return BMatch_s(str, target, targetstartp, targetendp, one_shot, rxp, msg); 114 | } 115 | #else 116 | int ::BMatch(TCHAR *str, TCHAR *target, TCHAR *targetendp, 117 | BREGEXP **rxp, TCHAR *msg) 118 | { 119 | return BMatch_s(str, target, target, targetendp, 0, rxp, msg); 120 | } 121 | int ::BMatchEx(TCHAR *str, TCHAR *targetbegp, TCHAR *target, TCHAR *targetendp, 122 | BREGEXP **rxp, TCHAR *msg) 123 | { 124 | return BMatch_s(str, targetbegp, target, targetendp, 0, rxp, msg); 125 | } 126 | #endif 127 | 128 | 129 | #ifdef _K2REGEXP_ 130 | int ::BSubst(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 131 | BREGEXP **rxp, TCHAR *msg, BCallBack callback) 132 | { 133 | return BSubst_s(str, target, targetstartp, targetendp, rxp, msg, callback); 134 | } 135 | #else 136 | int ::BSubst(TCHAR *str, TCHAR *target, TCHAR *targetendp, 137 | BREGEXP **rxp, TCHAR *msg) 138 | { 
139 | return BSubst_s(str, target, target, targetendp, rxp, msg, NULL); 140 | } 141 | int ::BSubstEx(TCHAR *str, TCHAR *targetbegp, TCHAR *target, TCHAR *targetendp, 142 | BREGEXP **rxp, TCHAR *msg) 143 | { 144 | return BSubst_s(str, targetbegp, target, targetendp, rxp, msg, NULL); 145 | } 146 | #endif 147 | 148 | 149 | int ::BTrans(TCHAR *str, TCHAR *target, TCHAR *targetendp, 150 | BREGEXP **rxp, TCHAR *msg) 151 | { 152 | TRACE1(_T("BTrans(): %s\n"), str); 153 | set_new_throw_bad_alloc(); 154 | 155 | if (check_params(target, target/*startp*/, targetendp, rxp, msg, false) < 0) { 156 | return -1; 157 | } 158 | bregonig *rx = static_cast(*rxp); 159 | *rxp = rx = recompile_onig(rx, PTN_TRANS, str, msg); 160 | if (*rxp == NULL) { 161 | return -1; 162 | } 163 | 164 | if (!(rx->pmflags & PMf_TRANSLATE)) { 165 | delete rx; 166 | *rxp = NULL; 167 | asc2tcs(msg, "no translate parameter", BREGEXP_MAX_ERROR_MESSAGE_LEN); 168 | return -1; 169 | } 170 | 171 | int matched = trans(rx,target,targetendp,msg); 172 | return msg[0] == '\0' ? matched: -1; 173 | } 174 | 175 | 176 | int ::BSplit(TCHAR *str, TCHAR *target, TCHAR *targetendp, 177 | int limit, BREGEXP **rxp, TCHAR *msg) 178 | { 179 | TRACE1(_T("BSplit(): %s\n"), str); 180 | set_new_throw_bad_alloc(); 181 | 182 | if (check_params(target, target/*startp*/, targetendp, rxp, msg, false) < 0) { 183 | return -1; 184 | } 185 | bregonig *rx = static_cast(*rxp); 186 | *rxp = rx = recompile_onig(rx, PTN_MATCH, str, msg); 187 | if (*rxp == NULL) { 188 | return -1; 189 | } 190 | 191 | int ctr = split_onig(rx,target,targetendp,limit,msg); 192 | return msg[0] == '\0' ? 
ctr : -1; 193 | } 194 | 195 | 196 | void ::BRegfree(BREGEXP *rx) 197 | { 198 | TRACE1(_T("BRegfree(): rx=0x%08x\n"), rx); 199 | if (rx) { 200 | delete static_cast(rx); 201 | } 202 | } 203 | 204 | 205 | #ifndef _K2REGEXP_ 206 | int ::BoMatch(const TCHAR *patternp, const TCHAR *optionp, 207 | const TCHAR *strstartp, 208 | const TCHAR *targetstartp, const TCHAR *targetendp, 209 | BOOL one_shot, 210 | BREGEXP **rxp, TCHAR *msg) 211 | { 212 | set_new_throw_bad_alloc(); 213 | 214 | const TCHAR *substp = NULL; 215 | const TCHAR *patternendp = (patternp != NULL) ? patternp + _tcslen(patternp) : NULL; 216 | const TCHAR *substendp = NULL; 217 | const TCHAR *optionendp = (optionp != NULL) ? optionp + _tcslen(optionp) : NULL; 218 | 219 | if (check_params(strstartp, targetstartp, targetendp, rxp, msg, true) < 0) { 220 | return -1; 221 | } 222 | 223 | bregonig *rx = static_cast(*rxp); 224 | *rxp = rx = recompile_onig_ex(rx, PTN_MATCH, NULL, patternp, patternendp, 225 | substp, substendp, optionp, optionendp, msg); 226 | if (rx == NULL) { 227 | return -1; 228 | } 229 | 230 | int err_code = regexec_onig(rx, targetstartp, targetendp, strstartp, 231 | 0, 1, one_shot, msg); 232 | 233 | return err_code; 234 | } 235 | 236 | int ::BoSubst(const TCHAR *patternp, const TCHAR *substp, const TCHAR *optionp, 237 | const TCHAR *strstartp, 238 | const TCHAR *targetstartp, const TCHAR *targetendp, 239 | BCallBack callback, 240 | BREGEXP **rxp, TCHAR *msg) 241 | { 242 | set_new_throw_bad_alloc(); 243 | 244 | const TCHAR *patternendp = (patternp != NULL) ? patternp + _tcslen(patternp) : NULL; 245 | const TCHAR *substendp = (substp != NULL) ? substp + _tcslen(substp) : NULL; 246 | const TCHAR *optionendp = (optionp != NULL) ? 
optionp + _tcslen(optionp) : NULL; 247 | 248 | if (check_params(strstartp, targetstartp, targetendp, rxp, msg, true) < 0) { 249 | return -1; 250 | } 251 | 252 | bregonig *rx = static_cast(*rxp); 253 | *rxp = rx = recompile_onig_ex(rx, PTN_SUBST, NULL, patternp, patternendp, 254 | substp, substendp, optionp, optionendp, msg); 255 | if (rx == NULL) { 256 | return -1; 257 | } 258 | return subst_onig(rx, strstartp, targetstartp, targetendp, msg, callback); 259 | } 260 | #endif 261 | 262 | 263 | 264 | namespace BREGONIG_NS { 265 | 266 | int onig_err_to_bregexp_msg(OnigPosition err_code, OnigErrorInfo* err_info, TCHAR *msg) 267 | { 268 | char err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 269 | int ret = onig_error_code_to_str((UChar*) err_str, err_code, err_info); 270 | err_str[BREGEXP_MAX_ERROR_MESSAGE_LEN-1] = '\0'; 271 | asc2tcs(msg, err_str, BREGEXP_MAX_ERROR_MESSAGE_LEN); 272 | return ret; 273 | } 274 | 275 | int check_params(const TCHAR *target, const TCHAR *targetstartp, 276 | const TCHAR *targetendp, BREGEXP **rxp, TCHAR *msg, bool allownullstr) 277 | { 278 | if (msg == NULL) // no message area 279 | return -1; 280 | msg[0] = '\0'; // ensure no error 281 | 282 | if (rxp == NULL) { 283 | asc2tcs(msg, "invalid BREGEXP parameter", BREGEXP_MAX_ERROR_MESSAGE_LEN); 284 | return -1; 285 | } 286 | const TCHAR *endp = targetendp; 287 | if (allownullstr) { 288 | endp++; 289 | } 290 | if (target == NULL || targetstartp == NULL || targetendp == NULL 291 | || targetstartp >= endp || target > targetstartp) { // bad target parameter ? 
292 | asc2tcs(msg, "invalid target parameter", BREGEXP_MAX_ERROR_MESSAGE_LEN); 293 | return -1; 294 | } 295 | 296 | return 0; 297 | } 298 | 299 | 300 | int BMatch_s(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 301 | int one_shot, 302 | BREGEXP **rxp, TCHAR *msg) 303 | { 304 | TRACE(_T("BMatch(): '%s' (%p), %p, %p, %p\n"), str, str, target, targetstartp, targetendp); 305 | set_new_throw_bad_alloc(); 306 | 307 | if (check_params(target, targetstartp, targetendp, rxp, msg, false) < 0) { 308 | return -1; 309 | } 310 | bregonig *rx = static_cast(*rxp); 311 | *rxp = rx = recompile_onig(rx, PTN_MATCH, str, msg); 312 | if (rx == NULL) { 313 | return -1; 314 | } 315 | 316 | int err_code = regexec_onig(rx, targetstartp, targetendp, target, 317 | 0, 1, one_shot, msg); 318 | #if 0 319 | if (err_code > 0 && rx->nparens && rx->endp[1] > rx->startp[1]) { 320 | int len = rx->endp[1] - rx->startp[1]; 321 | TCHAR *tp = new (std::nothrow) TCHAR[len+1]; 322 | if (tp == NULL) { 323 | asc2tcs(msg, "match out of space", BREGEXP_MAX_ERROR_MESSAGE_LEN); 324 | return -1; 325 | } 326 | memcpy(tp,rx->startp[1],len*sizeof(TCHAR)); 327 | rx->outp = tp; 328 | rx->outendp = tp + len; 329 | *(rx->outendp) = '\0'; 330 | } 331 | #endif 332 | return err_code; 333 | } 334 | 335 | 336 | int BSubst_s(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 337 | BREGEXP **rxp, TCHAR *msg, BCallBack callback) 338 | { 339 | TRACE(_T("BSubst(): '%s' (%p), %p, %p, %p\n"), str, str, target, targetstartp, targetendp); 340 | set_new_throw_bad_alloc(); 341 | 342 | if (check_params(target, targetstartp, targetendp, rxp, msg, false) < 0) { 343 | return -1; 344 | } 345 | bregonig *rx = static_cast(*rxp); 346 | *rxp = rx = recompile_onig(rx, PTN_SUBST, str, msg); 347 | if (rx == NULL) { 348 | return -1; 349 | } 350 | 351 | if (rx->pmflags & PMf_SUBSTITUTE) { 352 | return subst_onig(rx,target,targetstartp,targetendp,msg,callback); 353 | } 354 | // unusual case 355 | TRACE0(_T("Match 
in Subst")); 356 | #if 0 357 | int err_code = regexec_onig(rx, targetstartp,targetendp,target,0,1,0,msg); 358 | if (err_code > 0 && rx->nparens && rx->endp[1] > rx->startp[1]) { 359 | int len = rx->endp[1] - rx->startp[1]; 360 | TCHAR *tp = new (std::nothrow) TCHAR[len+1]; 361 | if (tp == NULL) { 362 | asc2tcs(msg, "match out of space", BREGEXP_MAX_ERROR_MESSAGE_LEN); 363 | return -1; 364 | } 365 | memcpy(tp,rx->startp[1],len*sizeof(TCHAR)); 366 | rx->outp = tp; 367 | rx->outendp = tp + len; 368 | *(rx->outendp) = '\0'; 369 | } 370 | return err_code; 371 | #else 372 | delete rx; 373 | *rxp = NULL; 374 | return -1; 375 | #endif 376 | } 377 | 378 | 379 | 380 | 381 | bregonig::bregonig() 382 | { 383 | memset(this, 0, sizeof(bregonig)); 384 | region = onig_region_new(); 385 | } 386 | 387 | bregonig::~bregonig() 388 | { 389 | if (region) { 390 | onig_region_free(region, 1); 391 | } 392 | if (reg) { 393 | onig_free(reg); 394 | } 395 | delete [] outp; 396 | delete [] splitp; 397 | delete [] parap; 398 | delete [] transtblp; 399 | delete [] startp; 400 | // delete [] endp; 401 | 402 | delete [] patternp; 403 | // delete [] prerepp; 404 | // delete [] optionp; 405 | 406 | if (repstr) { 407 | delete repstr; 408 | } 409 | } 410 | 411 | 412 | 413 | pattern_type parse_pattern(const TCHAR *ptn, pattern_type typeold, 414 | const TCHAR **patternp, const TCHAR **patternendp, 415 | const TCHAR **prerepp, const TCHAR **prerependp, 416 | const TCHAR **optionp, const TCHAR **optionendp, 417 | TCHAR *msg) 418 | { 419 | if (ptn == NULL) { 420 | *patternp = NULL; 421 | *patternendp = NULL; 422 | *prerepp = NULL; 423 | *prerependp = NULL; 424 | *optionp = NULL; 425 | *optionendp = NULL; 426 | return typeold; 427 | } 428 | 429 | pattern_type type = PTN_MATCH; 430 | const TCHAR *p = ptn; 431 | const TCHAR *ptnend = ptn + _tcslen(ptn); 432 | TCHAR sep = '/'; // default separater 433 | 434 | if (*p != sep) { 435 | if (*p != 's' && *p != 'm' && *p != 'y' 436 | && (p[0] != 't' || p[1] != 'r')) { 
437 | asc2tcs(msg, "does not start with 'm', 's', 'tr' or 'y'", 438 | BREGEXP_MAX_ERROR_MESSAGE_LEN); 439 | return PTN_ERROR; 440 | } 441 | if (*p == 's') { 442 | type = PTN_SUBST; // substitute command 443 | } else if (*p == 'y') { 444 | type = PTN_TRANS; // translate command 445 | } else if (*p == 't') { 446 | type = PTN_TRANS; // translate command 447 | p++; 448 | } 449 | sep = *++p; 450 | } 451 | p++; // skip separater 452 | *patternp = p; 453 | 454 | const TCHAR *res = p; 455 | const TCHAR *resend = NULL, *rp = NULL, *rpend = NULL; 456 | TCHAR prev = 0; 457 | while (p < ptnend) { 458 | #ifndef UNICODE 459 | if (iskanji(*p)) { 460 | prev = 0; p += 2; 461 | continue; 462 | } 463 | #endif 464 | if (*p == '\\' && prev == '\\') { 465 | prev = 0; p++; 466 | continue; 467 | } 468 | if (*p == '/' && prev == '\\') { // \/ means / 469 | prev = 0; p++; 470 | continue; 471 | } 472 | if (*p == sep && prev != '\\') { 473 | if (resend == NULL) { 474 | resend = p; 475 | rp = ++p; 476 | continue; 477 | } else { 478 | rpend = p; 479 | } 480 | p++; 481 | break; 482 | } 483 | prev = *p++; 484 | } 485 | if ((resend == NULL) || (rpend == NULL && type != PTN_MATCH)) { 486 | asc2tcs(msg, "unmatch separater", BREGEXP_MAX_ERROR_MESSAGE_LEN); 487 | return PTN_ERROR; 488 | } 489 | if (rpend == NULL) { 490 | p = resend + 1; 491 | rp = NULL; 492 | } 493 | 494 | *patternendp = resend; 495 | *prerepp = rp; 496 | *prerependp = rpend; 497 | *optionp = p; 498 | *optionendp = ptnend; 499 | 500 | return type; 501 | } 502 | 503 | void parse_option(const TCHAR *optionp, const TCHAR *optionendp, 504 | OnigOptionType *onigoption, OnigEncoding *enc, int *flagp) 505 | { 506 | const TCHAR *p = optionp; 507 | int flag = 0; 508 | OnigOptionType option = ONIG_OPTION_NONE; 509 | #ifdef UNICODE 510 | *enc = ONIG_ENCODING_UTF16_LE; 511 | #else 512 | *enc = ONIG_ENCODING_ASCII; 513 | #endif 514 | TRACE1(_T("option: %s"), optionp); 515 | while (p < optionendp) { 516 | switch (*p++) { 517 | case 'g': 518 | flag 
|= PMf_GLOBAL; 519 | break; 520 | case 'i': 521 | flag |= PMf_FOLD; 522 | option |= ONIG_OPTION_IGNORECASE; 523 | break; 524 | case 'm': 525 | //flag |= PMf_MULTILINE; 526 | option |= ONIG_OPTION_NEGATE_SINGLELINE; 527 | break; 528 | case 'o': 529 | flag |= PMf_KEEP; 530 | break; 531 | case 'k': 532 | flag |= PMf_KANJI; 533 | #ifndef UNICODE 534 | *enc = ONIG_ENCODING_CP932; 535 | #endif 536 | break; 537 | #if !defined(UNICODE) && !defined(_K2REGEXP_) 538 | case '8': /* bregonig.dll extension */ 539 | *enc = ONIG_ENCODING_UTF8; 540 | break; 541 | #endif 542 | case 'c': 543 | flag |= PMf_TRANS_COMPLEMENT; 544 | break; 545 | case 'd': 546 | flag |= PMf_TRANS_DELETE; 547 | option &= ~ONIG_OPTION_ASCII_RANGE; 548 | break; 549 | case 's': 550 | flag |= PMf_TRANS_SQUASH; 551 | //flag |= PMf_SINGLELINE; 552 | option |= ONIG_OPTION_MULTILINE; 553 | break; 554 | case 'x': 555 | option |= ONIG_OPTION_EXTEND; 556 | break; 557 | case 'a': 558 | option |= ONIG_OPTION_ASCII_RANGE; 559 | break; 560 | case 'l': 561 | case 'u': 562 | option &= ~ONIG_OPTION_ASCII_RANGE; 563 | break; 564 | case 'R': 565 | option |= ONIG_OPTION_NEWLINE_CRLF; 566 | break; 567 | default: 568 | break; 569 | } 570 | } 571 | *flagp = flag; 572 | *onigoption = option; 573 | } 574 | 575 | 576 | bregonig *recompile_onig(bregonig *rxold, pattern_type type, 577 | const TCHAR *ptn, TCHAR *msg) 578 | { 579 | const TCHAR *patternp; 580 | const TCHAR *patternendp; 581 | const TCHAR *prerepp; 582 | const TCHAR *prerependp; 583 | const TCHAR *optionp; 584 | const TCHAR *optionendp; 585 | 586 | TRACE1(_T("recompile_onig(): %s\n"), ptn); 587 | type = parse_pattern(ptn, type, &patternp, &patternendp, 588 | &prerepp, &prerependp, &optionp, &optionendp, msg); 589 | if (type == PTN_ERROR) { 590 | return NULL; 591 | } 592 | return recompile_onig_ex(rxold, type, ptn, patternp, patternendp, 593 | prerepp, prerependp, optionp, optionendp, msg); 594 | } 595 | 596 | 597 | /** 598 | * Compare the old regexp object and new 
pattern. 599 | * 600 | * return: 601 | * -2: parameter error 602 | * -1: Need to compile. 603 | * 0: No need to compile. The old regexp object can be reused. 604 | * 1: Replace string needs to compile. 605 | */ 606 | int compare_pattern(const bregonig *rxold, 607 | pattern_type type, 608 | const TCHAR *patternp, const TCHAR *patternendp, 609 | const TCHAR *prerepp, const TCHAR *prerependp, 610 | const TCHAR *optionp, const TCHAR *optionendp) 611 | { 612 | pattern_type typeold; 613 | ptrdiff_t len1 = patternendp - patternp; 614 | ptrdiff_t len2 = prerependp - prerepp; 615 | ptrdiff_t len3 = optionendp - optionp; 616 | 617 | TRACE2(_T("compare_pattern: %s, len: %d"), patternp, patternendp-patternp); 618 | if (rxold == NULL) { 619 | return -1; 620 | } 621 | 622 | if (rxold->pmflags & PMf_TRANSLATE) { 623 | typeold = PTN_TRANS; 624 | } else if (rxold->pmflags & PMf_SUBSTITUTE) { 625 | typeold = PTN_SUBST; 626 | } else { 627 | typeold = PTN_MATCH; 628 | } 629 | 630 | if ((typeold == PTN_TRANS) || (type == PTN_TRANS)) { 631 | if (typeold != type) { 632 | return -1; 633 | } 634 | if (patternp == NULL) { 635 | return 0; 636 | } 637 | if ((len1 != rxold->patternendp - rxold->patternp) 638 | || (len2 != rxold->prerependp - rxold->prerepp) 639 | || (len3 != rxold->optionendp - rxold->optionp)) { 640 | return -1; 641 | } 642 | if ((memcmp(patternp, rxold->patternp, len1*sizeof(TCHAR)) != 0) 643 | || (memcmp(prerepp, rxold->prerepp, len2*sizeof(TCHAR)) != 0) 644 | || (memcmp(optionp, rxold->optionp, len3*sizeof(TCHAR)) != 0)) { 645 | return -1; 646 | } 647 | return 0; 648 | } else if (type == PTN_SUBST) { 649 | if (prerepp == NULL) { 650 | if (patternp == NULL) { 651 | if (typeold == PTN_SUBST) { 652 | return 0; 653 | } else { 654 | return -2; // error 655 | } 656 | } 657 | return -2; // error 658 | } 659 | if (patternp != NULL) { 660 | if ((len1 != rxold->patternendp - rxold->patternp) 661 | || (len3 != rxold->optionendp - rxold->optionp)) { 662 | return -1; 663 | } 664 | if 
((memcmp(patternp, rxold->patternp, len1*sizeof(TCHAR)) != 0) 665 | || (memcmp(optionp, rxold->optionp, len3*sizeof(TCHAR)) != 0)) { 666 | return -1; 667 | } 668 | } 669 | if ((typeold == PTN_SUBST) 670 | && (len2 == rxold->prerependp - rxold->prerepp) 671 | && (memcmp(prerepp, rxold->prerepp, len2*sizeof(TCHAR)) == 0)) { 672 | return 0; 673 | } 674 | return 1; // compile_rep() is needed 675 | } else { 676 | if (patternp == NULL) { 677 | return 0; 678 | } 679 | if ((len1 != rxold->patternendp - rxold->patternp) 680 | || (len3 != rxold->optionendp - rxold->optionp)) { 681 | return -1; 682 | } 683 | if ((memcmp(patternp, rxold->patternp, len1*sizeof(TCHAR)) != 0) 684 | || (memcmp(optionp, rxold->optionp, len3*sizeof(TCHAR)) != 0)) { 685 | return -1; 686 | } 687 | return 0; 688 | } 689 | } 690 | 691 | 692 | bregonig *recompile_onig_ex(bregonig *rxold, 693 | pattern_type type, const TCHAR *ptn, 694 | const TCHAR *patternp, const TCHAR *patternendp, 695 | const TCHAR *prerepp, const TCHAR *prerependp, 696 | const TCHAR *optionp, const TCHAR *optionendp, 697 | TCHAR *msg) 698 | { 699 | int flag, compare; 700 | bregonig *rx; 701 | OnigOptionType option; 702 | OnigEncoding enc; 703 | TRACE0(_T("recompile_onig_ex()\n")); 704 | TRACE2(_T("patternp: %s, len: %d\n"), patternp, patternendp-patternp); 705 | TRACE2(_T("prerepp: %s, len: %d\n"), prerepp, prerependp-prerepp); 706 | TRACE2(_T("optionp: %s, len: %d\n"), optionp, optionendp-optionp); 707 | 708 | 709 | compare = compare_pattern(rxold, type, patternp, patternendp, 710 | prerepp, prerependp, optionp, optionendp); 711 | TRACE1(_T("compare: %d\n"), compare); 712 | if (compare < 0) { 713 | // need to compile 714 | delete rxold; 715 | rxold = NULL; 716 | 717 | if (patternp == NULL 718 | || ((type == PTN_SUBST || type == PTN_TRANS) && prerepp == NULL)) { 719 | asc2tcs(msg, "invalid reg parameter", BREGEXP_MAX_ERROR_MESSAGE_LEN); 720 | return NULL; 721 | } 722 | } else { 723 | // no need to compile 724 | if (rxold->outp) { 725 
| delete [] rxold->outp; 726 | rxold->outp = NULL; 727 | } 728 | if (rxold->splitp) { 729 | delete [] rxold->splitp; 730 | rxold->splitp = NULL; 731 | } 732 | } 733 | 734 | parse_option(optionp, optionendp, &option, &enc, &flag); 735 | 736 | if (type == PTN_TRANS) { 737 | if (compare == 0) { 738 | // no need to compile 739 | TRACE1(_T("rxold(1):0x%08x\n"), rxold); 740 | return rxold; 741 | } 742 | rx = trcomp(patternp, patternendp, prerepp, prerependp, flag, msg); 743 | if (rx == NULL) { 744 | return NULL; 745 | } 746 | } else { 747 | if (compare == 0) { 748 | // no need to compile 749 | TRACE1(_T("rxold(2):0x%08x\n"), rxold); 750 | return rxold; 751 | } else if (compare < 0) { 752 | // pattern string needs to compile. 753 | rx = new (std::nothrow) bregonig(); 754 | if (rx == NULL) { 755 | asc2tcs(msg, "out of space regexp", BREGEXP_MAX_ERROR_MESSAGE_LEN); 756 | return NULL; 757 | } 758 | OnigErrorInfo err_info; 759 | int err_code = onig_new(&rx->reg, 760 | (UChar*) patternp, (UChar*) patternendp, 761 | option, enc, &OnigSyntaxPerl_NG_EX, &err_info); 762 | if (err_code != ONIG_NORMAL) { 763 | onig_err_to_bregexp_msg(err_code, &err_info, msg); 764 | delete rx; 765 | return NULL; 766 | } 767 | 768 | rx->nparens = onig_number_of_captures(rx->reg); // 769 | rx->pmflags = flag; 770 | } else { 771 | // only replace string needs to compile. 
772 | rx = rxold; 773 | } 774 | if (rxold != NULL && rxold->repstr != NULL) { 775 | delete rxold->repstr; 776 | rxold->repstr = NULL; 777 | } 778 | if (type == PTN_SUBST) { // substitute 779 | try { 780 | rx->pmflags |= PMf_SUBSTITUTE; 781 | rx->repstr = compile_rep(rx, prerepp, prerependp); // compile replace string 782 | } catch (std::exception& ex) { 783 | asc2tcs(msg, ex.what(), BREGEXP_MAX_ERROR_MESSAGE_LEN); 784 | delete rx; 785 | return NULL; 786 | } 787 | } 788 | } 789 | 790 | if (ptn != NULL) { 791 | size_t plen = _tcslen(ptn); 792 | delete [] rx->parap; 793 | rx->parap = new (std::nothrow) TCHAR[plen+1]; // parameter copy 794 | if (rx->parap == NULL) { 795 | asc2tcs(msg, "precompile out of space", BREGEXP_MAX_ERROR_MESSAGE_LEN); 796 | delete rx; 797 | return NULL; 798 | } 799 | memcpy(rx->parap, ptn, (plen+1)*sizeof(TCHAR)); // copy include null 800 | rx->paraendp = rx->parap + plen; 801 | } 802 | 803 | TCHAR *oldpatternp = rx->patternp; 804 | 805 | if (patternp == NULL) { 806 | patternp = rx->patternp; 807 | patternendp = rx->patternendp; 808 | optionp = rx->optionp; 809 | optionendp = rx->optionendp; 810 | } 811 | 812 | /* save pattern, replace and option string */ 813 | ptrdiff_t len1 = patternendp - patternp; 814 | ptrdiff_t len2 = prerependp - prerepp; 815 | ptrdiff_t len3 = optionendp - optionp; 816 | rx->patternp = new (std::nothrow) TCHAR[len1+1 + len2+1 + len3+1]; 817 | if (rx->patternp == NULL) { 818 | delete rx; 819 | delete [] oldpatternp; 820 | return NULL; 821 | } 822 | memcpy(rx->patternp, patternp, len1*sizeof(TCHAR)); 823 | rx->patternp[len1] = 0; 824 | rx->patternendp = rx->patternp + len1; 825 | 826 | rx->prerepp = rx->patternp + len1 + 1; 827 | memcpy(rx->prerepp, prerepp, len2*sizeof(TCHAR)); 828 | rx->prerepp[len2] = 0; 829 | rx->prerependp = rx->prerepp + len2; 830 | 831 | rx->optionp = rx->prerepp + len2 + 1; 832 | memcpy(rx->optionp, optionp, len3*sizeof(TCHAR)); 833 | rx->optionp[len3] = 0; 834 | rx->optionendp = rx->optionp + 
len3; 835 | 836 | 837 | delete [] oldpatternp; 838 | 839 | TRACE1(_T("rx:0x%08x\n"), rx); 840 | return rx; 841 | } 842 | 843 | 844 | int regexec_onig(bregonig *rx, const TCHAR *stringarg, 845 | const TCHAR *strend, /* pointer to null at end of string */ 846 | const TCHAR *strbeg, /* real beginning of string */ 847 | int minend, /* end of match must be at least minend after stringarg */ 848 | int safebase, /* no need to remember string in subbase */ 849 | int one_shot, /* if not match then break without proceed str pointer */ 850 | TCHAR *msg) /* fatal error message */ 851 | { 852 | TRACE1(_T("one_shot: %d\n"), one_shot); 853 | OnigPosition err_code; 854 | 855 | if (one_shot) { 856 | OnigOptionType option = (minend > 0) ? 857 | ONIG_OPTION_FIND_NOT_EMPTY : ONIG_OPTION_NONE; 858 | err_code = onig_match(rx->reg, (UChar*) strbeg, (UChar*) strend, 859 | (UChar*) stringarg, rx->region, 860 | option); 861 | } else { 862 | const TCHAR *global_pos = stringarg; /* \G */ 863 | if (minend > 0) { 864 | #ifdef UNICODE 865 | int kanjiflag = 1; 866 | #else 867 | int kanjiflag = rx->pmflags & PMf_KANJI; 868 | #endif 869 | if (kanjiflag && is_char_pair((TBYTE*) stringarg)) { 870 | stringarg += 2; 871 | } else { 872 | stringarg++; 873 | } 874 | } 875 | err_code = onig_search_gpos(rx->reg, (UChar*) strbeg, (UChar*) strend, 876 | (UChar*) global_pos, 877 | (UChar*) stringarg, (UChar*) strend, rx->region, 878 | ONIG_OPTION_NONE); 879 | } 880 | 881 | if (err_code >= 0) { 882 | /* FOUND */ 883 | if (rx->startp) { 884 | delete [] rx->startp; 885 | } 886 | rx->nparens = rx->region->num_regs - 1; 887 | rx->startp = new (std::nothrow) TCHAR*[rx->region->num_regs * 2]; 888 | /* allocate startp and endp together */ 889 | if (rx->startp == NULL) { 890 | asc2tcs(msg, "out of space", BREGEXP_MAX_ERROR_MESSAGE_LEN); 891 | return -1; 892 | } 893 | rx->endp = rx->startp + rx->region->num_regs; 894 | 895 | for (int i = 0; i < rx->region->num_regs; i++) { 896 | if (rx->region->beg[i] != 
ONIG_REGION_NOTPOS) { 897 | // found 898 | rx->startp[i] = const_cast(strbeg) + rx->region->beg[i] / sizeof(TCHAR); 899 | rx->endp[i] = const_cast(strbeg) + rx->region->end[i] / sizeof(TCHAR); 900 | } else { 901 | // not found 902 | rx->startp[i] = NULL; 903 | rx->endp[i] = NULL; 904 | } 905 | } 906 | return 1; 907 | } else if (err_code == ONIG_MISMATCH) { 908 | /* NOT FOUND */ 909 | return 0; 910 | } else { 911 | /* ERROR */ 912 | onig_err_to_bregexp_msg(err_code, NULL, msg); 913 | return -1; 914 | } 915 | } 916 | 917 | } // namespace 918 | -------------------------------------------------------------------------------- /src/bregonig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * bregonig.h 3 | */ 4 | /* 5 | * Copyright (C) 2006-2012 K.Takata 6 | * 7 | * You may distribute under the terms of either the GNU General Public 8 | * License or the Artistic License, as specified in the perl_license.txt file. 9 | */ 10 | 11 | 12 | #ifndef BREGONIG_H_ 13 | #define BREGONIG_H_ 14 | 15 | #ifdef UNICODE 16 | typedef DWORD TWORD; 17 | #define BREGONIG_NS unicode 18 | #else 19 | typedef WORD TWORD; 20 | #define BREGONIG_NS ansi 21 | #endif 22 | 23 | #ifndef lengthof 24 | #define lengthof(arr) ((sizeof(arr) / sizeof((arr)[0]))) 25 | #endif 26 | 27 | enum casetype { 28 | CASE_NONE, CASE_UPPER, CASE_LOWER 29 | }; 30 | 31 | namespace BREGONIG_NS { 32 | 33 | struct repinfo { 34 | TCHAR *startp; /* start address if <256 \digit */ 35 | ptrdiff_t dlen; /* data length / backref num */ 36 | casetype nextcase; /* \l or \u */ 37 | casetype currentcase; /* \L or \U */ 38 | 39 | repinfo() { 40 | startp = 0; dlen = 0; 41 | nextcase = CASE_NONE; currentcase = CASE_NONE; 42 | } 43 | }; 44 | 45 | typedef struct repstr { 46 | int count; /* entry counter */ 47 | repinfo *info; 48 | TCHAR data[1]; /* data start */ 49 | 50 | repstr() { count = 0; info = 0; } 51 | ~repstr() { delete [] info; } 52 | 53 | void init(int cnt) { 54 | count = cnt; // 
default \digits count in string 55 | info = new repinfo[cnt]; 56 | } 57 | 58 | inline bool is_normal_string(int i) { 59 | return ((info[i].startp != NULL) && ((INT_PTR) info[i].startp > 1)); 60 | } 61 | inline bool is_backslash(int i) { 62 | return ((INT_PTR) info[i].startp == 1); 63 | } 64 | inline void set_backslash(int i) { 65 | info[i].startp = (TCHAR *) 1; // \digits 66 | } 67 | inline void set_dollar(int i) { 68 | info[i].startp = NULL; // $digits 69 | } 70 | 71 | static void *operator new(size_t cb, size_t data_size) { 72 | return ::operator new (cb + data_size * sizeof(TCHAR)); 73 | } 74 | static void operator delete(void *p) { 75 | ::operator delete (p); 76 | } 77 | static void operator delete(void *p, size_t data_size) { 78 | ::operator delete (p); 79 | } 80 | } REPSTR; 81 | 82 | 83 | enum pattern_type { 84 | PTN_ERROR = -1, 85 | PTN_MATCH = 0, 86 | PTN_SUBST, 87 | PTN_TRANS 88 | }; 89 | 90 | struct bregonig : bregexp { 91 | #if 0 92 | TCHAR *outp; /* matched or substitute string start ptr */ 93 | TCHAR *outendp; /* matched or substitute string end ptr */ 94 | int splitctr; /* split result counrer */ 95 | TCHAR **splitp; /* split result pointer ptr */ 96 | int rsv1; /* reserved for external use */ 97 | TCHAR *parap; /* parameter start ptr ie. 
"s/xxxxx/yy/gi" */ 98 | TCHAR *paraendp; /* parameter end ptr */ 99 | TCHAR *transtblp; /* translate table ptr */ 100 | TCHAR **startp; /* match string start ptr */ 101 | TCHAR **endp; /* match string end ptr */ 102 | int nparens; /* number of parentheses */ 103 | #endif 104 | // external field end point 105 | int pmflags; 106 | 107 | regex_t *reg; 108 | OnigRegion *region; 109 | REPSTR *repstr; 110 | 111 | /* "s/pattern/replace/option" */ 112 | TCHAR *patternp; /* original pattern string */ 113 | TCHAR *patternendp; /* original pattern string end */ 114 | TCHAR *prerepp; /* original replace string */ 115 | TCHAR *prerependp; /* original replace string end */ 116 | TCHAR *optionp; /* original option string */ 117 | TCHAR *optionendp; /* original option string end */ 118 | 119 | 120 | bregonig(); 121 | ~bregonig(); 122 | }; 123 | 124 | 125 | #define iskanji(c) _ismbblead(c) 126 | 127 | inline int is_char_pair(const TBYTE *s) 128 | { 129 | #ifdef UNICODE 130 | if (((s[0] & 0xfc00) == 0xd800) && ((s[1] & 0xfc00) == 0xdc00)) { 131 | return true; 132 | } 133 | return false; 134 | #else 135 | return iskanji(*s); 136 | #endif 137 | } 138 | 139 | inline TWORD get_codepoint(const TBYTE *s) 140 | { 141 | #ifdef UNICODE 142 | return (((s[0] - 0xd800) << 10) | (s[1] - 0xdc00)) + 0x10000; 143 | #else 144 | return (s[0] << 8) | s[1]; 145 | #endif 146 | } 147 | 148 | inline int set_codepoint(TWORD codepoint, TBYTE *s) 149 | { 150 | TBYTE *t = s; 151 | #ifdef UNICODE 152 | if (codepoint > 0xffff) { // Surrogate Pair 153 | unsigned int c = codepoint - 0x10000; 154 | *s++ = (c >> 10) | 0xd800; 155 | codepoint = (c & 0x3ff) | 0xdc00; 156 | } 157 | #else 158 | if (codepoint > 0xff) { 159 | *s++ = codepoint >> 8; 160 | } 161 | #endif 162 | *s++ = (TBYTE) codepoint; 163 | return (int) (s - t); 164 | } 165 | 166 | // ASCII to TCHAR string 167 | inline TCHAR *asc2tcs(TCHAR *dst, const char *src, size_t cch) 168 | { 169 | #ifdef UNICODE 170 | _snwprintf(dst, cch, L"%hs", src); 171 | if 
(cch > 0) { 172 | dst[cch - 1] = L'\0'; // Ensure NUL termination. 173 | } 174 | return dst; 175 | #else 176 | return lstrcpyn(dst, src, (int) cch); // NUL termination is ensured. 177 | #endif 178 | } 179 | 180 | 181 | #define CALLBACK_KIND_REPLACE 0 182 | #define SUBST_BUF_SIZE 256 183 | 184 | 185 | int check_params(const TCHAR *target, const TCHAR *targetstartp, 186 | const TCHAR *targetendp, BREGEXP **rxp, TCHAR *msg, bool allownullstr); 187 | int BMatch_s(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 188 | int one_shot, 189 | BREGEXP **rxp, TCHAR *msg); 190 | int BSubst_s(TCHAR *str, TCHAR *target, TCHAR *targetstartp, TCHAR *targetendp, 191 | BREGEXP **rxp, TCHAR *msg, BCallBack callback); 192 | 193 | int onig_err_to_bregexp_msg(OnigPosition err_code, OnigErrorInfo* err_info, TCHAR *msg); 194 | 195 | 196 | bregonig *recompile_onig(bregonig *rxold, pattern_type type, 197 | const TCHAR *ptn, TCHAR *msg); 198 | bregonig *recompile_onig_ex(bregonig *rxold, 199 | pattern_type type, const TCHAR *ptn, 200 | const TCHAR *patternp, const TCHAR *patternendp, 201 | const TCHAR *prerepp, const TCHAR *prerependp, 202 | const TCHAR *optionp, const TCHAR *optionendp, 203 | TCHAR *msg); 204 | 205 | //bregonig *compile_onig(const TCHAR *ptn, int plen, TCHAR *msg); 206 | REPSTR *compile_rep(bregonig *rx, const TCHAR *str, const TCHAR *strend); 207 | 208 | int subst_onig(bregonig *rx, const TCHAR *target, 209 | const TCHAR *targetstartp, const TCHAR *targetendp, 210 | TCHAR *msg, BCallBack callback); 211 | int split_onig(bregonig *rx, TCHAR *target, TCHAR *targetendp, int limit, TCHAR *msg); 212 | 213 | 214 | int regexec_onig(bregonig *rx, const TCHAR *stringarg, 215 | const TCHAR *strend, /* pointer to null at end of string */ 216 | const TCHAR *strbeg, /* real beginning of string */ 217 | int minend, /* end of match must be at least minend after stringarg */ 218 | int safebase, /* no need to remember string in subbase */ 219 | int one_shot, /* if not match 
then break without proceed str pointer */ 220 | TCHAR *msg); /* fatal error message */ 221 | 222 | int trans(bregonig *rx, TCHAR *target, TCHAR *targetendp, TCHAR *msg); 223 | 224 | bregonig *trcomp(const TCHAR *res, const TCHAR *resend, 225 | const TCHAR *rp, const TCHAR *rpend, 226 | int flag, TCHAR *msg); 227 | 228 | 229 | #define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || c == '_')) 230 | #define isIDFIRST(c) (isascii(c) && (isalpha(c) || (c) == '_')) 231 | #define isALPHA(c) (isascii(c) && isalpha(c)) 232 | #define isSPACE(c) (isascii(c) && isspace(c)) 233 | #define isDIGIT(c) (isascii(c) && isdigit(c)) 234 | #define isXDIGIT(c) (isascii(c) && isxdigit(c)) 235 | #define isUPPER(c) (isascii(c) && isupper(c)) 236 | #define isLOWER(c) (isascii(c) && islower(c)) 237 | #define toUPPER(c) toupper(c) 238 | #define toLOWER(c) tolower(c) 239 | 240 | #define PMf_USED 0x0001 /* pm has been used once already */ 241 | #define PMf_ONCE 0x0002 /* use pattern only once per reset */ 242 | #define PMf_SCANFIRST 0x0004 /* initial constant not anchored */ 243 | #define PMf_ALL 0x0008 /* initial constant is whole pat */ 244 | #define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */ 245 | #define PMf_FOLD 0x0020 /* case insensitivity */ 246 | #define PMf_CONST 0x0040 /* subst replacement is constant */ 247 | #define PMf_KEEP 0x0080 /* keep 1st runtime pattern forever */ 248 | #define PMf_GLOBAL 0x0100 /* pattern had a g modifier */ 249 | #define PMf_RUNTIME 0x0200 /* pattern coming in on the stack */ 250 | #define PMf_EVAL 0x0400 /* evaluating replacement as expr */ 251 | #define PMf_WHITE 0x0800 /* pattern is \s+ */ 252 | #define PMf_MULTILINE 0x1000 /* assume multiple lines */ 253 | #define PMf_SINGLELINE 0x2000 /* assume single line */ 254 | #define PMf_KANJI 0x4000 /* KANJI mode */ 255 | #define PMf_EXTENDED 0x8000 /* chuck embedded whitespace */ 256 | #define PMf_SUBSTITUTE 0x010000 /* substitute */ 257 | #define PMf_TRANSLATE 0x020000 /* translate */ 
258 | #define PMf_TRANS_COMPLEMENT 0x040000 /* translate complement */ 259 | #define PMf_TRANS_DELETE 0x080000 /* translate delete */ 260 | #define PMf_TRANS_SQUASH 0x100000 /* translate squash */ 261 | 262 | } // namespace 263 | 264 | #endif /* BREGONIG_H_ */ 265 | -------------------------------------------------------------------------------- /src/bregonig.rc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-takata/bregonig/670b229063a33910e717a9f220d89ce8ab46d790/src/bregonig.rc -------------------------------------------------------------------------------- /src/bsplit.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // bsplit.cc 3 | // Split front-end 4 | //////////////////////////////////////////////////////////////////////////////// 5 | // 1999.11.24 update by Tatsuo Baba 6 | // 2006.08.29 update by K.Takata 7 | // 8 | // You may distribute under the terms of either the GNU General Public 9 | // License or the Artistic License, as specified in the perl_license.txt file. 
10 | //////////////////////////////////////////////////////////////////////////////// 11 | 12 | 13 | #define _CRT_SECURE_NO_DEPRECATE 14 | #define WIN32_LEAN_AND_MEAN 15 | 16 | #define _BREGEXP_ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #ifdef USE_ONIGMO_6 26 | # include 27 | #else 28 | # include 29 | #endif 30 | #include "bregexp.h" 31 | //#include "global.h" 32 | #include "bregonig.h" 33 | #include "mem_vc6.h" 34 | #include "dbgtrace.h" 35 | 36 | 37 | using namespace BREGONIG_NS; 38 | namespace BREGONIG_NS { 39 | 40 | int split_onig(bregonig *rx, TCHAR *target, TCHAR *targetendp, int limit, TCHAR *msg) 41 | { 42 | TCHAR *orig,*m; 43 | TCHAR *s = target; 44 | ptrdiff_t len = targetendp - target; 45 | if (len < 1) 46 | return -1; 47 | TCHAR *strend = s + len; 48 | ptrdiff_t maxiters = (strend - s) + 10; 49 | ptrdiff_t iters = 0; 50 | orig = m = s; 51 | 52 | rx->splitctr = 0; // split counter 53 | 54 | 55 | // pattern length = 0 means split each characters 56 | // and limit is 1 returns all string 57 | if (rx->patternp == rx->patternendp || limit == 1) { 58 | ptrdiff_t blen = 2*len + 3; 59 | if (limit == 1) 60 | blen = 5; 61 | TCHAR **buf = new (std::nothrow) TCHAR*[blen]; 62 | int copycnt = 0; 63 | if (buf == NULL) { 64 | asc2tcs(msg, "out of space buf", BREGEXP_MAX_ERROR_MESSAGE_LEN); 65 | return -1; 66 | } 67 | #ifdef UNICODE 68 | int kanjiflag = 1; 69 | #else 70 | int kanjiflag = rx->pmflags & PMf_KANJI; 71 | #endif 72 | while (s < strend) { 73 | if (--limit == 0) { 74 | buf[copycnt++] = s; 75 | buf[copycnt++] = strend; 76 | break; 77 | } 78 | if (kanjiflag && is_char_pair((TBYTE*)s)) { 79 | buf[copycnt++] = s; 80 | s += 2; 81 | buf[copycnt++] = s; 82 | } else { 83 | buf[copycnt++] = s++; 84 | buf[copycnt++] = s; 85 | } 86 | } 87 | if (copycnt) { 88 | rx->splitctr = copycnt / 2; // split counter 89 | buf[copycnt] = NULL; // set stopper 90 | buf[copycnt+1] = NULL; // set stopper 91 | rx->splitp = 
buf; 92 | } 93 | else 94 | delete [] buf; 95 | 96 | return rx->splitctr; 97 | } 98 | 99 | // now ready 100 | int blen = 256; // initial size 101 | TCHAR **buf = new (std::nothrow) TCHAR*[blen]; 102 | int copycnt = 0; 103 | if (buf == NULL) { 104 | asc2tcs(msg, "out of space buf", BREGEXP_MAX_ERROR_MESSAGE_LEN); 105 | return -1; 106 | } 107 | if (!regexec_onig(rx, s, strend, orig, 0,1,0,msg)) { // no split ? 108 | buf[0] = target; 109 | buf[1] = targetendp; 110 | rx->splitctr = 1; // split counter 111 | buf[2] = NULL; // set stopper 112 | buf[3] = NULL; // set stopper 113 | rx->splitp = buf; 114 | return 1; 115 | } 116 | // now ready to go 117 | limit--; 118 | do { 119 | if (iters++ > maxiters) { 120 | delete [] buf; 121 | asc2tcs(msg, "Split loop", BREGEXP_MAX_ERROR_MESSAGE_LEN); 122 | return -1; 123 | } 124 | m = rx->startp[0]; 125 | len = m - s; 126 | if (blen <= copycnt + 3) { 127 | TCHAR **tp = new (std::nothrow) TCHAR*[blen + 256]; 128 | if (tp == NULL) { 129 | asc2tcs(msg, "out of space buf", BREGEXP_MAX_ERROR_MESSAGE_LEN); 130 | delete [] buf; 131 | return -1; 132 | } 133 | memcpy(tp,buf,copycnt*sizeof(TCHAR*)); 134 | delete [] buf; 135 | buf = tp; blen += 256; 136 | } 137 | buf[copycnt++] = s; 138 | buf[copycnt++] = s+ len; 139 | s = rx->endp[0]; 140 | if (--limit == 0) 141 | break; 142 | } while (regexec_onig(rx, s, strend, orig, s == m, 1, 0, msg)); 143 | // len = rx->subend - s; 144 | len = targetendp - s; // ??? 
145 | if (blen <= copycnt + 3) { 146 | TCHAR **tp = new (std::nothrow) TCHAR*[blen + 3]; 147 | if (tp == NULL) { 148 | asc2tcs(msg, "out of space buf", BREGEXP_MAX_ERROR_MESSAGE_LEN); 149 | delete [] buf; 150 | return -1; 151 | } 152 | memcpy(tp,buf,copycnt*sizeof(TCHAR*)); 153 | delete [] buf; 154 | buf = tp; 155 | } 156 | if (len) { 157 | buf[copycnt++] = s; 158 | buf[copycnt++] = s+ len; 159 | } 160 | if (copycnt) { 161 | rx->splitctr = copycnt / 2; // split counter 162 | buf[copycnt] = NULL; // set stopper 163 | buf[copycnt+1] = NULL; // set stopper 164 | rx->splitp = buf; 165 | } 166 | else 167 | delete [] buf; 168 | 169 | return rx->splitctr; 170 | } 171 | 172 | 173 | } // namespace 174 | -------------------------------------------------------------------------------- /src/btrans.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // btrans.cc 3 | // 4 | // translate front-end 5 | //////////////////////////////////////////////////////////////////////////////// 6 | // 1999.11.24 update by Tatsuo Baba 7 | // 2006.08.30 update by K.Takata 8 | // 9 | // You may distribute under the terms of either the GNU General Public 10 | // License or the Artistic License, as specified in the perl_license.txt file. 
11 | //////////////////////////////////////////////////////////////////////////////// 12 | 13 | #define _CRT_SECURE_NO_DEPRECATE 14 | #define WIN32_LEAN_AND_MEAN 15 | 16 | #define _BREGEXP_ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #ifdef USE_ONIGMO_6 27 | # include 28 | #else 29 | # include 30 | #endif 31 | #include "bregexp.h" 32 | //#include "global.h" 33 | #include "bregonig.h" 34 | #include "mem_vc6.h" 35 | #include "dbgtrace.h" 36 | 37 | #define KANJI 38 | //#include "global.h" 39 | #include "sv.h" 40 | //#include "intreg.h" 41 | 42 | 43 | using namespace BREGONIG_NS; 44 | namespace BREGONIG_NS { 45 | 46 | static SV *cvchar(const TCHAR *str, const TCHAR *strend); 47 | static TWORD specchar(const TCHAR* p,int *next); 48 | void sv_catkanji(SV *sv,U32 tch); 49 | 50 | 51 | /* 52 | static inline int is_char_pair(const TBYTE *s) 53 | { 54 | #ifdef UNICODE 55 | if (((s[0] & 0xfc00) == 0xd800) && ((s[1] & 0xfc00) == 0xdc00)) { 56 | return true; 57 | } 58 | return false; 59 | #else 60 | return iskanji(*s); 61 | #endif 62 | } 63 | 64 | static inline TWORD get_codepoint(const TBYTE *s) 65 | { 66 | #ifdef UNICODE 67 | return (((s[0] - 0xd800) << 10) | (s[1] - 0xdc00)) + 0x10000; 68 | #else 69 | return ((U8)s[0] <<8) | (U8)s[1]; 70 | #endif 71 | } 72 | */ 73 | 74 | 75 | // compile translate string 76 | bregonig *trcomp(const TCHAR *str, const TCHAR *strend, 77 | const TCHAR *rp, const TCHAR *rpend, 78 | int flag, TCHAR *msg) 79 | { 80 | ptrdiff_t slen = strend - str; 81 | ptrdiff_t rlen = rpend - rp; 82 | if (slen < 1) 83 | return NULL; 84 | register const TCHAR *p = str; 85 | register const TCHAR *pend = strend; 86 | 87 | // bregonig *rx = (bregonig*) new char[sizeof(bregonig)]; 88 | bregonig *rx = new (std::nothrow) bregonig(); 89 | if (rx == NULL) { 90 | asc2tcs(msg, "out of space trcomp", BREGEXP_MAX_ERROR_MESSAGE_LEN); 91 | return NULL; 92 | } 93 | 94 | // memset(rx,0,sizeof(bregonig)); 
95 | rx->pmflags = flag | PMf_TRANSLATE; 96 | 97 | SV *tstr = NULL; 98 | SV *rstr = NULL; 99 | 100 | /* the even index holds the t-char(in 2byte), and the odd index 101 | holds the r-char(in 2 byte) if t-char is to be removed, then 102 | r-char is -2. */ 103 | register TWORD *tbl = NULL; 104 | 105 | try { 106 | tstr = cvchar(str,strend); 107 | rstr = cvchar(rp,rpend); 108 | 109 | 110 | ptrdiff_t tlen = SvCUR(tstr); 111 | rlen = SvCUR(rstr); 112 | register TBYTE *t = (TBYTE*)SvPVX(tstr); 113 | register TBYTE *r = (TBYTE*)SvPVX(rstr); 114 | 115 | register int i; /* indexes t */ 116 | register int j; /* indexes j */ 117 | register int k; /* indexes tbl */ 118 | int lastrch = -1; 119 | int tbl_size = 256; 120 | 121 | int del_char; 122 | int complement; 123 | int kanji; 124 | 125 | tbl = new TWORD[tbl_size]; 126 | 127 | 128 | complement = rx->pmflags & PMf_TRANS_COMPLEMENT; 129 | del_char = rx->pmflags & PMf_TRANS_DELETE; 130 | #ifdef UNICODE 131 | kanji = 1; 132 | #else 133 | kanji = rx->pmflags & PMf_KANJI; 134 | #endif 135 | 136 | for (i = 0, j = 0, k = 0; i < tlen; ) { 137 | U32 tch, rch; 138 | if (kanji && i < tlen-1 && is_char_pair(t+i)) { 139 | tch = get_codepoint(t+i); 140 | i+=2; 141 | } else { 142 | tch = (TBYTE)t[i]; 143 | i++; 144 | } 145 | if (j >= rlen) { 146 | if (del_char) rch = (unsigned)-2; 147 | else rch = lastrch; 148 | } else { 149 | if (kanji && j < rlen-1 && is_char_pair(r+j)) { 150 | rch = get_codepoint(r+j); 151 | j += 2; 152 | } else { 153 | rch = (TBYTE)r[j]; 154 | j++; 155 | } 156 | lastrch = rch; 157 | } 158 | if (k >= tbl_size) { 159 | TWORD *tp = new TWORD[tbl_size+256]; 160 | memcpy(tp,tbl,tbl_size * sizeof(TWORD)); 161 | delete [] tbl; 162 | tbl = tp; 163 | tbl_size += 256; 164 | } 165 | tbl[k++] = tch; 166 | tbl[k++] = rch; 167 | } 168 | if (k >= tbl_size) { 169 | TWORD *tp = new TWORD[tbl_size+4]; 170 | memcpy(tp,tbl,tbl_size * sizeof(TWORD)); 171 | delete [] tbl; 172 | tbl = tp; 173 | tbl_size += 4; 174 | } 175 | /* mark the end */ 
176 | tbl[k++] = (TWORD)-1; 177 | tbl[k++] = (TWORD)-1; 178 | rx->transtblp = (TCHAR*)tbl; 179 | sv_free(tstr); 180 | sv_free(rstr); 181 | 182 | return rx; 183 | } 184 | catch (std::exception& ex) { 185 | TRACE0(_T("out of space in trcomp()\n")); 186 | if (tstr) 187 | sv_free(tstr); 188 | if (rstr) 189 | sv_free(rstr); 190 | delete tbl; 191 | delete rx; 192 | asc2tcs(msg, ex.what(), BREGEXP_MAX_ERROR_MESSAGE_LEN); 193 | return NULL; 194 | } 195 | } 196 | 197 | static SV *cvchar(const TCHAR *str, const TCHAR *strend) 198 | { 199 | int next; 200 | TWORD ender; 201 | TWORD lastch = 0; 202 | ptrdiff_t len = strend - str; 203 | const TCHAR *p = str; 204 | const TCHAR *pend = strend; 205 | SV *dst = newSVpv(_T(""),0); 206 | while (p < pend) { 207 | if (*p != '\\' && *p != '-') { // no magic char ? 208 | lastch = *p; 209 | if (is_char_pair((TBYTE*)p)) { // Surrogate Pair or kanji ? 210 | lastch = get_codepoint((TBYTE*)p); 211 | sv_catkanji(dst,lastch); 212 | p++; 213 | } else 214 | sv_catpvn(dst,p,1); 215 | p++; 216 | continue; 217 | } 218 | p++; 219 | if (p >= pend) { 220 | sv_catpvn(dst,p-1,1); 221 | break; 222 | } 223 | if (p[-1] == '-') { // - ? 224 | const TCHAR* tp = p -1; 225 | TWORD toch; 226 | if (is_char_pair((TBYTE*)p)) { // Surrogate Pair or kanji ? 227 | toch = get_codepoint((TBYTE*)p); 228 | p += 2; 229 | } else { 230 | toch = *p++; 231 | if (p[-1] == '\\') { 232 | if (p +1 >= pend) { 233 | sv_catpvn(dst,p-1,2); 234 | break; 235 | } 236 | toch = specchar(p,&next); 237 | p += next; 238 | } 239 | } 240 | 241 | if (lastch >= toch || toch - lastch > 255) { 242 | sv_catpvn(dst,tp,p - tp); 243 | continue; 244 | } 245 | // int clen = toch > 255 ? 
2 :1; 246 | for (lastch++;toch >= lastch;lastch++) { 247 | /* 248 | if (clen ==1) 249 | sv_catpvn(dst,(TCHAR*)&lastch,clen); 250 | else { 251 | TCHAR ch[2]; 252 | ch[0] = lastch >> 8; 253 | ch[1] = (TCHAR)lastch; 254 | sv_catpvn(dst,ch,clen); 255 | } 256 | */ 257 | sv_catkanji(dst,lastch); 258 | } 259 | lastch--; 260 | continue; 261 | } 262 | 263 | if (*p == '\\') { 264 | sv_catpvn(dst,p,1); 265 | p++; 266 | continue; 267 | } 268 | ender = specchar(p,&next); 269 | // sv_catpvn(dst,&ender,1); 270 | sv_catkanji(dst,ender); 271 | lastch = ender; 272 | p += next; 273 | } 274 | 275 | 276 | return dst; 277 | } 278 | 279 | 280 | static TWORD specchar(const TCHAR *p, int *next) 281 | { 282 | TWORD ender; 283 | int numlen = 0; 284 | switch (*p++) { 285 | case '/': 286 | ender = '/'; 287 | break; 288 | case 'n': 289 | ender = '\n'; 290 | break; 291 | case 'r': 292 | ender = '\r'; 293 | break; 294 | case 't': 295 | ender = '\t'; 296 | break; 297 | case 'f': 298 | ender = '\f'; 299 | break; 300 | case 'e': 301 | ender = '\033'; 302 | break; 303 | case 'a': 304 | // ender = '\007'; 305 | ender = '\a'; 306 | break; 307 | #ifdef USE_VTAB 308 | case 'v': // added by K.Takata 309 | ender = '\v'; 310 | break; 311 | #endif 312 | case 'b': // added by K.Takata 313 | ender = '\b'; 314 | break; 315 | case 'x': 316 | if (isXDIGIT(*p)) { // '\xHH' 317 | ender = (TBYTE)scan_hex(p, 2, &numlen); 318 | } 319 | else { 320 | if (*p == '{') { // '\x{HH}' 321 | TWORD code = (TWORD)scan_hex(++p, 8, &numlen); 322 | if (p[numlen] == '}') { 323 | ender = code; 324 | numlen += 2; 325 | break; 326 | } 327 | } 328 | // SYNTAX ERROR 329 | ender = p[-1]; 330 | numlen = 0; 331 | } 332 | break; 333 | case 'o': 334 | if (*p == '{') { // '\o{OOO}' 335 | TWORD code = (TWORD)scan_oct(++p, 11, &numlen); 336 | if (p[numlen] == '}') { 337 | ender = code; 338 | numlen += 2; 339 | break; 340 | } 341 | } 342 | // SYNTAX ERROR 343 | ender = p[-1]; 344 | numlen = 0; 345 | break; 346 | case 'c': 347 | ender = *p++; 348 
| if (isLOWER(ender)) 349 | ender = toUPPER(ender); 350 | ender ^= 64; 351 | ++numlen; 352 | break; 353 | case '0': case '1': case '2': case '3': 354 | case '4': case '5': case '6': case '7': 355 | --p; 356 | ender = (TCHAR)scan_oct(p, 3, &numlen); 357 | --numlen; 358 | break; 359 | case '\0': 360 | /* FALL THROUGH */ 361 | default: 362 | ender = p[-1]; 363 | } 364 | *next = numlen + 1; 365 | return ender; 366 | } 367 | 368 | 369 | 370 | int trans(bregonig *rx, TCHAR *target, TCHAR *targetendp, TCHAR *msg) 371 | { 372 | register short *tbl; 373 | register TBYTE *s; 374 | register TBYTE *send; 375 | register int matches = 0; 376 | register int squash = rx->pmflags & PMf_TRANS_SQUASH; 377 | ptrdiff_t len; 378 | U32 last_rch; 379 | // This variable need, doesn't it? 380 | // replase sv ; 381 | SV *dest_sv = NULL; 382 | try { 383 | dest_sv = newSVpv(_T(""),0); 384 | 385 | int del_char = rx->pmflags & PMf_TRANS_DELETE; 386 | int complement = rx->pmflags & PMf_TRANS_COMPLEMENT; 387 | #ifdef UNICODE 388 | int kanji = 1; 389 | #else 390 | int kanji = rx->pmflags & PMf_KANJI; 391 | #endif 392 | 393 | 394 | tbl = (short*)rx->transtblp; 395 | s = (TBYTE*)target; 396 | len = targetendp - target; 397 | if (!len) 398 | return 0; 399 | send = s + len; 400 | while (s < send) { 401 | U32 tch, rch; 402 | TBYTE *next_s; 403 | TWORD *tp; 404 | int matched; 405 | if (kanji && s < send-1 && is_char_pair(s)) { 406 | tch = get_codepoint(s); 407 | next_s = s+2; 408 | } else { 409 | tch = *(TBYTE*)s; 410 | next_s = s+1; 411 | } 412 | /* look for ch in tbl */ 413 | if (!complement) { 414 | for (tp = (TWORD*)tbl; *tp != (TWORD)(-1); tp += 2) { 415 | if (*tp == tch) break; 416 | } 417 | matched = (*tp != (TWORD)(-1)); 418 | rch = tp[1]; 419 | } else { 420 | for (tp = (TWORD*)tbl; *tp != (TWORD)(-1); tp += 2) { 421 | if (*tp == tch) break; 422 | } 423 | matched = (*tp == (TWORD)(-1)); 424 | rch = (TWORD)(del_char ? 
-2 : -1); 425 | } 426 | 427 | 428 | if (!matched) { 429 | sv_catkanji(dest_sv, tch); 430 | } else { 431 | matches++; 432 | if (complement && (rch == (TWORD)(-1))) { 433 | rch = *(tp - 1); 434 | } 435 | if (rch == (TWORD)(-2)) { 436 | /* delete this character */ 437 | } else if (squash) { 438 | if (last_rch == (rch==(TWORD)(-1)?tch:rch)) { 439 | ; // delete this char 440 | } else { 441 | sv_catkanji(dest_sv, rch == (TWORD)(-1) ? tch : rch); 442 | // matches++; 443 | } 444 | } else { 445 | sv_catkanji(dest_sv, rch==(TWORD)(-1) ? tch : rch); 446 | // matches++; 447 | } 448 | } 449 | last_rch = ((rch==(TWORD)(-1)||rch==(TWORD)(-2)) ? tch : rch); 450 | s = next_s; 451 | } 452 | // matches += (s-(TBYTE*)target) - dlen; /* account for disappeared chars */ 453 | 454 | 455 | rx->outp = SvPVX(dest_sv); 456 | rx->outendp = rx->outp + SvCUR(dest_sv); 457 | dest_sv->xpv_pv = NULL; 458 | sv_free(dest_sv); 459 | 460 | 461 | return matches; 462 | } 463 | catch (std::exception& ex) { 464 | TRACE0(_T("out of space in trans()\n")); 465 | if (dest_sv) 466 | sv_free(dest_sv); 467 | asc2tcs(msg, ex.what(), BREGEXP_MAX_ERROR_MESSAGE_LEN); 468 | return -1; 469 | } 470 | } 471 | 472 | void sv_catkanji(SV *sv, U32 tch) 473 | { 474 | TCHAR ch[2]; 475 | int len = set_codepoint(tch, (TBYTE*)ch); 476 | sv_catpvn(sv,ch,len); 477 | return; 478 | } 479 | 480 | } // namespace 481 | -------------------------------------------------------------------------------- /src/dbgtrace.h: -------------------------------------------------------------------------------- 1 | /* 2 | * dbgtrace.h 3 | */ 4 | /* 5 | * Copyright (C) 2006-2011 K.Takata 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 2. 
#if defined(__cplusplus) || defined(_MSC_VER)
/* printf-style debug trace: formats `msg` with its variable arguments into a
 * local 1024-TCHAR buffer via wvsprintf() and passes the result to
 * OutputDebugString().  The `inline` spelling is chosen per language. */
#ifdef __cplusplus
inline
#else /* __cplusplus */
__inline
#endif /* __cplusplus */
void TRACEx_(LPCTSTR msg, ...)
{
	TCHAR buf_[1024];
	va_list ap;
	va_start(ap, msg);
	wvsprintf(buf_, msg, ap);
	va_end(ap);
	OutputDebugString(buf_);
}
#endif /* __cplusplus || _MSC_VER */
(void) 0 : TRACEx_ 105 | #endif /* __cplusplus || _MSC_VER */ 106 | 107 | /* 108 | #define ASSERT(x) ((void) 0) 109 | #define VERIFY(x) ((void) x) 110 | */ 111 | 112 | #endif /* _DEBUG */ 113 | 114 | 115 | /* 116 | #ifdef _DEBUG 117 | #define DEBUG_CLIENTBLOCK new(_CLIENT_BLOCK, __FILE__, __LINE__) 118 | #else 119 | #define DEBUG_CLIENTBLOCK 120 | #endif // _DEBUG 121 | */ 122 | #ifdef _DEBUG 123 | #define DEBUG_NEW new(_NORMAL_BLOCK, __FILE__, __LINE__) 124 | #else 125 | #define DEBUG_NEW new 126 | #endif 127 | 128 | 129 | #endif /* DBGTRACE_H_ */ 130 | -------------------------------------------------------------------------------- /src/mem_vc6.h: -------------------------------------------------------------------------------- 1 | /* 2 | * mem_vc6.h 3 | */ 4 | /* 5 | * Copyright (C) 2006 K.Takata 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in the 16 | * documentation and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 22 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | 32 | #ifndef MEM_VC6_H_ 33 | #define MEM_VC6_H_ 34 | 35 | #include 36 | 37 | #if _MSC_VER < 1300 38 | 39 | #include 40 | 41 | 42 | inline int throw_bad_alloc(size_t) 43 | { 44 | throw std::bad_alloc(); 45 | return 0; 46 | } 47 | 48 | inline void set_new_throw_bad_alloc() 49 | { 50 | _set_new_handler(throw_bad_alloc); 51 | } 52 | 53 | inline void *operator new(size_t cb, const std::nothrow_t&) throw() 54 | { 55 | void *p; 56 | try { 57 | p = ::operator new (cb); 58 | } catch (const std::bad_alloc&) { 59 | p = 0; 60 | } 61 | return p; 62 | } 63 | 64 | inline void operator delete(void *p, const std::nothrow_t&) throw() 65 | { 66 | ::operator delete (p); 67 | } 68 | 69 | 70 | #else /* _MSC_VER */ 71 | 72 | 73 | #define set_new_throw_bad_alloc() /**/ 74 | 75 | 76 | #endif /* _MSC_VER */ 77 | 78 | #endif /* MEM_VC6_H_ */ 79 | -------------------------------------------------------------------------------- /src/python/BregPool.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Pooling BREGEXP objects. 4 | 5 | This is a sample of pooling BREGEXP objects. 
class BregPool:
    """Fixed-size pool of compiled BREGEXP objects keyed by pattern string.

    The pool holds at most `nmax` BREGEXP pointers.  Get() returns the pooled
    object whose stored pattern equals the requested one, compiles a new one
    into the first free slot otherwise, and falls back to the last slot when
    the pool is full.
    """

    def __init__(self, nmax):
        # Create the array first: if this raises, _nmax is never set and
        # __del__/Free() become no-ops instead of failing on a half-built
        # object.
        self._rxpool = (POINTER(BREGEXP) * nmax)()
        self._nmax = nmax

    def __del__(self):
        self.Free()

    def Free(self):
        """Release every pooled BREGEXP; safe to call more than once."""
        if getattr(self, "_nmax", 0) == 0:
            return
        for r in self._rxpool:
            if r:
                BRegfree(r)
        del self._rxpool
        self._nmax = 0

    def Get(self, regstr):
        """Return a pooled BREGEXP pointer for the pattern `regstr`.

        Raises RuntimeError when the pool has no slots (created with
        nmax == 0 or already freed).
        """
        if self._nmax <= 0:
            raise RuntimeError("BregPool has no slots (empty or already freed)")

        for i in range(self._nmax):
            r = self._rxpool[i]
            if not r:
                break       # first unused slot
            if not r.contents.parap:
                break       # allocated slot without a stored pattern
            if regstr == string_at(r.contents.parap,
                                   r.contents.paraendp - r.contents.parap):
                return r

        # Either the loop stopped on an allocated slot, or the pool is full
        # and i is the last index: hand that object back as it is.
        if self._rxpool[i]:
            return self._rxpool[i]

        msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN)

        # Compile the pattern by matching it against a dummy one-character
        # target.
        dummystr = " "
        p = cast(dummystr, c_void_p)
        BMatch(regstr, p, p.value + 1, byref(self._rxpool[i]), msg)

        return self._rxpool[i]
class BREGEXP(Structure):
    """ctypes mirror of the BREGEXP structure filled in by the regexp DLL.

    All pointer members are declared as c_void_p, so they are read as
    integer addresses (or None when NULL).
    """
    _fields_ = [
        # start / end address of the result text of a substitution or translation
        ("outp", c_void_p),
        ("outendp", c_void_p),
        # number of fields produced by a split
        ("splitctr", c_int),
        # split result: array of (start, end) address pairs
        ("splitp", POINTER(c_void_p)),
        # pointer-sized member; presumably reserved -- confirm against bregexp.h
        ("rsv1", INT_PTR),
        # start / end address of the stored pattern text
        ("parap", c_void_p),
        ("paraendp", c_void_p),
        # translation table used by the translate function
        ("transtblp", c_void_p),
        # per-group match start / end addresses; index 0 is the whole match
        ("startp", POINTER(c_void_p)),
        ("endp", POINTER(c_void_p)),
        # number of capture groups in the pattern
        ("nparens", c_int)
    ]
def LoadDLL(regexpdll, unicode_func = False):
    """Load specified regular expression DLL.

    Binds the module-level function pointers to the entry points of
    `regexpdll` and sets their ctypes prototypes.

    arguments:
    regexpdll    -- Instance of ctypes.CDLL to load.
    unicode_func -- True: Use Unicode functions (the "...W" entry points).
                    False: Use ANSI functions.

    BMatchEx, BSubstEx, BoMatch and BoSubst are optional: when the DLL does
    not export them the corresponding pointer is set to None, so a pointer
    bound by an earlier LoadDLL() call is never left behind.  A missing
    mandatory entry point raises AttributeError.
    """
    global _create_tchar_buffer, _tstring_at
    global _BRegexpVersion, _BMatch, _BSubst, _BMatchEx, _BSubstEx
    global _BTrans, _BSplit, _BRegfree, _BoMatch, _BoSubst

    if unicode_func:
        c_tchar_p = c_wchar_p
        _create_tchar_buffer = create_unicode_buffer
        _tstring_at = wstring_at
        suffix = "W"
    else:
        c_tchar_p = c_char_p
        _create_tchar_buffer = create_string_buffer
        _tstring_at = string_at
        suffix = ""

    rxpp = POINTER(POINTER(BREGEXP))

    def bind(name, argtypes=None, restype=None, optional=False):
        # Look up one entry point and attach its prototype.
        try:
            func = getattr(regexpdll, name + suffix)
        except AttributeError:
            if optional:
                return None
            raise
        if argtypes is not None:
            func.argtypes = argtypes
        if restype is not None:
            func.restype = restype
        return func

    _BRegexpVersion = bind("BRegexpVersion", restype=c_tchar_p)

    _BMatch = bind("BMatch",
                   [c_tchar_p, c_void_p, c_void_p, rxpp, c_tchar_p])
    _BSubst = bind("BSubst",
                   [c_tchar_p, c_void_p, c_void_p, rxpp, c_tchar_p])

    _BMatchEx = bind("BMatchEx",
                     [c_tchar_p, c_void_p, c_void_p, c_void_p,
                      rxpp, c_tchar_p],
                     optional=True)
    _BSubstEx = bind("BSubstEx",
                     [c_tchar_p, c_void_p, c_void_p, c_void_p,
                      rxpp, c_tchar_p],
                     optional=True)

    _BTrans = bind("BTrans",
                   [c_tchar_p, c_void_p, c_void_p, rxpp, c_tchar_p])
    _BSplit = bind("BSplit",
                   [c_tchar_p, c_void_p, c_void_p, c_int, rxpp, c_tchar_p])
    _BRegfree = bind("BRegfree", [POINTER(BREGEXP)])

    _BoMatch = bind("BoMatch",
                    [c_tchar_p, c_tchar_p,
                     c_void_p, c_void_p, c_void_p,
                     c_int,
                     rxpp, c_tchar_p],
                    optional=True)
    _BoSubst = bind("BoSubst",
                    [c_tchar_p, c_tchar_p, c_tchar_p,
                     c_void_p, c_void_p, c_void_p,
                     c_void_p,
                     rxpp, c_tchar_p],
                    optional=True)
string_at(rxp.contents.startp[i], 27 | rxp.contents.endp[i] - rxp.contents.startp[i]).decode('ASCII'))) 28 | pos = rxp.contents.endp[0] - t1p.value 29 | 30 | if (rxp): 31 | BRegfree(rxp) 32 | 33 | -------------------------------------------------------------------------------- /src/python/sample_bosubst.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig import * 7 | 8 | 9 | def subst_callback(type, value, index): 10 | print("callback: %d, %d, %d" % (type, value, index)) 11 | return True 12 | 13 | 14 | 15 | LoadBregonig() 16 | 17 | print(BRegexpVersion()) 18 | print() 19 | 20 | rxp = POINTER(BREGEXP)() 21 | msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 22 | 23 | t1 = " Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 ".encode('ASCII') 24 | t1p = cast(t1, c_void_p) 25 | ctr = BoSubst(r"(\d\d)-\d{4}-\d{4}".encode('ASCII'), r"$1-xxxx-xxxx".encode('ASCII'), r"g".encode('ASCII'), 26 | t1p, t1p, t1p.value + len(t1), 27 | None, byref(rxp), msg) 28 | if ctr > 0: 29 | if rxp.contents.outp: 30 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 31 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 32 | else: 33 | # Result is an empty string. 34 | print("after(%d)" % ctr) 35 | print("length=0") 36 | 37 | 38 | # use same patternp and same substp -> reused 39 | ctr = BoSubst(r"(\d\d)-\d{4}-\d{4}".encode('ASCII'), r"$1-xxxx-xxxx".encode('ASCII'), r"g".encode('ASCII'), 40 | t1p, t1p, t1p.value + len(t1), 41 | None, byref(rxp), msg) 42 | if ctr > 0: 43 | if rxp.contents.outp: 44 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 45 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 46 | else: 47 | # Result is an empty string. 
48 | print("after(%d)" % ctr) 49 | print("length=0") 50 | 51 | 52 | # reuse patternp, use new substp 53 | # use callback 54 | ctr = BoSubst(None, r"$1-yyyy-zzzz".encode('ASCII'), None, 55 | t1p, t1p, t1p.value + len(t1), 56 | BCallBack(subst_callback), byref(rxp), msg) 57 | if ctr > 0: 58 | if rxp.contents.outp: 59 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 60 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 61 | else: 62 | # Result is an empty string. 63 | print("after(%d)" % ctr) 64 | print("length=0") 65 | 66 | 67 | # reuse patternp and substp 68 | # use callback 69 | ctr = BoSubst(None, None, None, 70 | t1p, t1p, t1p.value + len(t1), 71 | BCallBack(subst_callback), byref(rxp), msg) 72 | if ctr > 0: 73 | if rxp.contents.outp: 74 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 75 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 76 | else: 77 | # Result is an empty string. 78 | print("after(%d)" % ctr) 79 | print("length=0") 80 | 81 | 82 | # use new patternp and same substp -> not reused 83 | # use callback 84 | ctr = BoSubst(r"(\d{3})-\d{3}-\d{4}".encode('ASCII'), r"$1-yyyy-zzzz".encode('ASCII'), r"g".encode('ASCII'), 85 | t1p, t1p, t1p.value + len(t1), 86 | BCallBack(subst_callback), byref(rxp), msg) 87 | if ctr > 0: 88 | if rxp.contents.outp: 89 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 90 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 91 | else: 92 | # Result is an empty string. 
93 | print("after(%d)" % ctr) 94 | print("length=0") 95 | 96 | if (rxp): 97 | BRegfree(rxp) 98 | 99 | -------------------------------------------------------------------------------- /src/python/sample_bosubst_utf8.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig import * 7 | 8 | 9 | def subst_callback(type, value, index): 10 | print("callback: %d, %d, %d" % (type, value, index)) 11 | return True 12 | 13 | 14 | 15 | LoadBregonig() 16 | 17 | print(BRegexpVersion()) 18 | print() 19 | 20 | rxp = POINTER(BREGEXP)() 21 | msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 22 | 23 | t1 = " 横浜 045-222-1111 大阪 06-5555-6666 東京 03-1111-9999 ".encode('UTF-8') 24 | t1p = cast(t1, c_void_p) 25 | ctr = BoSubst(r"(\d\d)-\d{4}-\d{4}".encode('UTF-8'), r"$1-xxxx-xxxx".encode('UTF-8'), r"g8".encode('UTF-8'), 26 | t1p, t1p, t1p.value + len(t1), 27 | None, byref(rxp), msg) 28 | if ctr > 0: 29 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('UTF-8'))) 30 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 31 | 32 | 33 | 34 | ctr = BoSubst(None, r"$1-xxxx-\x{FF59}\x{FF59}\x{FF59}\x{FF59}".encode('UTF-8'), None, 35 | t1p, t1p, t1p.value + len(t1), 36 | BCallBack(subst_callback), byref(rxp), msg) 37 | if ctr > 0: 38 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('UTF-8'))) 39 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 40 | 41 | 42 | if (rxp): 43 | BRegfree(rxp) 44 | 45 | -------------------------------------------------------------------------------- /src/python/sample_match.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig 
import * 7 | 8 | LoadBregonig() 9 | 10 | print(BRegexpVersion()) 11 | print() 12 | 13 | rxp = POINTER(BREGEXP)() 14 | msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 15 | 16 | t1 = " Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 ".encode('ASCII') 17 | t1p = cast(t1, c_void_p) 18 | pattern1 = r"/(03|045)-(\d{3,4})-(\d{4})/".encode('ASCII') 19 | pos = 0 20 | while BMatch(pattern1, t1p.value + pos, t1p.value + len(t1), byref(rxp), msg) > 0: 21 | print("pos: %d, '%s'" % (pos, string_at(t1p.value + pos).decode('ASCII'))) 22 | print("nparens: %d" % rxp.contents.nparens) 23 | for i in range(rxp.contents.nparens + 1): 24 | print("%d = %s" % (i, string_at(rxp.contents.startp[i], 25 | rxp.contents.endp[i] - rxp.contents.startp[i]).decode('ASCII'))) 26 | pos = rxp.contents.endp[0] - t1p.value 27 | 28 | if (rxp): 29 | BRegfree(rxp) 30 | 31 | -------------------------------------------------------------------------------- /src/python/sample_match_utf16.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig import * 7 | 8 | # Unicode 9 | LoadBregonig(True) 10 | 11 | print(BRegexpVersion()) 12 | print() 13 | 14 | rxp = POINTER(BREGEXP)() 15 | msg = create_unicode_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 16 | 17 | t1 = " Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 " 18 | t1w = c_wchar_p(t1) 19 | t1p = cast(t1w, c_void_p) 20 | pattern1 = c_wchar_p(r"/(03|045)-(\d{3,4})-(\d{4})/") 21 | pos = 0 22 | while BMatch(pattern1, t1p.value + pos, t1p.value + len(t1) * sizeof(c_wchar), byref(rxp), msg) > 0: 23 | print("pos: %d, '%s'" % (pos, wstring_at(t1p.value + pos))) 24 | print("nparens: %d" % rxp.contents.nparens) 25 | for i in range(rxp.contents.nparens + 1): 26 | print("%d = %s" % (i, wstring_at(rxp.contents.startp[i], 27 | (rxp.contents.endp[i] - 
rxp.contents.startp[i])//sizeof(c_wchar)))) 28 | pos = rxp.contents.endp[0] - t1p.value 29 | 30 | if (rxp): 31 | BRegfree(rxp) 32 | 33 | -------------------------------------------------------------------------------- /src/python/sample_split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig import * 7 | from BregPool import * 8 | 9 | LoadBregonig() 10 | 11 | print(BRegexpVersion()) 12 | print() 13 | 14 | bpool = BregPool(8) 15 | msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 16 | 17 | t1 = " Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 ".encode('ASCII') 18 | t1p = cast(t1, c_void_p) 19 | pattern1 = r"/ *\d{2,3}-\d{3,4}-\d{4} */".encode('ASCII') 20 | rxp = bpool.Get(pattern1) 21 | splitcnt = BSplit(pattern1, t1p, t1p.value + len(t1), 0, byref(rxp), msg) 22 | if splitcnt > 0: 23 | i = 0 24 | for j in range(splitcnt): 25 | length = rxp.contents.splitp[i+1] - rxp.contents.splitp[i] 26 | print("len=%d [%d]=%s" % (length, j, string_at(rxp.contents.splitp[i], length).decode('ASCII'))) 27 | i += 2 28 | 29 | -------------------------------------------------------------------------------- /src/python/sample_subst.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig import * 7 | 8 | LoadBregonig() 9 | 10 | print(BRegexpVersion()) 11 | print() 12 | 13 | rxp = POINTER(BREGEXP)() 14 | msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 15 | 16 | t1 = " Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 ".encode('ASCII') 17 | t1p = cast(t1, c_void_p) 18 | pattern1 = r"s/(\d\d)-\d{4}-\d{4}/$1-xxxx-xxxx/g".encode('ASCII') 19 | ctr = BSubst(pattern1, t1p, 
t1p.value + len(t1), byref(rxp), msg) 20 | if ctr > 0: 21 | if rxp.contents.outp: 22 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 23 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 24 | else: 25 | # Result is an empty string. 26 | print("after(%d)" % ctr) 27 | print("length=0") 28 | 29 | if (rxp): 30 | BRegfree(rxp) 31 | 32 | -------------------------------------------------------------------------------- /src/python/sample_trans.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from ctypes import * 6 | from bregonig import * 7 | 8 | LoadBregonig() 9 | 10 | print(BRegexpVersion()) 11 | print() 12 | 13 | rxp = POINTER(BREGEXP)() 14 | msg = create_string_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 15 | 16 | t1 = " Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 ".encode('ASCII') 17 | t1p = cast(t1, c_void_p) 18 | pattern1 = r"tr/A-Z0-9/a-zx/g".encode('ASCII') 19 | ctr = BTrans(pattern1, t1p, t1p.value + len(t1), byref(rxp), msg) 20 | if ctr > 0: 21 | print("after(%d)=%s" % (ctr, string_at(rxp.contents.outp).decode('ASCII'))) 22 | print("length=%d" % (rxp.contents.outendp - rxp.contents.outp)) 23 | 24 | if (rxp): 25 | BRegfree(rxp) 26 | 27 | -------------------------------------------------------------------------------- /src/python/test_common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function, unicode_literals 4 | import ctypes 5 | from bregonig import * 6 | import sys 7 | import io 8 | import locale 9 | 10 | __all__ = ["get_nerror", "get_nsucc", "get_nfail", 11 | "inc_nerror", "inc_nsucc", "inc_nfail", 12 | "strptr", "cc_to_cb", "print_result", 13 | "xx", "x2", "x3", "n", 14 | "is_unicode_encoding", "is_wide_encoding", 15 | "set_encoding", 
"get_encoding", "set_output_encoding", "init"]; 16 | 17 | nerror = 0 18 | nsucc = 0 19 | nfail = 0 20 | 21 | # default encoding 22 | encoding = "CP932" 23 | 24 | 25 | def get_nerror(): 26 | return nerror 27 | def get_nsucc(): 28 | return nsucc 29 | def get_nfail(): 30 | return nfail 31 | 32 | def inc_nerror(): 33 | global nerror 34 | nerror += 1 35 | def inc_nsucc(): 36 | global nsucc 37 | nsucc += 1 38 | def inc_nfail(): 39 | global nfail 40 | nfail += 1 41 | 42 | 43 | class strptr: 44 | """a helper class to get a pointer to a string""" 45 | def __init__(self, s): 46 | if not isinstance(s, bytes): 47 | raise TypeError 48 | self._str = s 49 | try: 50 | # CPython 2.x/3.x 51 | self._ptr = ctypes.cast(self._str, ctypes.c_void_p) 52 | except TypeError: 53 | # PyPy 1.x 54 | self._ptr = ctypes.c_void_p(self._str) 55 | 56 | def getptr(self, offset=0): 57 | if offset == -1: # -1 means the end of the string 58 | offset = len(self._str) 59 | elif offset > len(self._str): 60 | raise IndexError 61 | return self._ptr.value + offset 62 | 63 | def cc_to_cb(s, enc, cc): 64 | """convert char count to byte count 65 | 66 | arguments: 67 | s -- unicode string 68 | enc -- encoding name 69 | cc -- char count 70 | """ 71 | if cc == -1: 72 | return -1 73 | s = s.encode('UTF-32LE') 74 | clen = cc * 4 75 | if clen > len(s): 76 | raise IndexError 77 | return len(s[:clen].decode('UTF-32LE').encode(enc)) 78 | 79 | def print_result(result, pattern, file=None): 80 | if not file: 81 | file = sys.stdout 82 | print(result + ": ", end='', file=file) 83 | try: 84 | print(pattern, file=file) 85 | except UnicodeEncodeError as e: 86 | print('(' + str(e) + ')') 87 | 88 | def decode_errmsg(msg): 89 | if isinstance(msg.value, bytes): 90 | return msg.value.decode(encoding, 'replace') 91 | else: 92 | return msg.value 93 | 94 | 95 | def xx(pattern, target, s_from, s_to, mem, not_match, opt="", err=False, 96 | start_offset=0): 97 | global nerror 98 | global nsucc 99 | global nfail 100 | 101 | rxp = 
ctypes.POINTER(BREGEXP)() 102 | msg = create_tchar_buffer(BREGEXP_MAX_ERROR_MESSAGE_LEN) 103 | 104 | pattern2 = pattern 105 | if not isinstance(pattern, bytes): 106 | pattern2 = pattern.encode(encoding) 107 | pattern3 = "/".encode(encoding) + pattern2 + ("/k" + opt).encode(encoding) 108 | 109 | target2 = target 110 | if not isinstance(target, bytes): 111 | s_from = cc_to_cb(target, encoding, s_from) 112 | s_to = cc_to_cb(target, encoding, s_to) 113 | start_offset = cc_to_cb(target, encoding, start_offset) 114 | target2 = target.encode(encoding) 115 | tp = strptr(target2) 116 | 117 | # cut very long outputs (used for showing message) 118 | pattern = pattern2.decode(encoding, 'replace') 119 | target = target2.decode(encoding, 'replace') 120 | limit = 100 121 | if len(pattern) > limit: 122 | pattern = pattern[:limit] + "..." 123 | if len(target) > limit: 124 | target = target[:limit] + "..." 125 | 126 | if encoding == "UTF-8": 127 | option = "8" 128 | else: 129 | option = "k" 130 | option = (option + opt).encode(encoding) 131 | 132 | if encoding == "UTF-16LE": 133 | pattern2 = ctypes.c_wchar_p(pattern2.decode(encoding)) 134 | pattern3 = ctypes.c_wchar_p(pattern3.decode(encoding)) 135 | option = ctypes.c_wchar_p(option.decode(encoding)) 136 | 137 | try: 138 | r = BoMatch(pattern2, option, tp.getptr(), tp.getptr(start_offset), tp.getptr(-1), 139 | False, ctypes.byref(rxp), msg) 140 | except RuntimeError: 141 | r = BMatch(pattern3, tp.getptr(), tp.getptr(-1), ctypes.byref(rxp), msg) 142 | 143 | if r < 0: 144 | # Error 145 | if err: 146 | nsucc += 1 147 | print_result("OK(E)", "%s (/%s/ '%s')" % \ 148 | (decode_errmsg(msg), pattern, target)) 149 | else: 150 | nerror += 1 151 | print_result("ERROR", "%s (/%s/ '%s')" % \ 152 | (decode_errmsg(msg), pattern, target), file=sys.stderr) 153 | return 154 | 155 | if err: 156 | nfail += 1 157 | print_result("FAIL(E)", "/%s/ '%s'" % (pattern, target)) 158 | 159 | elif r == 0: 160 | # Not matched 161 | if not_match: 162 | nsucc += 1 
def is_wide_encoding(enc):
    """Return True when enc is "UTF-16LE", False for the other known names.

    Known names are "CP932", "SJIS", "UTF-8" and "UTF-16LE"; any other
    value raises KeyError.
    """
    table = dict.fromkeys(("CP932", "SJIS", "UTF-8"), False)
    table["UTF-16LE"] = True
    return table[enc]
232 | """ 233 | if enc is None: 234 | enc = locale.getpreferredencoding() 235 | 236 | def get_text_writer(fo, **kwargs): 237 | kw = dict(kwargs) 238 | kw.setdefault('errors', 'backslashreplace') # use \uXXXX style 239 | kw.setdefault('closefd', False) 240 | 241 | if sys.version_info[0] < 3: 242 | # Work around for Python 2.x 243 | # New line conversion isn't needed here. Done in somewhere else. 244 | writer = io.open(fo.fileno(), mode='w', newline='', **kw) 245 | write = writer.write # save the original write() function 246 | enc = locale.getpreferredencoding() 247 | def convwrite(s): 248 | if isinstance(s, bytes): 249 | write(s.decode(enc)) # convert to unistr 250 | else: 251 | write(s) 252 | try: 253 | writer.flush() # needed on Windows 254 | except IOError: 255 | pass 256 | writer.write = convwrite 257 | else: 258 | writer = io.open(fo.fileno(), mode='w', **kw) 259 | return writer 260 | 261 | sys.stdout = get_text_writer(sys.stdout, encoding=enc) 262 | sys.stderr = get_text_writer(sys.stderr, encoding=enc) 263 | 264 | 265 | def init(enc, outenc=None): 266 | """Setup test target encoding, output encoding and warning function. 267 | 268 | arguments: 269 | enc -- Encoding used for testing. 270 | outenc -- Encoding used for showing messages. 
271 | """ 272 | ret = set_encoding(enc) 273 | set_output_encoding(outenc) 274 | return ret 275 | -------------------------------------------------------------------------------- /src/python/test_crnl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, unicode_literals 5 | from bregonig import * 6 | from test_common import * 7 | import sys 8 | 9 | def x(pattern, target, start_offset, s_from, s_to): 10 | xx(pattern, target, s_from, s_to, 0, False, start_offset=start_offset, opt="R") 11 | 12 | def n(pattern, target, start_offset): 13 | xx(pattern, target, 0, 0, 0, True, start_offset=start_offset, opt="R") 14 | 15 | 16 | def main(): 17 | unicode_func = False 18 | 19 | # encoding of the test target 20 | enc = None 21 | if len(sys.argv) > 1: 22 | enc = sys.argv[1] 23 | 24 | # encoding of stdout/stderr 25 | outenc = None 26 | if len(sys.argv) > 2: 27 | outenc = sys.argv[2] 28 | 29 | # Initialization 30 | try: 31 | unicode_func = init(enc, outenc) 32 | except KeyError: 33 | print("test target encoding error") 34 | print("Usage: python test_match.py [test target encoding] [output encoding]") 35 | sys.exit() 36 | 37 | 38 | LoadBregonig(unicode_func) 39 | #LoadBregexp() 40 | 41 | print(BRegexpVersion()) 42 | print() 43 | 44 | x("", "\r\n", 0, 0, 0); 45 | n(".", "\r\n", 0); 46 | n("..", "\r\n", 0); 47 | x("^", "\r\n", 0, 0, 0); 48 | x("(?m)\\n^", "\r\nf", 0, 1, 2); 49 | x("(?m)\\n^a", "\r\na", 0, 1, 3); 50 | x("$", "\r\n", 0, 0, 0); 51 | x("T$", "T\r\n", 0, 0, 1); 52 | x("T$", "T\raT\r\n", 0, 3, 4); 53 | x("\\z", "\r\n", 0, 2, 2); 54 | n("a\\z", "a\r\n", 0); 55 | x("\\Z", "\r\n", 0, 0, 0); 56 | x("\\Z", "\r\na", 0, 3, 3); 57 | x("\\Z", "\r\n\r\n\n", 0, 4, 4); 58 | x("\\Z", "\r\n\r\nX", 0, 5, 5); 59 | x("a\\Z", "a\r\n", 0, 0, 1); 60 | x("aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 0, 15); 61 | x("(?m)a|$", "b\r\n", 0, 1, 1); 62 | x("(?m)$|b", 
"\rb", 0, 1, 2); 63 | x("(?m)a$|ab$", "\r\nab\r\n", 0, 2, 4); 64 | x("a|\\Z", "b\r\n", 0, 1, 1); 65 | x("\\Z|b", "\rb", 0, 1, 2); 66 | x("a\\Z|ab\\Z", "\r\nab\r\n", 0, 2, 4); 67 | x("(?=a$).", "a\r\n", 0, 0, 1); 68 | n("(?=a$).", "a\r", 0); 69 | x("(?!a$)..", "a\r", 0, 0, 2); 70 | x("(?m)(?<=a$)\\r\\n", "a\r\n", 0, 1, 3); 71 | n("(?m)(? '%s'" % (pattern, replacement, target, out_result)) 75 | else: 76 | if out_result == s_result: 77 | inc_nsucc() 78 | print_result("OK", 79 | "s/%s/%s/g '%s' => '%s'" % (pattern, replacement, target, out_result)) 80 | else: 81 | inc_nfail() 82 | print_result("FAIL", 83 | "s/%s/%s/g '%s' => '%s'" % (pattern, replacement, target, out_result)) 84 | 85 | if (rxp): 86 | BRegfree(rxp) 87 | 88 | 89 | def main(): 90 | unicode_func = False 91 | 92 | # encoding of the test target 93 | enc = None 94 | if len(sys.argv) > 1: 95 | enc = sys.argv[1] 96 | 97 | # encoding of stdout/stderr 98 | outenc = None 99 | if len(sys.argv) > 2: 100 | outenc = sys.argv[2] 101 | 102 | # Initialization 103 | try: 104 | unicode_func = init(enc, outenc) 105 | except KeyError: 106 | print("test target encoding error") 107 | print("Usage: python test_match.py [test target encoding] [output encoding]") 108 | sys.exit() 109 | 110 | 111 | LoadBregonig(unicode_func) 112 | 113 | print(BRegexpVersion()) 114 | print() 115 | 116 | 117 | # fixed string 118 | xx("abc", "def", "abc", "def") 119 | xx("abc", "def", "abcabcabc", "defdefdef") 120 | xx("あいう", "えお", "あいうあいう", "えおえお") 121 | xx("x?", "!", "abcde", "!a!b!c!d!e!") 122 | xx("\\Gx?", "!", "abcde", "!abcde") 123 | xx("abc", "\\r\\n\\t\\f\\e\\a\\b\\c[\\x30\\x{31}\\62\\o{63}", "abc", "\r\n\t\f\x1b\a\b\x1b0123") 124 | xx("0123", "", "0123", "") 125 | 126 | # variable 127 | xx("abc", "$&_$&_$&", "abc", "abc_abc_abc") 128 | xx("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)", "${10}$9\\8${7}$6\\5${4}$3\\2${1}", "abcdefghij", "jihgfedcba") 129 | xx("(a+)(b+)(c+).*", "$+", "aabbcc", "cc") 130 | xx("(a+)(b+)(c+)?.*", "$+", "aabbdd", "bb") 131 | 
xx("(a+)(b+)?(c+)?.*", "$+", "aaddee", "aa") 132 | 133 | # named group 134 | xx("(?.*)_(?.*)", "\\k_\\k", "abc_def", "def_abc") 135 | xx("(?'a'.*)_(?'b'.*)", "\\k'b'_\\k'a'", "abc_def", "def_abc") 136 | xx("(?.*)_(?.*)", "$+{b}_$+{a}", "abc_def", "def_abc") 137 | xx("(?.*)_(?.*)_(?.*)", "$+{a}", "abc_def_ghi", "abc") 138 | xx("(?.*)_(?.*)_(?.*)", "$-{a}[2]_$-{a}[1]_$-{a}[0]", "abc_def_ghi", "ghi_def_abc") 139 | xx("(?.*)_(?.*)_(?.*)", "$-{a}[-1]_$-{a}[-2]_$-{a}[-3]", "abc_def_ghi", "ghi_def_abc") 140 | 141 | # \l, \u, \L, \U and \E 142 | xx("", "\\LABCDEFG\\EHIJKLMN", "", "abcdefgHIJKLMN") 143 | xx("", "\\Uabcdefg\\Ehijklmn", "", "ABCDEFGhijklmn") 144 | xx("(.*)", "\\L$1", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") 145 | xx("(.*)", "\\U$1", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ") 146 | xx("(.*)_(.*)", "\\L\\u$1\\u\\L$2", "abc_def", "AbcDef") 147 | xx("(.*)_(.*)", "\\L\\u$1\\u$2", "abc_def", "AbcDef") 148 | xx("(.*)_(.*)", "\\l\\U$1\\U\\l$2", "abc_def", "aBCdEF") 149 | xx("(.*)_(.*)", "\\l\\U$1\\l$2", "abc_def", "aBCdEF") 150 | xx("([a-z]+)_?", "\\L\\u$1", "abc_def_ghi_jkl", "AbcDefGhiJkl") # snake_case => CamelCase 151 | xx("([A-Z]?[a-z]+)([A-Z])", "\\L$1_$2", "AbcDefGhiJkl", "abc_def_ghi_jkl") # CamelCase => snake_case 152 | xx("(a*)(b*)(c*)", "\\u$1$2$3", "aabbcc", "Aabbcc") 153 | xx("(a*)(b*)(c*)", "\\u$1$2$3", "abbcc", "Abbcc") 154 | xx("(a*)(b*)(c*)", "\\u$1$2$3", "bbcc", "Bbcc") 155 | xx("(a*)(b*)(c*)", "\\u$1$2$3", "bcc", "Bcc") 156 | xx("(a*)(b*)(c*)", "\\u$1$2$3", "cc", "Cc") 157 | xx("(A*)(B*)(C*)", "\\l$1$2$3", "AABBCC", "aABBCC") 158 | xx("(A*)(B*)(C*)", "\\l$1$2$3", "ABBCC", "aBBCC") 159 | xx("(A*)(B*)(C*)", "\\l$1$2$3", "BBCC", "bBCC") 160 | xx("(A*)(B*)(C*)", "\\l$1$2$3", "BCC", "bCC") 161 | xx("(A*)(B*)(C*)", "\\l$1$2$3", "CC", "cC") 162 | xx("Abc", "\\U$&\\L$&", "Abc", "ABCabc") 163 | 164 | # nasted variable (bregonig.dll doesn't support this.) 
165 | # xx("^([23]),(.*),(.*)$", "${$1}", "2,1234,abcd", "1234") 166 | # xx("^([23]),(.*),(.*)$", "${$1}", "3,1234,abcd", "abcd") 167 | # xx("^(?[ab]),(?.*?),(?.*?)$", "$+{$+{select}}", "b,1234,abcd", "abcd") 169 | 170 | print("\nEncoding:", get_encoding()) 171 | print("RESULT SUCC: %d, FAIL: %d, ERROR: %d\n" % ( 172 | get_nsucc(), get_nfail(), get_nerror())) 173 | 174 | if (get_nfail() == 0 and get_nerror() == 0): 175 | exit(0) 176 | else: 177 | exit(-1) 178 | 179 | if __name__ == '__main__': 180 | main() 181 | 182 | -------------------------------------------------------------------------------- /src/resource.h: -------------------------------------------------------------------------------- 1 | //{{NO_DEPENDENCIES}} 2 | // Microsoft Developer Studio generated include file. 3 | // Used by bregonig.rc 4 | // 5 | 6 | // Next default values for new objects 7 | // 8 | #ifdef APSTUDIO_INVOKED 9 | #ifndef APSTUDIO_READONLY_SYMBOLS 10 | #define _APS_NEXT_RESOURCE_VALUE 101 11 | #define _APS_NEXT_COMMAND_VALUE 40001 12 | #define _APS_NEXT_CONTROL_VALUE 1000 13 | #define _APS_NEXT_SYMED_VALUE 101 14 | #endif 15 | #endif 16 | -------------------------------------------------------------------------------- /src/sample/bregpool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * bregpool.h 3 | */ 4 | 5 | 6 | class BregPool 7 | { 8 | public: 9 | BregPool(int max){ 10 | m_nmax = max; 11 | m_rxpool = new BREGEXP*[m_nmax]; 12 | // ZeroMemory(m_rxpool,sizeof(BREGEXP*)*m_nmax); 13 | memset(m_rxpool,0,sizeof(BREGEXP*)*m_nmax); 14 | }; 15 | ~BregPool() { 16 | Free(); 17 | }; 18 | void Free() { 19 | if (m_rxpool == 0) 20 | return; 21 | for (int i = 0;i < m_nmax;i++) { 22 | if (m_rxpool[i]) 23 | BRegfree(m_rxpool[i]); 24 | } 25 | delete [] m_rxpool; 26 | m_rxpool = NULL; 27 | }; 28 | BREGEXP* Get(TCHAR *regstr) 29 | { 30 | BREGEXP *r; 31 | int i; 32 | for (i = 0;i < m_nmax;i++) { 33 | r = m_rxpool[i]; 34 | if (r == 0) 35 | break; 36 | if 
(r->parap == 0) 37 | break; 38 | // Check same Regular Expression 39 | if (memcmp(regstr,r->parap,(r->paraendp - r->parap) + 1) == 0) 40 | return r; // we got !!! 41 | } 42 | if (i > m_nmax - 1) 43 | i = m_nmax - 1; 44 | if (m_rxpool[i]) 45 | return m_rxpool[i]; 46 | TCHAR msg[BREGEXP_MAX_ERROR_MESSAGE_LEN]; 47 | TCHAR p[] = _T(" "); 48 | // Make Compile Block 49 | BMatch(regstr,p,p+1,&m_rxpool[i],msg); 50 | 51 | return m_rxpool[i]; 52 | } 53 | private: 54 | int m_nmax; 55 | BREGEXP **m_rxpool; 56 | }; 57 | 58 | -------------------------------------------------------------------------------- /src/sample/sample.c: -------------------------------------------------------------------------------- 1 | // 2 | // sample.c 3 | // 4 | // grep for bregexp version 5 | // Author Tatsuo Baba 6 | // 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | int _tmain(int argc,TCHAR *argv[]) 13 | { 14 | TCHAR fname[512],line[4096],*p1; 15 | TCHAR msg[BREGEXP_MAX_ERROR_MESSAGE_LEN]; 16 | FILE *fp; 17 | int len,ctr; 18 | BREGEXP *rxp = 0; 19 | TCHAR dmy[] = _T(" "); 20 | setlocale(LC_ALL, ""); 21 | if (argc < 2) { 22 | _putts (_T("usage /regstr/ [file]\n if omitted assume /usr/dict/words")); 23 | return 0; 24 | } 25 | _tcscpy(fname,_T("/usr/dict/words")); 26 | if (argc > 2) 27 | _tcscpy(fname,argv[2]); 28 | p1 = argv[1]; 29 | fp = _tfopen(fname,_T("r")); 30 | if (!fp) { 31 | _tprintf (_T("file cant open %s\n"),fname); 32 | return 0; 33 | } 34 | BMatch(p1,dmy,dmy+1,&rxp,msg); // compile using dummy 35 | if (msg[0]) { 36 | _tprintf (_T("parse error %s\n"),msg); 37 | return 0; 38 | } 39 | ctr = 0; 40 | while(_fgetts(line,sizeof(line),fp)) { 41 | len = _tcslen(line); 42 | if (len && (BMatch(p1,line,line+len,&rxp,msg) > 0)) { 43 | ctr++; 44 | line[len-1] = 0; 45 | _putts(line); 46 | } 47 | } 48 | fclose(fp); 49 | 50 | _tprintf(_T("%ld lines(s) greped\n"),ctr); 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- 
/src/sample/sample_bosubst.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-takata/bregonig/670b229063a33910e717a9f220d89ce8ab46d790/src/sample/sample_bosubst.cpp -------------------------------------------------------------------------------- /src/sample/sample_match.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-takata/bregonig/670b229063a33910e717a9f220d89ce8ab46d790/src/sample/sample_match.cpp -------------------------------------------------------------------------------- /src/sample/sample_split.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "bregexp.h" 7 | #include "bregpool.h" 8 | 9 | 10 | int main() 11 | { 12 | static BregPool bpool(8); 13 | TCHAR msg[BREGEXP_MAX_ERROR_MESSAGE_LEN]; 14 | TCHAR t1[] = _T(" Yokohama 045-222-1111 Osaka 06-5555-6666 Tokyo 03-1111-9999 "); 15 | TCHAR patern1[] = _T("/ *\\d{2,3}-\\d{3,4}-\\d{4} */"); 16 | BREGEXP *rxp = bpool.Get(patern1); 17 | int splitcnt = BSplit(patern1,t1,t1+_tcslen(t1),0,&rxp,msg); 18 | if (splitcnt > 0 ) { 19 | int i = 0; 20 | for (int j = 0;j < splitcnt;j++) { 21 | int len = rxp->splitp[i+1] - rxp->splitp[i]; 22 | TCHAR *tp = (TCHAR*)rxp->splitp[i]; 23 | TCHAR ch = tp[len]; // save delmitter 24 | tp[len] = 0; // set stopper 25 | _tprintf(_T("len=%d [%d]=%s\n"),len,j,tp); 26 | tp[len] = ch; // restore the char 27 | i += 2; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/sample/sample_subst.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/k-takata/bregonig/670b229063a33910e717a9f220d89ce8ab46d790/src/sample/sample_subst.cpp -------------------------------------------------------------------------------- 
// Reinterprets the decimal digits of `dec` as an octal number
// (e.g. 123 -> 0123 == 83).  Returns -1 when dec is above 377 or when its
// ones or tens digit is greater than 7.
int dec2oct(int dec)
{
	if (dec > 377)	// 0377 == 0xFF
		return -1;

	const int ones = dec % 10;
	const int tens = (dec / 10) % 10;
	const int hundreds = dec / 100;

	if (ones > 7 || tens > 7)
		return -1;
	return (hundreds * 8 + tens) * 8 + ones;
}
enc, OnigCodePoint c, 73 | casetype nextcase, casetype currentcase) 74 | { 75 | // TODO: support for non-ASCII character 76 | if (nextcase == CASE_LOWER 77 | || (nextcase == CASE_NONE && currentcase == CASE_LOWER)) { 78 | if (isascii(c)) { 79 | c = tolower(c); 80 | } 81 | } else if (nextcase == CASE_UPPER 82 | || (nextcase == CASE_NONE && currentcase == CASE_UPPER)) { 83 | if (isascii(c)) { 84 | c = toupper(c); 85 | } 86 | } 87 | return c; 88 | } 89 | #endif 90 | 91 | 92 | using namespace BREGONIG_NS; 93 | namespace BREGONIG_NS { 94 | 95 | 96 | unsigned long scan_oct(const TCHAR *start, int len, int *retlen); 97 | unsigned long scan_hex(const TCHAR *start, int len, int *retlen); 98 | //unsigned long scan_dec(const TCHAR *start, int len, int *retlen); 99 | 100 | 101 | TCHAR *bufcat(OnigEncoding enc, TCHAR *buf, ptrdiff_t *copycnt, 102 | const TCHAR *src, ptrdiff_t len, ptrdiff_t *blen, 103 | casetype nextcase, casetype currentcase, 104 | int bufsize = SUBST_BUF_SIZE) 105 | { 106 | if (*blen <= *copycnt + len) { 107 | *blen += len + bufsize; 108 | TCHAR *tp = new (std::nothrow) TCHAR[*blen]; 109 | if (tp == NULL) { 110 | // asc2tcs(msg, "out of space buf", BREGEXP_MAX_ERROR_MESSAGE_LEN); 111 | delete [] buf; 112 | throw std::bad_alloc(); 113 | // return NULL; 114 | } 115 | memcpy(tp, buf, *copycnt * sizeof(TCHAR)); 116 | delete [] buf; 117 | buf = tp; 118 | } 119 | if (len) { 120 | if ((nextcase == CASE_NONE) && (currentcase == CASE_NONE)) { 121 | memcpy(buf + *copycnt, src, len * sizeof(TCHAR)); 122 | *copycnt += len; 123 | } else { 124 | int i = 0; 125 | while (i < len) { 126 | OnigCodePoint c = ONIGENC_MBC_TO_CODE(enc, (UChar*) (src + i), 127 | (UChar*) (src + len)); 128 | c = convert_char_case(enc, c, nextcase, currentcase); 129 | int clen = ONIGENC_CODE_TO_MBC(enc, c, 130 | (UChar*) (buf + *copycnt + i)) / sizeof(TCHAR); 131 | i += clen; 132 | nextcase = CASE_NONE; 133 | } 134 | *copycnt += len; 135 | } 136 | } 137 | return buf; 138 | } 139 | 140 | 141 | 142 | 
int subst_onig(bregonig *rx, const TCHAR *target, 143 | const TCHAR *targetstartp, const TCHAR *targetendp, 144 | TCHAR *msg, BCallBack callback) 145 | { 146 | TRACE0(_T("subst_onig()\n")); 147 | OnigEncoding enc = rx->reg->enc; 148 | const TCHAR *orig,*m,*c; 149 | const TCHAR *s = target; 150 | ptrdiff_t len = targetendp - target; 151 | const TCHAR *strend = s + len; 152 | ptrdiff_t maxiters = (strend - s) + 10; 153 | ptrdiff_t iters = 0; 154 | ptrdiff_t clen; 155 | orig = m = s; 156 | s = targetstartp; // added by K2 157 | bool once = !(rx->pmflags & PMf_GLOBAL); 158 | c = rx->prerepp; 159 | clen = rx->prerependp - c; 160 | // 161 | if (regexec_onig(rx, s, strend, orig, 0,1,0,msg) <= 0) 162 | return 0; 163 | try { 164 | ptrdiff_t blen = len + clen + SUBST_BUF_SIZE; 165 | TCHAR *buf = new TCHAR[blen]; 166 | ptrdiff_t copycnt = 0; 167 | // now ready to go 168 | int subst_count = 0; 169 | do { 170 | if (iters++ > maxiters) { 171 | delete [] buf; 172 | TRACE0(_T("Substitution loop\n")); 173 | asc2tcs(msg, "Substitution loop", BREGEXP_MAX_ERROR_MESSAGE_LEN); 174 | return 0; 175 | } 176 | m = rx->startp[0]; 177 | len = m - s; 178 | buf = bufcat(enc, buf, ©cnt, s, len, &blen, 179 | CASE_NONE, CASE_NONE); 180 | s = rx->endp[0]; 181 | if (!(rx->pmflags & PMf_CONST)) { // we have \digits or $& 182 | // ok start magic 183 | REPSTR *rep = rx->repstr; 184 | casetype nextcase = CASE_NONE; 185 | // ASSERT(rep); 186 | for (int i = 0; i < rep->count; i++) { 187 | int j; 188 | casetype currentcase = rep->info[i].currentcase; 189 | if (rep->info[i].nextcase != CASE_NONE) { 190 | nextcase = rep->info[i].nextcase; 191 | } 192 | // normal char 193 | ptrdiff_t dlen = rep->info[i].dlen; 194 | if (rep->is_normal_string(i) && dlen) { 195 | buf = bufcat(enc, buf, ©cnt, rep->info[i].startp, 196 | dlen, &blen, nextcase, currentcase); 197 | nextcase = CASE_NONE; 198 | } 199 | 200 | else if (0 <= dlen && dlen <= rx->nparens 201 | && rx->startp[dlen] && rx->endp[dlen]) { 202 | // \digits, 
$digits or $& 203 | len = rx->endp[dlen] - rx->startp[dlen]; 204 | buf = bufcat(enc, buf, ©cnt, rx->startp[dlen], 205 | len, &blen, nextcase, currentcase); 206 | if (len) { 207 | nextcase = CASE_NONE; 208 | } 209 | } 210 | 211 | else if (dlen == -1 212 | && (j=get_right_most_captured_num(rx->region)) > 0) { 213 | // $+ 214 | len = rx->endp[j] - rx->startp[j]; 215 | buf = bufcat(enc, buf, ©cnt, rx->startp[j], 216 | len, &blen, nextcase, currentcase); 217 | if (len) { 218 | nextcase = CASE_NONE; 219 | } 220 | } 221 | 222 | else if ((10<=dlen && (j=dec2oct((int) dlen)) > 0) 223 | && dlen > rx->nparens && rep->is_backslash(i)) { 224 | // \nnn 225 | TCHAR ch = (TCHAR) j; 226 | buf = bufcat(enc, buf, ©cnt, &ch, 227 | 1, &blen, nextcase, currentcase); 228 | nextcase = CASE_NONE; 229 | } 230 | } 231 | } else { 232 | if (clen) { // no special char 233 | buf = bufcat(enc, buf, ©cnt, c, clen, &blen, 234 | CASE_NONE, CASE_NONE); 235 | } 236 | } 237 | subst_count++; 238 | if (once) 239 | break; 240 | if (callback) 241 | if (!callback(CALLBACK_KIND_REPLACE, subst_count, s - orig)) 242 | break; 243 | } while (regexec_onig(rx, s, strend, orig, s == m, 1,0,msg) > 0); 244 | len = targetendp - s; 245 | buf = bufcat(enc, buf, ©cnt, s, len, &blen, CASE_NONE, CASE_NONE, 246 | 1); 247 | if (copycnt) { 248 | rx->outp = buf; 249 | rx->outendp = buf + copycnt; 250 | *(rx->outendp) = '\0'; 251 | } 252 | else 253 | delete [] buf; 254 | 255 | TRACE2(_T("subst_count: %d, copycnt: %d\n"), subst_count, copycnt); 256 | return subst_count; 257 | } 258 | catch (std::bad_alloc& /*ex*/) { 259 | TRACE0(_T("out of space in subst_onig()\n")); 260 | asc2tcs(msg, "out of space buf", BREGEXP_MAX_ERROR_MESSAGE_LEN); 261 | return 0; 262 | } 263 | } 264 | 265 | 266 | 267 | 268 | int set_repstr(REPSTR *repstr, int num, 269 | int *pcindex, TCHAR *dst, TCHAR **polddst, 270 | casetype nextcase, casetype currentcase, bool backslash = false) 271 | { 272 | TRACE0(_T("set_repstr\n")); 273 | int cindex = *pcindex; 274 
| TCHAR *olddst = *polddst; 275 | 276 | if (/*num > 0 &&*/ cindex >= repstr->count - 2) { 277 | int newcount = repstr->count + 10; 278 | repinfo *info = new repinfo[newcount]; 279 | memcpy(info, repstr->info, repstr->count * sizeof(repinfo)); 280 | delete [] repstr->info; 281 | repstr->info = info; 282 | repstr->count = newcount; 283 | } 284 | 285 | if (dst - olddst > 0) { 286 | repstr->info[cindex].startp = olddst; 287 | repstr->info[cindex++].dlen = dst - olddst; 288 | } 289 | repstr->info[cindex].dlen = num; // paren number 290 | if (backslash) { 291 | repstr->set_backslash(cindex); // \digits (try later) 292 | } else { 293 | repstr->set_dollar(cindex); // $digits (try later) 294 | } 295 | repstr->info[cindex].nextcase = nextcase; 296 | repstr->info[cindex].currentcase = currentcase; 297 | cindex++; 298 | 299 | olddst = dst; 300 | 301 | *pcindex = cindex; 302 | *polddst = olddst; 303 | return 0; 304 | } 305 | 306 | 307 | const TCHAR *parse_digits(const TCHAR *str, REPSTR *repstr, int *pcindex, 308 | TCHAR *dst, TCHAR **polddst, 309 | casetype nextcase, casetype currentcase, bool backslash = false) 310 | { 311 | TRACE0(_T("parse_digits\n")); 312 | TCHAR *s; 313 | TCHAR endch = 0; 314 | if (*str == '{') { 315 | ++str; 316 | endch = '}'; 317 | } 318 | int num = (int) _tcstoul(str, &s, 10); 319 | 320 | if (endch) { 321 | if (*s != endch) 322 | return NULL; // SYNTAX ERROR 323 | ++s; 324 | } 325 | set_repstr(repstr, num, pcindex, dst, polddst, nextcase, currentcase, 326 | backslash); 327 | 328 | return s; 329 | } 330 | 331 | 332 | const TCHAR *parse_groupname(bregonig *rx, const TCHAR *str, const TCHAR *strend, 333 | REPSTR *repstr, int *pcindex, TCHAR *dst, TCHAR **polddst, 334 | casetype nextcase, casetype currentcase, bool bracket = false) 335 | { 336 | TCHAR endch; 337 | switch (*str++) { 338 | case '<': endch = '>'; break; 339 | case '{': endch = '}'; break; 340 | case '\'': endch = '\''; break; 341 | default: 342 | return NULL; 343 | } 344 | const TCHAR *q = 
str; 345 | while (q < strend && *q != endch) { 346 | ++q; 347 | } 348 | if (*q != endch) { 349 | return NULL; // SYNTAX ERROR 350 | } 351 | int arrnum = 0; 352 | const TCHAR *nameend = q; 353 | if (bracket) { // [n] 354 | if (q[1] != '[') 355 | return NULL; // SYNTAX ERROR 356 | arrnum = (int) _tcstol(q+2, (TCHAR**) &q, 10); 357 | if (*q != ']' || q >= strend) 358 | return NULL; // SYNTAX ERROR 359 | } 360 | int *num_list; 361 | int num = onig_name_to_group_numbers(rx->reg, 362 | (UChar*) str, (UChar*) nameend, &num_list); 363 | #ifdef NAMEGROUP_RIGHTMOST 364 | int n = num - 1; 365 | #else 366 | int n = 0; 367 | #endif 368 | if (bracket) { 369 | n = arrnum; 370 | if (arrnum < 0) { 371 | n += num; 372 | } 373 | } 374 | if ((num > 0) && (0 <= n || n < num)) 375 | set_repstr(repstr, num_list[n], pcindex, dst, polddst, 376 | nextcase, currentcase); // leftmost group-num 377 | return q+1; 378 | } 379 | 380 | 381 | REPSTR *compile_rep(bregonig *rx, const TCHAR *str, const TCHAR *strend) 382 | { 383 | TRACE0(_T("compile_rep()\n")); 384 | rx->pmflags |= PMf_CONST; /* default */ 385 | ptrdiff_t len = strend - str; 386 | if (len < 2) // no special char 387 | return NULL; 388 | register const TCHAR *p = str; 389 | register const TCHAR *pend = strend; 390 | 391 | REPSTR *repstr = NULL; 392 | try { 393 | repstr = new (len) REPSTR; 394 | // memset(repstr, 0, len + sizeof(REPSTR)); 395 | TCHAR *dst = repstr->data; 396 | repstr->init(20); // default \digits count in string 397 | int cindex = 0; 398 | TCHAR ender, prvch; 399 | int numlen; 400 | TCHAR *olddst = dst; 401 | bool special = false; // found special char 402 | casetype nextcase = CASE_NONE; 403 | casetype currentcase = CASE_NONE; 404 | OnigEncoding enc = onig_get_encoding(rx->reg); 405 | while (p < pend) { 406 | if (*p != '\\' && *p != '$') { // magic char ? 
407 | // copy one char 408 | OnigCodePoint c = ONIGENC_MBC_TO_CODE(enc, (UChar*) p, 409 | (UChar*) pend); 410 | c = convert_char_case(enc, c, nextcase, currentcase); 411 | int len = ONIGENC_CODE_TO_MBC(enc, c, (UChar*) dst) 412 | / sizeof(TCHAR); 413 | p += len; 414 | dst += len; 415 | ender = *dst; 416 | nextcase = CASE_NONE; 417 | continue; 418 | } 419 | if (p+1 >= pend) { // end of the pattern 420 | *dst++ = *p++; 421 | break; 422 | } 423 | 424 | prvch = *p++; 425 | if (prvch == '$') { 426 | switch (*p) { 427 | case '&': // $& 428 | // case '0': // $0 429 | special = true; 430 | set_repstr(repstr, 0, &cindex, dst, &olddst, 431 | nextcase, currentcase); 432 | nextcase = CASE_NONE; 433 | p++; 434 | break; 435 | case '+': // $+, $+{name} 436 | special = true; 437 | if (p[1] == '{') { // $+{name} 438 | const TCHAR *q = parse_groupname(rx, p+1, pend, repstr, 439 | &cindex, dst, &olddst, nextcase, currentcase); 440 | nextcase = CASE_NONE; 441 | if (q != NULL) { 442 | p = q; 443 | } else { 444 | // SYNTAX ERROR 445 | *dst++ = prvch; 446 | } 447 | } else { // $+ 448 | set_repstr(repstr, -1, &cindex, dst, &olddst, 449 | nextcase, currentcase); 450 | nextcase = CASE_NONE; 451 | p++; 452 | } 453 | break; 454 | case '-': // $-{name}[n] 455 | special = true; 456 | if (p[1] == '{') { 457 | const TCHAR *q = parse_groupname(rx, p+1, pend, repstr, 458 | &cindex, dst, &olddst, 459 | nextcase, currentcase, true); 460 | nextcase = CASE_NONE; 461 | if (q != NULL) { 462 | p = q; 463 | continue; 464 | } 465 | } 466 | // SYNTAX ERROR 467 | *dst++ = prvch; 468 | break; 469 | /* 470 | case '`': // $` 471 | special = true; 472 | break; 473 | case '\'': // $' 474 | special = true; 475 | break; 476 | case '^': // $^N 477 | special = true; 478 | break; 479 | */ 480 | case '{': // ${nn}, ${name} 481 | special = true; 482 | if (isDIGIT(p[1])) { 483 | // ${nn} 484 | const TCHAR *q = parse_digits(p, repstr, &cindex, dst, 485 | &olddst, nextcase, currentcase); 486 | nextcase = CASE_NONE; 487 | if 
(q != NULL) { 488 | p = q; 489 | } else { 490 | // SYNTAX ERROR 491 | // throw std::invalid_argument("} not found"); 492 | *dst++ = prvch; 493 | } 494 | } else { 495 | // ${name} 496 | const TCHAR *q = parse_groupname(rx, p, pend, repstr, 497 | &cindex, dst, &olddst, nextcase, currentcase); 498 | nextcase = CASE_NONE; 499 | if (q != NULL) { 500 | p = q; 501 | } else { 502 | // SYNTAX ERROR 503 | *dst++ = prvch; 504 | } 505 | } 506 | break; 507 | default: 508 | if (isDIGIT(*p) && *p != '0') { // $digits 509 | special = true; 510 | p = parse_digits(p, repstr, &cindex, dst, &olddst, 511 | nextcase, currentcase); 512 | } else { 513 | *dst++ = prvch; 514 | *dst++ = *p++; 515 | } 516 | nextcase = CASE_NONE; 517 | break; 518 | } 519 | continue; 520 | } 521 | 522 | 523 | // now prvch == '\\' 524 | 525 | special = true; 526 | if (isDIGIT(*p)) { 527 | if (*p == '0') { 528 | // '\0nn' 529 | ender = (TCHAR) scan_oct(p, 3, &numlen); 530 | p += numlen; 531 | *dst++ = ender; 532 | } else { 533 | // \digits found 534 | p = parse_digits(p, repstr, &cindex, dst, &olddst, 535 | nextcase, currentcase, true); 536 | } 537 | nextcase = CASE_NONE; 538 | } else { 539 | prvch = *p++; 540 | switch (prvch) { 541 | case 'n': 542 | ender = '\n'; 543 | nextcase = CASE_NONE; 544 | break; 545 | case 'r': 546 | ender = '\r'; 547 | nextcase = CASE_NONE; 548 | break; 549 | case 't': 550 | ender = '\t'; 551 | nextcase = CASE_NONE; 552 | break; 553 | case 'f': 554 | ender = '\f'; 555 | nextcase = CASE_NONE; 556 | break; 557 | case 'e': 558 | ender = '\033'; 559 | nextcase = CASE_NONE; 560 | break; 561 | case 'a': 562 | ender = '\a'; 563 | nextcase = CASE_NONE; 564 | break; 565 | #ifdef USE_VTAB 566 | case 'v': 567 | ender = '\v'; 568 | nextcase = CASE_NONE; 569 | break; 570 | #endif 571 | case 'b': 572 | ender = '\b'; 573 | nextcase = CASE_NONE; 574 | break; 575 | case 'x': // '\xHH', '\x{HH}' 576 | if (isXDIGIT(*p)) { // '\xHH' 577 | ender = (TCHAR) scan_hex(p, 2, &numlen); 578 | ender = 
convert_char_case(enc, ender, nextcase, 579 | currentcase); 580 | p += numlen; 581 | nextcase = CASE_NONE; 582 | } 583 | else { 584 | const TCHAR *q = p; 585 | if (*q == '{') { // '\x{HH}' 586 | unsigned int code = scan_hex(++q, 8, &numlen); 587 | q += numlen; 588 | if (*q == '}') { 589 | code = convert_char_case(enc, code, nextcase, 590 | currentcase); 591 | int len = ONIGENC_CODE_TO_MBC( 592 | enc, code, (UChar*) dst) 593 | / sizeof(TCHAR); 594 | dst += len - 1; 595 | ender = *dst; 596 | p = q+1; 597 | nextcase = CASE_NONE; 598 | break; 599 | } 600 | } 601 | // SYNTAX ERROR 602 | ender = prvch; 603 | } 604 | break; 605 | case 'o': // '\o{OOO}' 606 | if (*p == '{') { 607 | const TCHAR *q = p; 608 | unsigned int code = scan_oct(++q, 11, &numlen); 609 | q += numlen; 610 | if (*q == '}') { 611 | code = convert_char_case(enc, code, nextcase, 612 | currentcase); 613 | int len = ONIGENC_CODE_TO_MBC( 614 | enc, code, (UChar*) dst) 615 | / sizeof(TCHAR); 616 | dst += len - 1; 617 | ender = *dst; 618 | p = q+1; 619 | nextcase = CASE_NONE; 620 | break; 621 | } 622 | } 623 | // SYNTAX ERROR 624 | ender = prvch; 625 | break; 626 | case 'c': // '\cx' (ex. 
'\c[' == Ctrl-[ == '\x1b') 627 | ender = *p++; 628 | if (ender == '\\') // '\c\x' == '\cx' 629 | ender = *p++; 630 | ender = toupper((TBYTE) ender); 631 | ender ^= 64; 632 | nextcase = CASE_NONE; 633 | break; 634 | case 'k': // \k, \k'name' 635 | if (*p == '<' || *p == '\'') { 636 | const TCHAR *q = parse_groupname(rx, p, pend, repstr, 637 | &cindex, dst, &olddst, nextcase, currentcase); 638 | nextcase = CASE_NONE; 639 | if (q != NULL) { 640 | p = q; 641 | continue; 642 | } 643 | } 644 | // SYNTAX ERROR 645 | ender = prvch; 646 | break; 647 | 648 | case 'l': // lower next 649 | nextcase = CASE_LOWER; 650 | continue; 651 | case 'u': // upper next 652 | nextcase = CASE_UPPER; 653 | continue; 654 | case 'L': // lower till \E 655 | currentcase = CASE_LOWER; 656 | continue; 657 | case 'U': // upper till \E 658 | currentcase = CASE_UPPER; 659 | continue; 660 | case 'Q': // quote 661 | continue; 662 | case 'E': // end of \L/\U 663 | currentcase = CASE_NONE; 664 | continue; 665 | 666 | default: // '/', '\\' and the other char 667 | ender = prvch; 668 | nextcase = CASE_NONE; 669 | break; 670 | } 671 | *dst++ = ender; 672 | } 673 | } 674 | if (!special) { // no special char found 675 | // delete [] repstr->startp; // deleted by the deconstructor 676 | // delete [] repstr->dlen; // deleted by the deconstructor 677 | delete repstr; 678 | return NULL; 679 | } 680 | 681 | rx->pmflags &= ~PMf_CONST; /* off known replacement string */ 682 | 683 | 684 | if (dst - olddst > 0) { 685 | repstr->info[cindex].startp = olddst; 686 | repstr->info[cindex++].dlen = dst - olddst; 687 | } 688 | repstr->count = cindex; 689 | 690 | return repstr; 691 | } 692 | catch (std::exception& /*ex*/) { 693 | TRACE0(_T("out of space in compile_rep()\n")); 694 | delete repstr; 695 | throw; 696 | } 697 | } 698 | 699 | 700 | 701 | unsigned long 702 | scan_oct(const TCHAR *start, int len, int *retlen) 703 | { 704 | register const TCHAR *s = start; 705 | register unsigned long retval = 0; 706 | 707 | while (len 
&& *s >= '0' && *s <= '7') { 708 | retval <<= 3; 709 | retval |= *s++ - '0'; 710 | len--; 711 | } 712 | *retlen = (int) (s - start); 713 | return retval; 714 | } 715 | 716 | static TCHAR hexdigit[] = _T("0123456789abcdef0123456789ABCDEF"); 717 | 718 | unsigned long 719 | scan_hex(const TCHAR *start, int len, int *retlen) 720 | { 721 | register const TCHAR *s = start; 722 | register unsigned long retval = 0; 723 | TCHAR *tmp; 724 | 725 | while (len-- && *s && (tmp = _tcschr(hexdigit, *s))) { 726 | retval <<= 4; 727 | retval |= (tmp - hexdigit) & 15; 728 | s++; 729 | } 730 | *retlen = (int) (s - start); 731 | return retval; 732 | } 733 | 734 | /* 735 | unsigned long 736 | scan_dec(const TCHAR *start, int len, int *retlen) 737 | { 738 | TCHAR *s; 739 | unsigned long retval; 740 | 741 | retval = _tcstoul(start, &s, 10); 742 | *retlen = s - start; 743 | return retval; 744 | } 745 | */ 746 | 747 | } // namespace 748 | -------------------------------------------------------------------------------- /src/sv.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // sv.cc 3 | // 4 | //////////////////////////////////////////////////////////////////////////////// 5 | // 1999.11.24 update by Tatsuo Baba 6 | // 2006.08.29 update by K.Takata 7 | // 8 | // You may distribute under the terms of either the GNU General Public 9 | // License or the Artistic License, as specified in the perl_license.txt file. 
10 | //////////////////////////////////////////////////////////////////////////////// 11 | 12 | 13 | #define WIN32_LEAN_AND_MEAN 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "mem_vc6.h" 22 | 23 | #ifndef UNICODE 24 | #define KANJI 25 | #endif 26 | 27 | /* return values of kpart */ 28 | #define KPART_KANJI_1 1 /* kanji 1st byte */ 29 | #define KPART_KANJI_2 2 /* kanji 2nd byte */ 30 | #define KPART_OTHER 0 /* other (ASCII) */ 31 | 32 | 33 | #include "sv.h" 34 | 35 | //using namespace BREGONIG_NS; 36 | namespace BREGONIG_NS { 37 | 38 | int kpart(TCHAR *pLim,TCHAR *pChr); 39 | 40 | 41 | #if 0 42 | static 43 | unsigned char sjis_tab[256] = 44 | { 45 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 46 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 47 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 48 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 49 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 50 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 51 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 6x */ 52 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 53 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 8x */ 54 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 9x */ 55 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Ax */ 56 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 57 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Cx */ 58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Dx */ 59 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ex */ 60 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, /* Fx */ 61 | }; 62 | #endif 63 | 64 | 65 | //# define SvGROW(sv,len) (SvLEN(sv) < (len) ? sv_grow(sv,len) : SvPVX(sv)) 66 | // g++ cause error converting to `void' from `char *' 1999/11/22 67 | // so add (void) 68 | # define SvGROW(sv,len) (SvLEN(sv) < (len) ? 
sv_grow(sv,len) : (void)SvPVX(sv)) 69 | 70 | 71 | #if 0 72 | static TBYTE fold[] = { /* fast case folding table */ 73 | 0, 1, 2, 3, 4, 5, 6, 7, 74 | 8, 9, 10, 11, 12, 13, 14, 15, 75 | 16, 17, 18, 19, 20, 21, 22, 23, 76 | 24, 25, 26, 27, 28, 29, 30, 31, 77 | 32, 33, 34, 35, 36, 37, 38, 39, 78 | 40, 41, 42, 43, 44, 45, 46, 47, 79 | 48, 49, 50, 51, 52, 53, 54, 55, 80 | 56, 57, 58, 59, 60, 61, 62, 63, 81 | 64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 82 | 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 83 | 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 84 | 'x', 'y', 'z', 91, 92, 93, 94, 95, 85 | 96, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 86 | 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 87 | 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 88 | 'X', 'Y', 'Z', 123, 124, 125, 126, 127, 89 | 128, 129, 130, 131, 132, 133, 134, 135, 90 | 136, 137, 138, 139, 140, 141, 142, 143, 91 | 144, 145, 146, 147, 148, 149, 150, 151, 92 | 152, 153, 154, 155, 156, 157, 158, 159, 93 | 160, 161, 162, 163, 164, 165, 166, 167, 94 | 168, 169, 170, 171, 172, 173, 174, 175, 95 | 176, 177, 178, 179, 180, 181, 182, 183, 96 | 184, 185, 186, 187, 188, 189, 190, 191, 97 | 192, 193, 194, 195, 196, 197, 198, 199, 98 | 200, 201, 202, 203, 204, 205, 206, 207, 99 | 208, 209, 210, 211, 212, 213, 214, 215, 100 | 216, 217, 218, 219, 220, 221, 222, 223, 101 | 224, 225, 226, 227, 228, 229, 230, 231, 102 | 232, 233, 234, 235, 236, 237, 238, 239, 103 | 240, 241, 242, 243, 244, 245, 246, 247, 104 | 248, 249, 250, 251, 252, 253, 254, 255 105 | }; 106 | 107 | static TBYTE freq[] = { /* letter frequencies for mixed English/C */ 108 | 1, 2, 84, 151, 154, 155, 156, 157, 109 | 165, 246, 250, 3, 158, 7, 18, 29, 110 | 40, 51, 62, 73, 85, 96, 107, 118, 111 | 129, 140, 147, 148, 149, 150, 152, 153, 112 | 255, 182, 224, 205, 174, 176, 180, 217, 113 | 233, 232, 236, 187, 235, 228, 234, 226, 114 | 222, 219, 211, 195, 188, 193, 185, 184, 115 | 191, 183, 201, 229, 181, 220, 194, 162, 116 | 163, 208, 186, 202, 200, 218, 198, 179, 117 | 178, 214, 166, 170, 207, 
199, 209, 206, 118 | 204, 160, 212, 216, 215, 192, 175, 173, 119 | 243, 172, 161, 190, 203, 189, 164, 230, 120 | 167, 248, 227, 244, 242, 255, 241, 231, 121 | 240, 253, 169, 210, 245, 237, 249, 247, 122 | 239, 168, 252, 251, 254, 238, 223, 221, 123 | 213, 225, 177, 197, 171, 196, 159, 4, 124 | 5, 6, 8, 9, 10, 11, 12, 13, 125 | 14, 15, 16, 17, 19, 20, 21, 22, 126 | 23, 24, 25, 26, 27, 28, 30, 31, 127 | 32, 33, 34, 35, 36, 37, 38, 39, 128 | 41, 42, 43, 44, 45, 46, 47, 48, 129 | 49, 50, 52, 53, 54, 55, 56, 57, 130 | 58, 59, 60, 61, 63, 64, 65, 66, 131 | 67, 68, 69, 70, 71, 72, 74, 75, 132 | 76, 77, 78, 79, 80, 81, 82, 83, 133 | 86, 87, 88, 89, 90, 91, 92, 93, 134 | 94, 95, 97, 98, 99, 100, 101, 102, 135 | 103, 104, 105, 106, 108, 109, 110, 111, 136 | 112, 113, 114, 115, 116, 117, 119, 120, 137 | 121, 122, 123, 124, 125, 126, 127, 128, 138 | 130, 131, 132, 133, 134, 135, 136, 137, 139 | 138, 139, 141, 142, 143, 144, 145, 146 140 | }; 141 | #endif 142 | 143 | 144 | 145 | typedef ptrdiff_t STRLEN; 146 | 147 | void sv_free(register SV *sv) 148 | { 149 | if (sv->xpv_pv) 150 | delete [] sv->xpv_pv; 151 | delete sv; 152 | } 153 | 154 | 155 | #if 0 156 | TCHAR *fbm_instr( 157 | TBYTE *big, 158 | register TBYTE *bigend, 159 | SV *littlestr,int mline,int kmode) 160 | { 161 | register TBYTE *s; 162 | register int tmp; 163 | register int littlelen; 164 | register TBYTE *little; 165 | register TBYTE *table; 166 | register TBYTE *olds; 167 | register TBYTE *oldlittle; 168 | #ifdef KANJI 169 | TBYTE *tops = big; 170 | #endif 171 | 172 | if (SvTYPE(littlestr) != SVt_PVBM || !SvVALID(littlestr)) { 173 | int len = SvCUR(littlestr); 174 | TCHAR *l = SvPVX(littlestr); 175 | if (!len) 176 | return (TCHAR*)big; 177 | return ninstr((TCHAR*)big,(TCHAR*)bigend, l, l + len,kmode); 178 | } 179 | 180 | 181 | 182 | 183 | 184 | littlelen = SvCUR(littlestr); 185 | if (SvTAIL(littlestr) && !mline) { /* tail anchored? 
*/ 186 | if (littlelen > bigend - big) 187 | return NULL; 188 | little = (TBYTE*)SvPVX(littlestr); 189 | if (SvCASEFOLD(littlestr)) { /* oops, fake it */ 190 | big = bigend - littlelen; /* just start near end */ 191 | if (bigend[-1] == '\n' && little[littlelen-1] != '\n') 192 | big--; 193 | } 194 | else { 195 | s = bigend - littlelen; 196 | #ifdef KANJI 197 | if (*s == *little 198 | // && (!(hints & HINT_KANJI_STRING) 199 | // || kpart((TCHAR *)tops,(TCHAR *)s)!=KPART_KANJI_2) 200 | && (kpart((TCHAR *)tops,(TCHAR *)s)!=KPART_KANJI_2) 201 | && memcmp(s,little,littlelen)==0) 202 | #else 203 | if (*s == *little && memcmp((TCHAR*)s,(TCHAR*)little,littlelen*sizeof(TCHAR))==0) 204 | #endif 205 | return (TCHAR*)s; /* how sweet it is */ 206 | else if (bigend[-1] == '\n' && little[littlelen-1] != '\n' 207 | && s > big) { 208 | s--; 209 | #ifdef KANJI 210 | if (*s == *little 211 | // && (!(hints & HINT_KANJI_STRING) 212 | // || kpart((TCHAR *)tops,(TCHAR *)s)!=KPART_KANJI_2) 213 | && ( 214 | kpart((TCHAR *)tops,(TCHAR *)s)!=KPART_KANJI_2) 215 | && memcmp(s,little,littlelen)==0) 216 | #else 217 | if (*s == *little && memcmp((TCHAR*)s,(TCHAR*)little,littlelen*sizeof(TCHAR))==0) 218 | #endif 219 | return (TCHAR*)s; 220 | } 221 | return NULL; 222 | } 223 | } 224 | table = (TBYTE*)(SvPVX(littlestr) + littlelen + 1); 225 | if (--littlelen >= bigend - big) 226 | return NULL; 227 | s = big + littlelen; 228 | oldlittle = little = table - 2; 229 | if (SvCASEFOLD(littlestr)) { /* case insensitive? 
*/ 230 | if (s < bigend) { 231 | top1: 232 | /*SUPPRESS 560*/ 233 | if (tmp = table[*s]) { 234 | #ifdef POINTERRIGOR 235 | if (bigend - s > tmp) { 236 | s += tmp; 237 | goto top1; 238 | } 239 | #else 240 | if ((s += tmp) < bigend) 241 | goto top1; 242 | #endif 243 | return NULL; 244 | } 245 | else { 246 | tmp = littlelen; /* less expensive than calling strncmp() */ 247 | olds = s; 248 | while (tmp--) { 249 | if (*--s == *--little || fold[*s] == *little) 250 | continue; 251 | s = olds + 1; /* here we pay the price for failure */ 252 | little = oldlittle; 253 | if (s < bigend) /* fake up continue to outer loop */ 254 | goto top1; 255 | return NULL; 256 | } 257 | #ifdef KANJI 258 | // if ((hints & HINT_KANJI_STRING) 259 | // && kpart((TCHAR *)tops,(TCHAR *)s) == 2) { 260 | if ( 261 | kpart((TCHAR *)tops,(TCHAR *)s) == 2) { 262 | s = olds + 1; 263 | little = oldlittle; 264 | if (s < bigend) 265 | goto top1; 266 | return NULL; 267 | } 268 | #endif 269 | return (TCHAR *)s; 270 | } 271 | } 272 | } 273 | else { 274 | if (s < bigend) { 275 | top2: 276 | /*SUPPRESS 560*/ 277 | if (tmp = table[*s]) { 278 | #ifdef POINTERRIGOR 279 | if (bigend - s > tmp) { 280 | s += tmp; 281 | goto top2; 282 | } 283 | #else 284 | if ((s += tmp) < bigend) 285 | goto top2; 286 | #endif 287 | return NULL; 288 | } 289 | else { 290 | tmp = littlelen; /* less expensive than calling strncmp() */ 291 | olds = s; 292 | while (tmp--) { 293 | if (*--s == *--little) 294 | continue; 295 | s = olds + 1; /* here we pay the price for failure */ 296 | little = oldlittle; 297 | if (s < bigend) /* fake up continue to outer loop */ 298 | goto top2; 299 | return NULL; 300 | } 301 | #ifdef KANJI 302 | if (kpart((TCHAR *)tops,(TCHAR *)s) == 2) { 303 | s = olds + 1; 304 | little = oldlittle; 305 | if (s < bigend) 306 | goto top1; 307 | return NULL; 308 | } 309 | #endif 310 | return (TCHAR *)s; 311 | } 312 | } 313 | } 314 | return NULL; 315 | } 316 | #endif 317 | 318 | 319 | 
//////////////////////////////////////////////////////////////////////////// 320 | /* Note: sv_setsv() should not be called with a source string that needs 321 | * to be reused, since it may destroy the source string if it is marked 322 | * as temporary. 323 | */ 324 | 325 | void sv_setpvn(register SV *sv, register const TCHAR *ptr, register STRLEN len); 326 | 327 | 328 | 329 | void sv_grow(SV* sv,STRLEN len) 330 | { 331 | len += 512; 332 | TCHAR *ptr = new (std::nothrow) TCHAR[len]; 333 | if (ptr == NULL) 334 | throw std::bad_alloc(); 335 | 336 | memcpy(ptr,sv->xpv_pv,sv->xpv_cur*sizeof(TCHAR)); 337 | ptr[sv->xpv_cur] = '\0'; 338 | if (sv->xpv_pv) 339 | delete [] sv->xpv_pv; 340 | sv->xpv_pv = ptr; 341 | sv->xpv_len = len; 342 | } 343 | 344 | 345 | 346 | void 347 | sv_setsv(SV *dstr, register SV *sstr) 348 | { 349 | if (sstr == dstr) 350 | return; 351 | ptrdiff_t len = sstr->xpv_cur; 352 | SvGROW(dstr, len +1); 353 | 354 | memcpy(dstr->xpv_pv,sstr->xpv_pv,len*sizeof(TCHAR)); 355 | dstr->xpv_cur = len; 356 | *SvEND(dstr) = '\0'; 357 | dstr->sv_flags = sstr->sv_flags; 358 | } 359 | 360 | 361 | void sv_catpvn(SV* sv,const TCHAR*ptr,STRLEN len) 362 | { 363 | SvGROW(sv, sv->xpv_cur + len +1); 364 | 365 | memcpy(sv->xpv_pv + sv->xpv_cur,ptr,len*sizeof(TCHAR)); 366 | sv->xpv_cur += len; 367 | *SvEND(sv) = '\0'; 368 | } 369 | 370 | 371 | void sv_setpvn(register SV *sv, register const TCHAR *ptr, register STRLEN len) 372 | { 373 | SvGROW(sv, len + 1 < 512 ? 
512:len + 1); 374 | memcpy(SvPVX(sv),ptr,len*sizeof(TCHAR)); 375 | // SvCUR_set(sv, len); 376 | sv->xpv_cur = len; 377 | *SvEND(sv) = '\0'; 378 | (void)SvPOK_only(sv); /* validate pointer */ 379 | } 380 | 381 | BOOL sv_upgrade(register SV* sv, int mt) 382 | { 383 | if (SvTYPE(sv) == mt) 384 | return TRUE; 385 | sv->sv_flags |= SVt_PVBM; 386 | return TRUE; 387 | } 388 | 389 | 390 | 391 | #if 0 392 | void fbm_compile(SV *sv, int iflag) 393 | { 394 | register TBYTE *s; 395 | register TBYTE *table; 396 | register int i; 397 | register int len = SvCUR(sv); 398 | int rarest = 0; 399 | int frequency = 256; 400 | 401 | if (len > 255) 402 | return; /* can't have offsets that big */ 403 | SvGROW(sv,len+258); 404 | table = (TBYTE*)(SvPVX(sv) + len + 1); 405 | s = table - 2; 406 | for (i = 0; i < 256; i++) { 407 | table[i] = len; 408 | } 409 | i = 0; 410 | while (s >= (TBYTE*)(SvPVX(sv))) 411 | { 412 | if (table[*s] == len) { 413 | #ifndef pdp11 414 | if (iflag) 415 | table[*s] = table[fold[*s]] = i; 416 | #else 417 | if (iflag) { 418 | int j; 419 | j = fold[*s]; 420 | table[j] = i; 421 | table[*s] = i; 422 | } 423 | #endif /* pdp11 */ 424 | else 425 | table[*s] = i; 426 | } 427 | s--,i++; 428 | } 429 | sv_upgrade(sv, SVt_PVBM); 430 | //baba sv_magic(sv, Nullsv, 'B', Nullch, 0); /* deep magic */ 431 | SvVALID_on(sv); 432 | 433 | s = (TBYTE*)(SvPVX(sv)); /* deeper magic */ 434 | if (iflag) { 435 | register int tmp, foldtmp; 436 | SvCASEFOLD_on(sv); 437 | if (SvCASEFOLD(sv)) { 438 | int ff = 1; 439 | ff = 3; 440 | } 441 | for (i = 0; i < len; i++) { 442 | tmp=freq[s[i]]; 443 | foldtmp=freq[fold[s[i]]]; 444 | if (tmp < frequency && foldtmp < frequency) { 445 | rarest = i; 446 | /* choose most frequent among the two */ 447 | frequency = (tmp > foldtmp) ? 
tmp : foldtmp; 448 | } 449 | } 450 | } 451 | else { 452 | for (i = 0; i < len; i++) { 453 | if (freq[s[i]] < frequency) { 454 | rarest = i; 455 | frequency = freq[s[i]]; 456 | } 457 | } 458 | } 459 | BmRARE(sv) = s[rarest]; 460 | BmPREVIOUS(sv) = rarest; 461 | } 462 | #endif 463 | 464 | 465 | SV *newSVpv(TCHAR *s, STRLEN len) 466 | { 467 | register SV *sv; 468 | sv = new (std::nothrow) SV; 469 | if (sv == NULL) 470 | throw std::bad_alloc(); 471 | memset(sv,0,sizeof(SV)); 472 | 473 | SvREFCNT(sv) = 1; 474 | SvFLAGS(sv) = 0; 475 | if (!len) 476 | len = _tcslen(s); 477 | sv_setpvn(sv,s,len); 478 | return sv; 479 | } 480 | 481 | int kpart(TCHAR *pLim, TCHAR *pChr) 482 | { 483 | register TCHAR *p = pChr - 1; 484 | register int ct = 0; 485 | 486 | if (NULL == pLim || NULL == pChr) return 0 ; 487 | 488 | while (p >= pLim && iskanji(*p)) { 489 | p--; 490 | ct++; 491 | } 492 | return (ct & 1) ? 2 : iskanji(*pChr); 493 | } 494 | 495 | 496 | 497 | /* same as instr but allow embedded nulls */ 498 | 499 | TCHAR * 500 | ninstr( 501 | register TCHAR *big, 502 | register TCHAR *bigend, 503 | TCHAR *little, 504 | TCHAR *lend,int kmode) 505 | { 506 | register TCHAR *s, *x; 507 | register int first = *little; 508 | register TCHAR *littleend = lend; 509 | 510 | if (!first && little >= littleend) 511 | return big; 512 | if (bigend - big < littleend - little) 513 | return NULL; 514 | bigend -= littleend - little++; 515 | if (kmode) goto kproc; 516 | while (big <= bigend) { 517 | if (*big++ != first) 518 | continue; 519 | 520 | for (x=big,s=little; s < littleend; /**/ ) { 521 | if (*s++ != *x++) { 522 | s--; 523 | break; 524 | } 525 | } 526 | if (s >= littleend) 527 | return big-1; 528 | } 529 | return NULL; 530 | 531 | kproc: 532 | int k = 0; 533 | while (big <= bigend) { 534 | if (k) { 535 | big++; k = 0; 536 | continue; 537 | } 538 | k = iskanji(*big); 539 | if (*big++ != first) 540 | continue; 541 | 542 | for (x=big,s=little; s < littleend; /**/ ) { 543 | if (*s++ != *x++) { 544 | 
s--; 545 | break; 546 | } 547 | } 548 | if (s >= littleend) 549 | return big-1; 550 | } 551 | return NULL; 552 | 553 | 554 | } 555 | 556 | 557 | #if 0 558 | BOOL iskanji(int c) { 559 | return sjis_tab[c & 0xff]; 560 | } 561 | #endif 562 | 563 | } // namespace 564 | -------------------------------------------------------------------------------- /src/sv.h: -------------------------------------------------------------------------------- 1 | /* sv.h 2 | * 3 | * Copyright (c) 1991-1994, Larry Wall 4 | * 5 | * You may distribute under the terms of either the GNU General Public 6 | * License or the Artistic License, as specified in the README file. 7 | * 8 | */ 9 | // 2006.08.29 updated by K.Takata 10 | 11 | 12 | #ifndef BREGONIG_NS 13 | #ifdef UNICODE 14 | #define BREGONIG_NS unicode 15 | #else 16 | #define BREGONIG_NS ansi 17 | #endif 18 | #endif 19 | 20 | namespace BREGONIG_NS { 21 | 22 | #ifndef TRUE 23 | //#define BOOL int 24 | #define TRUE 1 25 | #define FALSE 0 26 | typedef int BOOL; 27 | #endif 28 | 29 | /* 30 | #define U16 unsigned short 31 | #define U8 unsigned char 32 | #define U32 unsigned int 33 | */ 34 | typedef unsigned short U16; 35 | typedef unsigned char U8; 36 | typedef unsigned int U32; 37 | 38 | 39 | typedef enum { 40 | SVt_NULL, /* 0 */ 41 | SVt_IV, /* 1 */ 42 | SVt_NV, /* 2 */ 43 | SVt_RV, /* 3 */ 44 | SVt_PV, /* 4 */ 45 | SVt_PVIV, /* 5 */ 46 | SVt_PVNV, /* 6 */ 47 | SVt_PVMG, /* 7 */ 48 | SVt_PVBM, /* 8 */ 49 | SVt_PVLV, /* 9 */ 50 | SVt_PVAV, /* 10 */ 51 | SVt_PVHV, /* 11 */ 52 | SVt_PVCV, /* 12 */ 53 | SVt_PVGV, /* 13 */ 54 | SVt_PVFM, /* 14 */ 55 | SVt_PVIO /* 15 */ 56 | } svtype; 57 | 58 | /* Using C's structural equivalence to help emulate C++ inheritance here... 
*/ 59 | struct sv { 60 | // void* sv_any; /* pointer to something */ 61 | int sv_refcnt; /* how many references to us */ 62 | int sv_flags; /* what we are */ 63 | // struct xpvbm sv_xpv; /* struct xpvbm */ 64 | TCHAR* xpv_pv; /* pointer to malloced string */ 65 | ptrdiff_t xpv_cur; /* length of xpv_pv as a C string */ 66 | ptrdiff_t xpv_len; /* allocated size */ 67 | int xbm_useful; /* is this constant pattern being useful? */ 68 | int xbm_previous; /* how many characters in string before rare? */ 69 | TCHAR xbm_rare; /* rarest character in string */ 70 | }; 71 | 72 | typedef struct sv SV; 73 | 74 | 75 | #define SvFLAGS(sv) (sv)->sv_flags 76 | 77 | #define SvREFCNT(sv) (sv)->sv_refcnt 78 | #define SvREFCNT_inc(sv) ((Sv = (SV*)(sv)), \ 79 | (Sv && ++SvREFCNT(Sv)), (SV*)Sv) 80 | #define SvREFCNT_dec(sv) sv_free((SV*)sv) 81 | 82 | #define SVTYPEMASK 0xff 83 | #define SvTYPE(sv) ((sv)->sv_flags & SVTYPEMASK) 84 | 85 | #define SvUPGRADE(sv, mt) (SvTYPE(sv) >= mt || sv_upgrade(sv, mt)) 86 | 87 | #define SVs_PADBUSY 0x00000100 /* reserved for tmp or my already */ 88 | #define SVs_PADTMP 0x00000200 /* in use as tmp */ 89 | #define SVs_PADMY 0x00000400 /* in use a "my" variable */ 90 | #define SVs_TEMP 0x00000800 /* string is stealable? 
*/ 91 | #define SVs_OBJECT 0x00001000 /* is "blessed" */ 92 | #define SVs_GMG 0x00002000 /* has magical get method */ 93 | #define SVs_SMG 0x00004000 /* has magical set method */ 94 | #define SVs_RMG 0x00008000 /* has random magical methods */ 95 | 96 | #define SVf_IOK 0x00010000 /* has valid public integer value */ 97 | #define SVf_NOK 0x00020000 /* has valid public numeric value */ 98 | #define SVf_POK 0x00040000 /* has valid public pointer value */ 99 | #define SVf_ROK 0x00080000 /* has a valid reference pointer */ 100 | 101 | #define SVf_FAKE 0x00100000 /* glob or lexical is just a copy */ 102 | #define SVf_OOK 0x00200000 /* has valid offset value */ 103 | #define SVf_BREAK 0x00400000 /* refcnt is artificially low */ 104 | #define SVf_READONLY 0x00800000 /* may not be modified */ 105 | 106 | #define SVf_THINKFIRST (SVf_READONLY|SVf_ROK) 107 | 108 | #define SVp_IOK 0x01000000 /* has valid non-public integer value */ 109 | #define SVp_NOK 0x02000000 /* has valid non-public numeric value */ 110 | #define SVp_POK 0x04000000 /* has valid non-public pointer value */ 111 | #define SVp_SCREAM 0x08000000 /* has been studied? */ 112 | 113 | #define SVf_OK (SVf_IOK|SVf_NOK|SVf_POK|SVf_ROK| \ 114 | SVp_IOK|SVp_NOK|SVp_POK) 115 | 116 | 117 | #define PRIVSHIFT 8 118 | 119 | /* Some private flags. */ 120 | 121 | #define SVpfm_COMPILED 0x80000000 122 | 123 | #define SVpbm_VALID 0x80000000 124 | #define SVpbm_CASEFOLD 0x40000000 125 | #define SVpbm_TAIL 0x20000000 126 | #define SVphv_SHAREKEYS 0x20000000 /* keys live on shared string table */ 127 | #define SVphv_LAZYDEL 0x40000000 /* entry in xhv_eiter must be deleted */ 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | /* The following macros define implementation-independent predicates on SVs. 
*/ 136 | 137 | #define SvNIOK(sv) (SvFLAGS(sv) & (SVf_IOK|SVf_NOK)) 138 | #define SvNIOKp(sv) (SvFLAGS(sv) & (SVp_IOK|SVp_NOK)) 139 | #define SvNIOK_off(sv) (SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK| \ 140 | SVp_IOK|SVp_NOK)) 141 | 142 | #define SvOK(sv) (SvFLAGS(sv) & SVf_OK) 143 | 144 | #define SvOK_off(sv) (SvFLAGS(sv) &= ~SVf_OK, SvOOK_off(sv)) 145 | 146 | #define SvOKp(sv) (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) 147 | #define SvIOKp(sv) (SvFLAGS(sv) & SVp_IOK) 148 | #define SvIOKp_on(sv) (SvOOK_off(sv), SvFLAGS(sv) |= SVp_IOK) 149 | #define SvNOKp(sv) (SvFLAGS(sv) & SVp_NOK) 150 | #define SvNOKp_on(sv) (SvFLAGS(sv) |= SVp_NOK) 151 | #define SvPOKp(sv) (SvFLAGS(sv) & SVp_POK) 152 | #define SvPOKp_on(sv) (SvFLAGS(sv) |= SVp_POK) 153 | 154 | #define SvIOK(sv) (SvFLAGS(sv) & SVf_IOK) 155 | #define SvIOK_on(sv) (SvOOK_off(sv), \ 156 | SvFLAGS(sv) |= (SVf_IOK|SVp_IOK)) 157 | #define SvIOK_off(sv) (SvFLAGS(sv) &= ~(SVf_IOK|SVp_IOK)) 158 | #define SvIOK_only(sv) (SvOOK_off(sv), SvOK_off(sv), \ 159 | SvFLAGS(sv) |= (SVf_IOK|SVp_IOK)) 160 | 161 | #define SvNOK(sv) (SvFLAGS(sv) & SVf_NOK) 162 | #define SvNOK_on(sv) (SvFLAGS(sv) |= (SVf_NOK|SVp_NOK)) 163 | #define SvNOK_off(sv) (SvFLAGS(sv) &= ~(SVf_NOK|SVp_NOK)) 164 | #define SvNOK_only(sv) (SvOK_off(sv), \ 165 | SvFLAGS(sv) |= (SVf_NOK|SVp_NOK)) 166 | 167 | #define SvPOK(sv) (SvFLAGS(sv) & SVf_POK) 168 | #define SvPOK_on(sv) (SvFLAGS(sv) |= (SVf_POK|SVp_POK)) 169 | #define SvPOK_off(sv) (SvFLAGS(sv) &= ~(SVf_POK|SVp_POK)) 170 | 171 | #define SvPOK_only(sv) (SvFLAGS(sv) &= ~SVf_OK, \ 172 | SvFLAGS(sv) |= (SVf_POK|SVp_POK)) 173 | 174 | #define SvOOK(sv) (SvFLAGS(sv) & SVf_OOK) 175 | #define SvOOK_on(sv) (SvIOK_off(sv), SvFLAGS(sv) |= SVf_OOK) 176 | #define SvOOK_off(sv) (SvOOK(sv) && sv_backoff(sv)) 177 | 178 | #define SvFAKE(sv) (SvFLAGS(sv) & SVf_FAKE) 179 | #define SvFAKE_on(sv) (SvFLAGS(sv) |= SVf_FAKE) 180 | #define SvFAKE_off(sv) (SvFLAGS(sv) &= ~SVf_FAKE) 181 | 182 | #define SvROK(sv) (SvFLAGS(sv) & SVf_ROK) 183 | 
#define SvROK_on(sv) (SvFLAGS(sv) |= SVf_ROK) 184 | 185 | 186 | 187 | 188 | 189 | #define SvSCREAM(sv) (SvFLAGS(sv) & SVp_SCREAM) 190 | #define SvSCREAM_on(sv) (SvFLAGS(sv) |= SVp_SCREAM) 191 | #define SvSCREAM_off(sv) (SvFLAGS(sv) &= ~SVp_SCREAM) 192 | 193 | #define SvCOMPILED(sv) (SvFLAGS(sv) & SVpfm_COMPILED) 194 | #define SvCOMPILED_on(sv) (SvFLAGS(sv) |= SVpfm_COMPILED) 195 | #define SvCOMPILED_off(sv) (SvFLAGS(sv) &= ~SVpfm_COMPILED) 196 | 197 | #define SvTAIL(sv) (SvFLAGS(sv) & SVpbm_TAIL) 198 | #define SvTAIL_on(sv) (SvFLAGS(sv) |= SVpbm_TAIL) 199 | #define SvTAIL_off(sv) (SvFLAGS(sv) &= ~SVpbm_TAIL) 200 | 201 | #define SvCASEFOLD(sv) (SvFLAGS(sv) & SVpbm_CASEFOLD) 202 | #define SvCASEFOLD_on(sv) (SvFLAGS(sv) |= SVpbm_CASEFOLD) 203 | #define SvCASEFOLD_off(sv) (SvFLAGS(sv) &= ~SVpbm_CASEFOLD) 204 | 205 | #define SvVALID(sv) (SvFLAGS(sv) & SVpbm_VALID) 206 | #define SvVALID_on(sv) (SvFLAGS(sv) |= SVpbm_VALID) 207 | #define SvVALID_off(sv) (SvFLAGS(sv) &= ~SVpbm_VALID) 208 | 209 | 210 | #define SvPVX(sv) sv->xpv_pv 211 | #define SvPVXx(sv) SvPVX(sv) 212 | #define SvCUR(sv) sv->xpv_cur 213 | #define SvLEN(sv) sv->xpv_len 214 | #define SvLENx(sv) SvLEN(sv) 215 | #define SvEND(sv) (sv->xpv_pv + sv->xpv_cur) 216 | #define SvENDx(sv) ((Sv = (sv)), SvEND(Sv)) 217 | 218 | #define BmRARE(sv) sv->xbm_rare 219 | #define BmUSEFUL(sv) sv->xbm_useful 220 | #define BmPREVIOUS(sv) sv->xbm_previous 221 | 222 | 223 | 224 | 225 | 226 | #define SAVEt_ITEM 0 227 | #define SAVEt_SV 1 228 | #define SAVEt_AV 2 229 | #define SAVEt_HV 3 230 | #define SAVEt_INT 4 231 | #define SAVEt_LONG 5 232 | #define SAVEt_I32 6 233 | #define SAVEt_IV 7 234 | #define SAVEt_SPTR 8 235 | #define SAVEt_APTR 9 236 | #define SAVEt_HPTR 10 237 | #define SAVEt_PPTR 11 238 | #define SAVEt_NSTAB 12 239 | #define SAVEt_SVREF 13 240 | #define SAVEt_GP 14 241 | #define SAVEt_FREESV 15 242 | #define SAVEt_FREEOP 16 243 | #define SAVEt_FREEPV 17 244 | #define SAVEt_CLEARSV 18 245 | #define SAVEt_DELETE 19 246 | 
#define SAVEt_DESTRUCTOR 20 247 | #define SAVEt_REGCONTEXT 21 248 | 249 | #define SSCHECK(need) if (savestack_ix + need > savestack_max) savestack_grow() 250 | #define SSPUSHINT(i) (savestack[savestack_ix++].any_i32 = (int)(i)) 251 | #define SSPUSHLONG(i) (savestack[savestack_ix++].any_long = (long)(i)) 252 | #define SSPUSHIV(i) (savestack[savestack_ix++].any_iv = (IV)(i)) 253 | #define SSPUSHPTR(p) (savestack[savestack_ix++].any_ptr = (void*)(p)) 254 | #define SSPUSHDPTR(p) (savestack[savestack_ix++].any_dptr = (p)) 255 | #define SSPOPINT (savestack[--savestack_ix].any_i32) 256 | #define SSPOPLONG (savestack[--savestack_ix].any_long) 257 | #define SSPOPIV (savestack[--savestack_ix].any_iv) 258 | #define SSPOPPTR (savestack[--savestack_ix].any_ptr) 259 | #define SSPOPDPTR (savestack[--savestack_ix].any_dptr) 260 | 261 | 262 | TCHAR *ninstr(register TCHAR *big,register TCHAR *bigend,TCHAR *little, 263 | TCHAR *lend,int kmode); 264 | //TCHAR * fbm_instr(unsigned TCHAR*,register unsigned TCHAR *,SV*,int mline,int kmode); 265 | TCHAR * fbm_instr(TBYTE*,register TBYTE *,SV*,int mline,int kmode); 266 | BOOL sv_upgrade(register SV*, int);// sv.spp 267 | SV *newSVpv(TCHAR*,ptrdiff_t);//sv.cpp 268 | void sv_catpvn(register SV*,register const TCHAR*,register ptrdiff_t);//sv.cpp 269 | void sv_setpvn(register SV*,register const TCHAR*,register ptrdiff_t);//sv.cpp 270 | void sv_setsv(SV*,SV*);//sv.cpp 271 | 272 | void fbm_compile (SV* sv, int iflag); 273 | void sv_free (SV* sv); 274 | TCHAR* savepvn (TCHAR* sv, int len); 275 | unsigned long scan_hex(const TCHAR*, int,int*); 276 | unsigned long scan_oct(const TCHAR*, int,int*); 277 | 278 | #ifndef iskanji 279 | #include 280 | //BOOL iskanji(int c); 281 | #define iskanji(c) _ismbblead(c) 282 | #endif 283 | 284 | } // namespace 285 | -------------------------------------------------------------------------------- /src/version.h: -------------------------------------------------------------------------------- 1 | 2 | #define 
BREGONIG_VERSION_MAJOR 4 3 | #define BREGONIG_VERSION_MINOR 20 4 | #define BREGONIG_VERSION_SUFFIX "" 5 | #define BREGONIG_COPYRIGHT_STRING "Copyright (C) 2006-2019 K.Takata" 6 | 7 | 8 | #define TOSTR_(a) #a 9 | #define BREGONIG_VERSION_TOSTR_(a,b,c) \ 10 | TOSTR_(a) "." TOSTR_(b) c 11 | #define BREGONIG_VERSION_STRING \ 12 | BREGONIG_VERSION_TOSTR_(BREGONIG_VERSION_MAJOR, BREGONIG_VERSION_MINOR, BREGONIG_VERSION_SUFFIX) 13 | 14 | -------------------------------------------------------------------------------- /srcfiles.lst: -------------------------------------------------------------------------------- 1 | Makefile 2 | bregexp.h 3 | bregonig.h 4 | bregonig.cpp 5 | subst.cpp 6 | bsplit.cpp 7 | btrans.cpp 8 | sv.h 9 | sv.cpp 10 | version.h 11 | dbgtrace.h 12 | mem_vc6.h 13 | afxres.h 14 | resource.h 15 | bregonig.rc 16 | sample\bregpool.h 17 | sample\sample.c 18 | sample\sample_bosubst.cpp 19 | sample\sample_match.cpp 20 | sample\sample_split.cpp 21 | sample\sample_subst.cpp 22 | sample\sample_trans.cpp 23 | python\bregonig.py 24 | python\BregPool.py 25 | python\sample_bomatch.py 26 | python\sample_bosubst.py 27 | python\sample_bosubst_utf8.py 28 | python\sample_match.py 29 | python\sample_match_utf16.py 30 | python\sample_split.py 31 | python\sample_subst.py 32 | python\sample_trans.py 33 | python\test_common.py 34 | python\test_crnl.py 35 | python\test_match.py 36 | python\test_subst.py 37 | bregonig.lib 38 | k2regexp.lib 39 | --------------------------------------------------------------------------------