├── LICENSE
├── Makefile
├── README.md
├── common.cpp
├── common.h
├── file.cpp
├── file.h
├── rdflags.cpp
├── rdflags.h
├── regex
    ├── Makefile
    ├── config.h
    ├── re_comp.h
    ├── regcomp.c
    ├── regex.c
    ├── regex.h
    ├── regex_internal.c
    ├── regex_internal.h
    └── regexec.c
├── scriptext.cpp
├── scriptext.def
├── string.cpp
└── string.h


/LICENSE:
--------------------------------------------------------------------------------
  1 | 		    GNU GENERAL PUBLIC LICENSE
  2 | 		       Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.
  5 |      51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 | 			    Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Library General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 | 		    GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 | 			    NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 | 		     END OF TERMS AND CONDITIONS
281 | 
282 | 	    How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License
307 |     along with this program; if not, write to the Free Software
308 |     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
309 | 
310 | 
311 | Also add information on how to contact you by electronic and paper mail.
312 | 
313 | If the program is interactive, make it output a short notice like this
314 | when it starts in an interactive mode:
315 | 
316 |     Gnomovision version 69, Copyright (C) year  name of author
317 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
318 |     This is free software, and you are welcome to redistribute it
319 |     under certain conditions; type `show c' for details.
320 | 
321 | The hypothetical commands `show w' and `show c' should show the appropriate
322 | parts of the General Public License.  Of course, the commands you use may
323 | be called something other than `show w' and `show c'; they could even be
324 | mouse-clicks or menu items--whatever suits your program.
325 | 
326 | You should also get your employer (if you work as a programmer) or your
327 | school, if any, to sign a "copyright disclaimer" for the program, if
328 | necessary.  Here is a sample; alter the names:
329 | 
330 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
331 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
332 | 
333 |   <signature of Ty Coon>, 1 April 1989
334 |   Ty Coon, President of Vice
335 | 
336 | This General Public License does not permit incorporating your program into
337 | proprietary programs.  If your program is a subroutine library, you may
338 | consider it more useful to permit linking proprietary applications with the
339 | library.  If this is what you want to do, use the GNU Library General
340 | Public License instead of this License.
341 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# NMAKE makefile building the scriptext.dll WinDbg extension.
# Expects TARGET_CPU to be set in the environment (x86, x64 or IA64).

# Tools and shell commands used by the rules below
LD=link
MAKE=nmake
RM=DEL
CP=COPY

# Derive from TARGET_CPU the SDK library sub-directory (CPU_DIR) and the suffix
# of the default WinDbg installation directory (DIR_PSFIX)
!IF "$(TARGET_CPU)"=="x86"
CPU_DIR=i386
DIR_PSFIX=(x86)
!ELSEIF "$(TARGET_CPU)"=="x64"
CPU_DIR=amd64
DIR_PSFIX=(x64)
!ELSEIF "$(TARGET_CPU)"=="IA64"
CPU_DIR=ia64
DIR_PSFIX=(x64)
!ENDIF

# Default WinDbg location; may be overridden by defining WINDBG_DIR
!IFNDEF WINDBG_DIR
WINDBG_DIR=%ProgramFiles%\Debugging Tools for Windows $(DIR_PSFIX)
!ENDIF

# CPU_DIR stays undefined if TARGET_CPU matched none of the values above
!IF !DEFINED(CPU_DIR)
!ERROR Target platform can't be deduced. Make sure the MS SDK building \
environment is set and WINDBG_DIR is defined for non standard WinDbg \
installation location.
!ENDIF

# Compiler flags: CPPFLAGS_CMN is shared by all sources, CPPFLAGS additionally
# makes them use the common.h precompiled header (-Yu)
INC=-I "$(WINDBG_DIR)\sdk\inc"
CPPFLAGS_CMN=$(INC) -DREGEX_STATIC
CPPFLAGS=$(CPPFLAGS_CMN) -Yucommon.h

# Linker flags: build a DLL linked against the debugger engine library
LIB_DIR=-LIBPATH:"$(WINDBG_DIR)\sdk\lib\$(CPU_DIR)"
LIBS=dbgeng.lib
LDFLAGS=-DLL $(LIB_DIR) $(LIBS)

# Object files of the extension (the regex object is listed separately in the
# scriptext.dll rule)
OBJS = rdflags.obj \
       common.obj \
       string.obj \
       file.obj \
       scriptext.obj

# Default target
all: scriptext.dll

# Remove build products of this directory
clean:
	$(RM) *.obj *.dll *.exp *.lib

# Like clean, plus the precompiled header and the regex sub-directory products
cleanall: clean
	$(RM) *.pch
	cd .\regex
	$(MAKE) clean
	cd ..

# Copy the built DLL into the WinDbg extensions directory
install: scriptext.dll
	$(CP) $** "$(WINDBG_DIR)\winext"

# The regex object is built by the makefile in the regex sub-directory
.\regex\regex.obj:
	cd .\regex
	$(MAKE)
	cd ..

# precompiled common headers
# (-Yc creates the precompiled header; it uses CPPFLAGS_CMN since CPPFLAGS
# carries -Yu)
common.obj: common.cpp
	$(CPP) -c $(CPPFLAGS_CMN) -Yc$*.h $**

# Link all dependents ($**) into the DLL using the exports definition file
scriptext.dll: .\regex\regex.obj $(OBJS)
	$(LD) -DEF:scriptext.def $(LDFLAGS) $** /out:$@


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | WinDbg scripting extensions
  2 | ===========================
  3 | 
  4 | Among the many flaws of the WinDbg scripting language is a complete lack of advanced
  5 | text searching/replacing functionality, especially for aliases. The library
  6 | provides a simple, sed-like "s" command to search and replace regexp patterns in
  7 | strings and WinDbg aliases. Together with WinDbg's ability to set aliases
  8 | to the output of arbitrary WinDbg commands, the library provides an effective tool for
  9 | writing advanced WinDbg scripts without the need to reach for Python-based substitutes.
 10 | 
 11 | Additionally, the library supplies a simple set of commands to open, read, write and
 12 | close text/binary files. Of course, this is neither a shell nor Python, but it should
 13 | be sufficient for simple scripts.
 14 | 
 15 | NOTE: Since my contact with the Windows platform and the WinDbg debugger is currently
 16 | rather occasional, the project is no longer maintained. If you are interested
 17 | in continuing work on it, feel free to contact me.
 18 | 
 19 | scriptext help command output
 20 | -----------------------------
 21 | 
 22 |     scriptext: Various scripting utilities
 23 | 
 24 |     sxtr [-t num] input/pattern/replacement
 25 |         Look for a substring of the input string matching the extended POSIX RE pattern.
 26 |         If the pattern matches, the substring is extracted and modified according to
 27 |         the replacement string. The input string may contain alphanumeric characters
 28 |         plus '_' only, or must be enclosed in '' or "". If it's enclosed in [], then
 29 |         the input string specifies an alias name containing a string to process.
 30 |         Delimiter character is recognized as the first one after the input string. All
 31 |         the strings may contain escaped characters.
 32 |         -t: If specified, provides a pseudo-reg $t number where the matching result
 33 |             will be set: 0 - not matched, 1 - matched.
 34 | 
 35 |     fopn [-m mode] [-t num] fname
 36 |         Open a file with a name fname.
 37 |         -m: Open mode (C standard). "r+" by default.
 38 |         -t: Pseudo-reg $t number where a handle of the opened file will be written. In
 39 |             case of opening error zero will be written there. If not specified $t0 is
 40 |             taken.
 41 |     fwrt hndl input
 42 |         Write the input string to the file with the handle hndl. If the input string
 43 |         is enclosed in [] then it specifies an alias name containing a string to write.
 44 |         The input string may contain escaped characters.
 45 |     frdl hndl
 46 |         Read line from a file with the handle hndl. The file shall be opened for read
 47 |         in the text mode.
 48 |     fcls hndl
 49 |         Close a file with a handle hndl.
 50 | 
 51 |     help
 52 |         Display this help.
 53 | 
 54 | Compiling and Installing
 55 | ------------------------
 56 | 
 57 | Prerequisites:
 58 | 
 59 |  - MS SDK with `cl`, `link` and `nmake` (MS Visual Studio is not needed),
 60 |  - Debugging Tools for Windows with its SDK.
 61 | 
 62 | Compilation:
 63 | 
 64 | Set up the required build environment for your target platform (x86/x64/ia64,
 65 | debug/release etc.) by calling the MS SDK's `SetEnv.Cmd` script with the proper
 66 | arguments, then build the library:
 67 | 
 68 |     nmake
 69 | 
 70 | The result is the `scriptext.dll` library located in the source directory. Install
 71 | it with:
 72 | 
 73 |     nmake install
 74 | 
 75 | Loading and testing
 76 | -------------------
 77 | 
 78 | I. Loading
 79 | 
 80 |     0:000> .load scriptext
 81 | 
 82 | II. Help info
 83 | 
 84 |     0:000> !scriptext.help
 85 | 
 86 | III. Text searching and replacement
 87 | 
 88 | Create the `test_alias` alias, look for the `test` string in it and replace it with
 89 | `TEST`. The `$t0` pseudo-register is set to 1 (the searched string was found in the alias).
 90 | 
 91 |     0:000> aS test_alias "Let's try test"
 92 | 
 93 |     0:000> al
 94 |       Alias            Value
 95 |      -------          -------
 96 |      test_alias       Let's try test
 97 | 
 98 |     0:000> !sxtr -t0 [test_alias]/(.*)test(.*)/\1TEST\2
 99 |     Let's try TEST
100 |     0:000> r @$t0
101 |     $t0=0000000000000001
102 | 
103 | Look for the `test` and `TEST` substrings in `test_alias` and set the `$t0`
104 | pseudo-register to the result of each search. The search is performed on the
105 | substituted alias value.
106 | 
107 |     0:000> !sxtr "-t0 \"${test_alias}\"/test"
108 |     0:000> r @$t0
109 |     $t0=0000000000000001
110 | 
111 |     0:000> !sxtr "-t0 \"${test_alias}\"/TEST"
112 |     0:000> r @$t0
113 |     $t0=0000000000000000
114 | 
115 | IV. Simple grep
116 | 
117 | Create the following script with filename `simple_grep`:
118 | 
119 |     !fopn -m "r" ${$arg1}
120 |     .if (@$t0!=0) {
121 |       aS /c ${/v:ln} !frdl @$t0
122 |       .while (1) {
123 |         !sxtr -t1 [ln]/.+
124 |         .if (@$t1!=0) {
125 |           !sxtr -t1 [ln]/${$arg2}
126 |           .if (@$t1!=0) {
127 |             .echo ${ln}
128 |           }
129 |         } .else {
130 |           .break
131 |         }
132 |         aS /c ${/v:ln} !frdl @$t0
133 |       }
134 |       !fcls @$t0
135 |       ad /q ${/v:ln}
136 |     }
137 | 
138 | and execute it with 2 arguments: input file and searched RE pattern:
139 | 
140 |     0:000> $$>a< simple_grep file pattern
141 | 
142 | License
143 | -------
144 | 
145 | GNU GENERAL PUBLIC LICENSE v2. See LICENSE file for details.
146 | 


--------------------------------------------------------------------------------
/common.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright (c) 2015 Piotr Stolarz
  3 |    scriptext: Various scripting utilities WinDbg extension
  4 | 
  5 |    Distributed under the GNU General Public License (the License)
  6 |    see accompanying file LICENSE for details.
  7 | 
  8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
  9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 10 |    See the License for more information.
 11 |  */
 12 | 
 13 | #include "common.h"
 14 | #include <stdarg.h>
 15 | 
 16 | /*
 17 |     TLS and dbg client related funcs
 18 |  */
 19 | static DWORD tls_i=TLS_OUT_OF_INDEXES;
 20 | 
 21 | void set_tls_i(DWORD tls_i) {
 22 |     ::tls_i=tls_i;
 23 | }
 24 | 
 25 | DWORD get_tls_i(void) {
 26 |     return tls_i;
 27 | }
 28 | 
 29 | void set_client(PDEBUG_CLIENT4 Client) {
 30 |     if (tls_i!=TLS_OUT_OF_INDEXES) TlsSetValue(tls_i, Client);
 31 | }
 32 | 
 33 | PDEBUG_CLIENT4 get_client(void) {
 34 |     return (PDEBUG_CLIENT4)
 35 |         (tls_i!=TLS_OUT_OF_INDEXES ? TlsGetValue(tls_i) : NULL);
 36 | }
 37 | 
 38 | static void vdbgprintf(
 39 |     ULONG ctrl, ULONG mask, const char *pc_pref, const char *format, va_list args)
 40 | {
 41 |     IDebugControl *DebugControl=NULL;
 42 | 
 43 |     PDEBUG_CLIENT4 Client;
 44 |     if (!(Client=get_client())) goto finish;
 45 | 
 46 |     if (Client->QueryInterface(
 47 |         __uuidof(IDebugControl), (void **)&DebugControl)!=S_OK) goto finish;
 48 | 
 49 |     if (pc_pref) DebugControl->ControlledOutput(ctrl, mask, "%s: ", pc_pref);
 50 |     DebugControl->ControlledOutputVaList(ctrl, mask, format, args);
 51 | 
 52 | finish:
 53 |     if (DebugControl) DebugControl->Release();
 54 |     return;
 55 | }
 56 | 
 57 | #define DBGPRNT_METHOD(name, ctrl, pref)      \
 58 |     void name(const char *format, ...) {      \
 59 |         va_list args;                         \
 60 |         va_start(args, format);               \
 61 |         vdbgprintf(ctrl, DEBUG_OUTPUT_NORMAL, \
 62 |             pref, format, args);              \
 63 |         va_end(args);                         \
 64 |     }
 65 | 
 66 | DBGPRNT_METHOD(dbgprintf, DEBUG_OUTCTL_ALL_CLIENTS, NULL)
 67 | DBGPRNT_METHOD(dbg_dbgprintf, DEBUG_OUTCTL_ALL_CLIENTS, "DBG")
 68 | DBGPRNT_METHOD(info_dbgprintf, DEBUG_OUTCTL_ALL_CLIENTS, "INFO")
 69 | DBGPRNT_METHOD(warn_dbgprintf, DEBUG_OUTCTL_ALL_CLIENTS, "WARN")
 70 | DBGPRNT_METHOD(err_dbgprintf, DEBUG_OUTCTL_ALL_CLIENTS, "ERR")
 71 | DBGPRNT_METHOD(cdbgprintf, DEBUG_OUTCTL_THIS_CLIENT, NULL)
 72 | 
 73 | /*
 74 |     Memory access functions
 75 |  */
 76 | #define MEMACCESS_METHOD(name, func)                                         \
 77 |     ULONG name(ULONG64 addr, PVOID p_buf, ULONG buf_sz, PULONG p_cb)         \
 78 |     {                                                                        \
 79 |         ULONG ret=FALSE;                                                     \
 80 |         IDebugDataSpaces *DebugDataSpaces=NULL;                              \
 81 |                                                                              \
 82 |         PDEBUG_CLIENT4 Client;                                               \
 83 |         if (Client=get_client()) {                                           \
 84 |             if (Client->QueryInterface(__uuidof(IDebugDataSpaces),           \
 85 |                 (void **)&DebugDataSpaces)==S_OK) {                          \
 86 |                 ret=(DebugDataSpaces->func(addr, p_buf, buf_sz, p_cb)==S_OK);\
 87 |             }                                                                \
 88 |         }                                                                    \
 89 |                                                                              \
 90 |         if (DebugDataSpaces) DebugDataSpaces->Release();                     \
 91 |         return ret;                                                          \
 92 |     }
 93 | 
 94 | /* WdbgExts ReadMemory(), WriteMemory() analogous */
 95 | MEMACCESS_METHOD(read_memory, ReadVirtual)
 96 | MEMACCESS_METHOD(write_memory, WriteVirtual)
 97 | 
 98 | /* WdbgExts GetExpressionEx() analogous */
 99 | BOOL get_expression(PCSTR pc_expr, ULONG64 *p_val, PCSTR *ppc_rem)
100 | {
101 |     BOOL ret=FALSE;
102 |     IDebugControl *DebugControl=NULL;
103 | 
104 |     PDEBUG_CLIENT4 Client;
105 |     if (!(Client=get_client())) goto finish;
106 | 
107 |     if ((Client->QueryInterface(
108 |         __uuidof(IDebugControl), (void **)&DebugControl))!=S_OK) goto finish;
109 | 
110 |     ULONG rem_i;
111 |     DEBUG_VALUE val;
112 |     if (DebugControl->Evaluate(pc_expr, DEBUG_VALUE_INT64, &val, &rem_i)!=S_OK)
113 |         goto finish;
114 | 
115 |     if (ppc_rem) {
116 |         for (pc_expr+=rem_i; isspace(*pc_expr); pc_expr++);
117 |         *ppc_rem = pc_expr;
118 |     }
119 |     *p_val = (ULONG64)val.I64;
120 | 
121 |     ret=TRUE;
122 | finish:
123 |     if (DebugControl) DebugControl->Release();
124 |     return ret;
125 | }
126 | 
/* Numeric value of the hex digit 'c' (either case) or -1 for any other char.
   NOTE: 'c' is evaluated more than once. */
#define HEXDIG2INT(c)   \
    ((int)(((c)>='0' && (c)<='9') ? (c)-'0' : \
    (((c)>='a' && (c)<='f') ? (c)-'a'+10 : \
    (((c)>='A' && (c)<='F') ? (c)-'A'+10 : -1))))

/* exported; see header for details */
size_t stresc(char *pc_in, char endc, char *p_lstc)
{
    /* single-char escapes and the chars they stand for (index matched) */
    static const char esc_codes[] =
        {'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"'};
    static const char esc_vals[] =
        {'\a', '\b', '\f', '\n', '\r', '\t', '\v', '\\', '\'', '"'};

    size_t rd=0;    /* read position */
    size_t wr=0;    /* write position; never ahead of the read position */
    char cur;

    /* the string is rewritten in place until \0 or un-escaped 'endc' */
    while ((cur=pc_in[rd])!=0 && cur!=endc)
    {
        if (cur=='\\')
        {
            const char nxt=pc_in[rd+1];
            size_t k;

            for (k=0; k<sizeof(esc_codes); k++) {
                if (esc_codes[k]==nxt) break;
            }

            if (k<sizeof(esc_codes)) {
                /* simple escape sequence */
                cur=esc_vals[k];
                rd++;
            } else if (nxt=='x') {
                /* hex encoded char: both hex digits are required, otherwise
                   the sequence is copied unchanged; the 2nd digit is looked
                   at only if the 1st one is valid */
                const int hi=HEXDIG2INT(pc_in[rd+2]);
                if (hi!=-1) {
                    const int lo=HEXDIG2INT(pc_in[rd+3]);
                    if (lo!=-1) {
                        cur=(char)((hi<<4) | lo);
                        rd+=3;
                    }
                }
            } else if (endc && nxt==endc) {
                /* escaped ending char */
                cur=nxt;
                rd++;
            }
            /* any other sequence: the backslash is copied as is */
        }

        pc_in[wr++]=cur;
        rd++;
    }

    if (p_lstc) *p_lstc=cur;
    pc_in[wr]=0;
    return rd+1;
}
180 | 


--------------------------------------------------------------------------------
/common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright (c) 2015 Piotr Stolarz
 3 |    scriptext: Various scripting utilities WinDbg extension
 4 | 
 5 |    Distributed under the GNU General Public License (the License)
 6 |    see accompanying file LICENSE for details.
 7 | 
 8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
 9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 |    See the License for more information.
11 |  */
12 | 
13 | #ifndef __SCRIPTEXT_COMMON_H__
14 | #define __SCRIPTEXT_COMMON_H__
15 | 
16 | #include <windows.h>
17 | #include <winnt.h>
18 | #include <stdio.h>
19 | #include <dbgeng.h>
20 | #include "rdflags.h"
21 | 
/* Fallback definitions of the boolean/null constants; they take effect only
   if the headers included above have not defined them already */
#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE !FALSE
#endif

#ifndef NULL
#define NULL 0
#endif

/* Round 'x' up to the nearest multiple of 4 */
#define RNDUP_DW(x)   ((((x)+3)>>2)<<2)
35 | 
/* TLS and dbg client related functions */
void set_tls_i(DWORD tls_i);
DWORD get_tls_i(void);

/* Get/set debug client object. get_client() may return NULL; its result is
   checked against NULL by the memory access and expression functions below. */
void set_client(PDEBUG_CLIENT4 Client);
PDEBUG_CLIENT4 get_client(void);

/* dbg printf related functions (printf-like format and args).
   The dbg_, info_, warn_ and err_ variants pass the "DBG", "INFO", "WARN" and
   "ERR" prefix respectively to the output routine; dbgprintf() and
   cdbgprintf() pass no prefix. cdbgprintf() uses the
   DEBUG_OUTCTL_THIS_CLIENT output control, all the others
   DEBUG_OUTCTL_ALL_CLIENTS. */
void dbgprintf(const char *format, ...);
void dbg_dbgprintf(const char *format, ...);
void info_dbgprintf(const char *format, ...);
void warn_dbgprintf(const char *format, ...);
void err_dbgprintf(const char *format, ...);
void cdbgprintf(const char *format, ...);

/* Memory access functions.
   Thin wrappers over IDebugDataSpaces::ReadVirtual()/WriteVirtual(); all args
   are passed through unchanged. Return TRUE if the call returned S_OK, FALSE
   otherwise (including no debug client or interface available). */
ULONG read_memory(ULONG64 addr, PVOID p_buf, ULONG buf_sz, PULONG p_cb);
ULONG write_memory(ULONG64 addr, PVOID p_buf, ULONG buf_sz, PULONG p_cb);

/* Expression evaluation.
   Evaluates 'pc_expr' as a 64-bit integer and writes the result under
   'p_val'. If 'ppc_rem' is not NULL it gets a pointer to the not evaluated
   rest of 'pc_expr' with leading white spaces skipped. Returns TRUE on
   success; the output args are written only in this case. */
BOOL get_expression(PCSTR pc_expr, ULONG64 *p_val, PCSTR *ppc_rem);

/* Replace escaped chars in string 'pc_in' (in place) up to \0 or the first
   un-escaped 'endc' char; the result is always NUL terminated.
   Recognized sequences: \a \b \f \n \r \t \v \\ \' \" \xHH (exactly two hex
   digits) and, if 'endc' is not 0, \<endc>. Other sequences are left
   unchanged.
   Returns number of processed chars, that is the index of the ending char in
   the original string plus 1. 'p_lstc' (if not NULL) gets last processed char
   ending the resulting string (\0 or 'endc').
 */
size_t stresc(char *pc_in, char endc=0, char *p_lstc=NULL);
63 | 
64 | #endif /* __SCRIPTEXT_COMMON_H__ */
65 | 


--------------------------------------------------------------------------------
/file.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright (c) 2015 Piotr Stolarz
  3 |    scriptext: Various scripting utilities WinDbg extension
  4 | 
  5 |    Distributed under the GNU General Public License (the License)
  6 |    see accompanying file LICENSE for details.
  7 | 
  8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
  9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 10 |    See the License for more information.
 11 |  */
 12 | 
 13 | #include "common.h"
 14 | #include "file.h"
 15 | 
 16 | /* exported; see header for details */
 17 | BOOL file_open(
 18 |     const char *pc_file, const char *pc_mode, const char *pc_prnm)
 19 | {
 20 |     BOOL ret=FALSE;
 21 |     FILE *fh=NULL;
 22 |     IDebugRegisters2 *DebugRegisters=NULL;
 23 | 
 24 |     if ((get_client()->QueryInterface(
 25 |         __uuidof(IDebugRegisters2), (void **)&DebugRegisters))!=S_OK)
 26 |         goto finish;
 27 | 
 28 |     fh = fopen(pc_file, pc_mode);
 29 |     if (!fh) err_dbgprintf("File opening error: %s\n", strerror(errno));
 30 | 
 31 |     DEBUG_VALUE fh_val;
 32 |     fh_val.Type = DEBUG_VALUE_INT64;
 33 |     fh_val.I64 = (sizeof(fh)<8 ? DEBUG_EXTEND64(fh) : (ULONG64)fh);
 34 |     fh_val.Nat = FALSE;
 35 | 
 36 |     ULONG pr_i;
 37 |     if (DebugRegisters->GetPseudoIndexByName(pc_prnm, &pr_i)==S_OK)
 38 |         if (DebugRegisters->SetPseudoValues(
 39 |             DEBUG_REGSRC_DEBUGGEE, 1, NULL, pr_i, &fh_val)==S_OK)
 40 |                 ret=TRUE;
 41 | 
 42 |     if (!ret) err_dbgprintf("Pseudo-reg access error\n");
 43 | finish:
 44 |     if (fh && !ret) fclose(fh);
 45 |     if (DebugRegisters) DebugRegisters->Release();
 46 |     return ret;
 47 | }
 48 | 
 49 | /* exported; see header for details */
 50 | BOOL file_wrtstr(FILE *fh, const char *pc_in)
 51 | {
 52 |     BOOL ret=FALSE;
 53 |     IDebugControl2 *DebugControl=NULL;
 54 | 
 55 |     char buf[3072];     /* default buffer */
 56 |     char *pc_ebuf=NULL; /* extra buffer */
 57 | 
 58 |     if (get_client()->QueryInterface(
 59 |         __uuidof(IDebugControl2), (void **)&DebugControl)!=S_OK) goto finish;
 60 | 
 61 |     strncpy(buf, pc_in, sizeof(buf));
 62 |     buf[sizeof(buf)-1]=0;
 63 | 
 64 |     char *pc_str;
 65 |     if (buf[0]=='[')
 66 |     {
 67 |         /* alias name */
 68 |         char *pc_aname = &buf[1];
 69 |         stresc(pc_aname, ']', NULL);
 70 |         if (!*pc_aname) goto finish;
 71 | 
 72 |         pc_str = pc_aname+strlen(pc_aname)+1;
 73 |         size_t rem_buf_sz = pc_str-&buf[0];
 74 | 
 75 |         /* get alias val size */
 76 |         ULONG aval_sz;
 77 |         if (DebugControl->GetTextReplacement(
 78 |             pc_aname, 0, NULL, 0, NULL, NULL, 0, &aval_sz)!=S_OK) goto finish;
 79 | 
 80 |         /* value read */
 81 |         aval_sz = RNDUP_DW(aval_sz+1);
 82 |         if (aval_sz > rem_buf_sz) {
 83 |             if (!(pc_str=pc_ebuf=(char*)malloc(aval_sz))) goto finish;
 84 |         }
 85 |         if (DebugControl->GetTextReplacement(
 86 |             pc_aname, 0, NULL, 0, NULL, pc_str, aval_sz, NULL)!=S_OK)
 87 |                 goto finish;
 88 |     } else
 89 |     if (buf[0]=='\'' || buf[0]=='"') {
 90 |         /* apostrophed string */
 91 |         pc_str = &buf[1];
 92 |         stresc(pc_str, buf[0], NULL);
 93 |     } else {
 94 |         /* string */
 95 |         pc_str=buf;
 96 |         stresc(pc_str);
 97 |     }
 98 |     if (!*pc_str) goto finish;
 99 | 
100 |     size_t cb = strlen(pc_str);
101 |     if (fwrite(pc_str, 1, cb, fh)==cb) ret=TRUE;
102 |     else err_dbgprintf("File write error\n");
103 | 
104 |     fflush(fh);
105 | 
106 | finish:
107 |     if (pc_ebuf) free(pc_ebuf);
108 |     if (DebugControl) DebugControl->Release();
109 |     return ret;
110 | }
111 | 
112 | /* exported; see header for details */
113 | void file_rdln(FILE *fh)
114 | {
115 |     char buf[0x100+1];
116 | 
117 |     for (int i=0, c; (c=fgetc(fh))!=EOF; i++) {
118 |         if (i>=sizeof(buf)-1) {
119 |             /* flush the read buffer */
120 |             i=0;
121 |             buf[sizeof(buf)-1]=0;
122 |             dbgprintf("%s", buf);
123 |         }
124 | 
125 |         if (c!='\n') buf[i]=(char)c;
126 |         else {
127 |             buf[i]=0;
128 |             dbgprintf("%s\n", buf);
129 |             break;
130 |         }
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/file.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright (c) 2015 Piotr Stolarz
 3 |    scriptext: Various scripting utilities WinDbg extension
 4 | 
 5 |    Distributed under the GNU General Public License (the License)
 6 |    see accompanying file LICENSE for details.
 7 | 
 8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
 9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 |    See the License for more information.
11 |  */
12 | 
13 | #ifndef __SCRIPTEXT_FILE_H__
14 | #define __SCRIPTEXT_FILE_H__
15 | 
/* Open a file with name 'pc_file' and a mode 'pc_mode' (as for fopen()). Write
   its handle under a pseudo-register with name 'pc_prnm' (in case of file
   open error 0 will be written). Returns TRUE if the pseudo-register has been
   successfully written.
 */
BOOL file_open(
    const char *pc_file, const char *pc_mode, const char *pc_prnm);

/* Write a string/alias value to a file. 'pc_in' is one of: [alias_name],
   a string enclosed in ' or " chars, or a plain string; escape sequences are
   replaced as by stresc(). Returns TRUE if a non-empty string has been
   completely written.
 */
BOOL file_wrtstr(FILE *fh, const char *pc_in);

/* Read a line from a file and print it on the console */
void file_rdln(FILE *fh);
28 | 
29 | #endif /* __SCRIPTEXT_FILE_H__ */
30 | 


--------------------------------------------------------------------------------
/rdflags.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright (c) 2015 Piotr Stolarz
  3 |    scriptext: Various scripting utilities WinDbg extension
  4 | 
  5 |    Distributed under the GNU General Public License (the License)
  6 |    see accompanying file LICENSE for details.
  7 | 
  8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
  9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 10 |    See the License for more information.
 11 |  */
 12 | 
 13 | #include <common.h>
 14 | 
 15 | /* exported; see header for details */
 16 | size_t read_flags(const char *pc_in, flag_desc_t *p_fdsc)
 17 | {
 18 |     int state=0;
 19 |     char arg_delim;
 20 |     size_t i, j, arg_i;
 21 | 
 22 |     /* clear output fields */
 23 |     for (j=0; p_fdsc[j].c_flag; j++) {
 24 |         p_fdsc[j].is_pres=0;
 25 |         p_fdsc[j].has_dups=0;
 26 |         p_fdsc[j].has_esc=0;
 27 |         p_fdsc[j].arg_len=0;
 28 |         p_fdsc[j].pc_arg=NULL;
 29 |     }
 30 | 
 31 |     for (i=0; pc_in[i] && state>=0; i++)
 32 |     {
 33 |         switch (state)
 34 |         {
 35 |         /* read until flag prefix: '-' */
 36 |         case 0:
 37 |             if (!isspace(pc_in[i])) {
 38 |                 if (pc_in[i]=='-') state=1;
 39 |                 else {
 40 |                     /* no more flags; finish parsing */
 41 |                     i--; state=-1;
 42 |                 }
 43 |             }
 44 |             break;
 45 | 
 46 |         /* recognize flag marker */
 47 |         case 1:
 48 |             if (isspace(pc_in[i])) {
 49 |                 /* no flag provided; prepare for reading the next flag */
 50 |                 state=0;
 51 |             } else
 52 |             if (pc_in[i]=='"' || pc_in[i]=='\'') {
 53 |                 /* arg to unknown flag; finish parsing */
 54 |                 i--; state=-1;
 55 |             } else
 56 |             if (pc_in[i]=='-') {
 57 |                 /* ignore sequences of '-' (no long arg supported) */
 58 |             } else {
 59 |                 for (j=0; p_fdsc[j].c_flag; j++)
 60 |                 {
 61 |                     if (p_fdsc[j].c_flag==pc_in[i]) {
 62 |                         if (p_fdsc[j].is_pres) {
 63 |                             p_fdsc[j].has_dups=1;
 64 |                             p_fdsc[j].has_esc=0;
 65 |                             p_fdsc[j].arg_len=0;
 66 |                             p_fdsc[j].pc_arg=NULL;
 67 |                         } else {
 68 |                             p_fdsc[j].is_pres=1;
 69 |                         }
 70 |                         break;
 71 |                     }
 72 |                 }
 73 |                 if (p_fdsc[j].c_flag) {
 74 |                     if (p_fdsc[j].allow_arg) {
 75 |                         arg_i=j;
 76 |                         state=2;
 77 |                     }
 78 |                 } else {
 79 |                     /* unknown flag, ignore it and read the next one */
 80 |                 }
 81 |             }
 82 |             break;
 83 | 
 84 |         /* recognize flag arg delimiter
 85 |            input:
 86 |            arg_i: index of flag desc
 87 |          */
 88 |         case 2:
 89 |             /* read until start of arg */
 90 |             if (!isspace(pc_in[i])) {
 91 |                 if (pc_in[i]=='-') {
 92 |                     /* no arg provided */
 93 |                     state=1; continue;
 94 |                 } else
 95 |                 if (pc_in[i]=='"') {
 96 |                     arg_delim='"';
 97 |                     p_fdsc[arg_i].pc_arg = (char*)&pc_in[i+1];
 98 |                 } else
 99 |                 if (pc_in[i]=='\'') {
100 |                     arg_delim='\'';
101 |                     p_fdsc[arg_i].pc_arg = (char*)&pc_in[i+1];
102 |                 } else {
103 |                     arg_delim=0;
104 |                     p_fdsc[arg_i].arg_len++;
105 |                     p_fdsc[arg_i].pc_arg = (char*)&pc_in[i];
106 |                 }
107 |                 state=3;
108 |             }
109 |             break;
110 | 
111 |         /* read flag arg
112 |            input:
113 |            arg_delim; 0:white space
114 |            arg_i: index of flag desc */
115 |         case 3:
116 |             if (!arg_delim) {
117 |                 if (isspace(pc_in[i])) state=0;
118 |                 else
119 |                 if (pc_in[i]=='-') state=1;
120 |                 else
121 |                 p_fdsc[arg_i].arg_len++;
122 |             } else {
123 |                 if (arg_delim==pc_in[i]) state=1;
124 |                 else {
125 |                     if (pc_in[i]=='\\') state=4;
126 |                     p_fdsc[arg_i].arg_len++;
127 |                 }
128 |             }
129 |             break;
130 | 
131 |         /* escaped char in apostrophed flag arg */
132 |         case 4:
133 |             if (pc_in[i]=='"' || pc_in[i]=='\'') {
134 |                 p_fdsc[arg_i].has_esc=1;
135 |             }
136 |             p_fdsc[arg_i].arg_len++;
137 |             state=3;
138 |             break;
139 |         }
140 |     }
141 |     return i;
142 | }
143 | 


--------------------------------------------------------------------------------
/rdflags.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright (c) 2015 Piotr Stolarz
 3 |    scriptext: Various scripting utilities WinDbg extension
 4 | 
 5 |    Distributed under the GNU General Public License (the License)
 6 |    see accompanying file LICENSE for details.
 7 | 
 8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
 9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 |    See the License for more information.
11 |  */
12 | 
13 | #ifndef __RDFLAGS_H__
14 | #define __RDFLAGS_H__
15 | 
/* Description of a single flag; an element of the table passed to
   read_flags(). The table is terminated by an element with 'c_flag' set
   to 0. */
typedef struct _flag_desc_t
{
    /* read_flags() input fields */
    char c_flag;        /* flag's char */
    int allow_arg;      /* if !=0 the flag may be provided with an arg */

    /* read_flags() output fields */
    struct {
        unsigned int is_pres:  1;   /* flag has been found */
        unsigned int has_dups: 1;   /* duplicated flags occurred */
        unsigned int has_esc:  1;   /* " or ' chars escaped inside 'pc_arg'
                                       string */
    };
    size_t arg_len;     /* flag's argument length */
    char *pc_arg;       /* points to flag's argument (if arg_len>0); the
                           pointer refers to the parsed input string itself,
                           so the arg is 'arg_len' chars long and is not NUL
                           terminated at its end */
} flag_desc_t;
32 | 
/* Read flags from 'pc_in' and write under 'p_fdsc' table (the table must be
   terminated by an element with 'c_flag' set to 0). Output fields of all the
   table elements are cleared before parsing.
   Returns number of read chars from the input.

   NOTES:
   1. Unknown flags are ignored.
   2. If a flag with an arg allowed, occurs more than once, the last occurrence
      (and its arg) is taken into account. Duplicated flags existence is
      indicated by 'has_dups' flag.
   3. The " and ' chars may be escaped inside arg enclosed by them, but the
      func doesn't replace them in 'pc_in' string. 'has_esc' flag is set to
      indicate this case.
 */
size_t read_flags(const char *pc_in, flag_desc_t *p_fdsc);
46 | 
47 | #endif /* __RDFLAGS_H__ */
48 | 


--------------------------------------------------------------------------------
/regex/Makefile:
--------------------------------------------------------------------------------
# -c: compile only; -I.: look for headers in this directory.
# REGEX_STATIC makes regex.h declare the API without DLL import/export
# decoration; HAVE_CONFIG_H makes regex.c include config.h.
CFLAGS = -c -I. -DREGEX_STATIC -DHAVE_CONFIG_H

# There is no explicit rule for regex.obj here; it is presumably built by the
# make tool's built-in .c -> .obj rule using CFLAGS (TODO confirm for the make
# tool in use).
all: regex.obj

clean:
	del *.obj
7 | 


--------------------------------------------------------------------------------
/regex/config.h:
--------------------------------------------------------------------------------
 1 | /* MS SDK specific config */
 2 | 
 3 | #ifndef _CONFIG_H
 4 | #define _CONFIG_H
 5 | 
/* boolean type and constants for the regex sources */
#define bool	int
#define true	1
#define false	0

/* map the names used by the regex sources onto their MS CRT counterparts */
#define strcasecmp _stricmp
#define alloca _alloca
12 | 
13 | #endif /* _CONFIG_H */
14 | 


--------------------------------------------------------------------------------
/regex/re_comp.h:
--------------------------------------------------------------------------------
 1 | /*  Copyright (C) 1996 Free Software Foundation, Inc.
 2 |    This file is part of the GNU C Library.
 3 | 
 4 |    The GNU C Library is free software; you can redistribute it and/or
 5 |    modify it under the terms of the GNU Lesser General Public
 6 |    License as published by the Free Software Foundation; either
 7 |    version 2.1 of the License, or (at your option) any later version.
 8 | 
 9 |    The GNU C Library is distributed in the hope that it will be useful,
10 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |    Lesser General Public License for more details.
13 | 
14 |    You should have received a copy of the GNU Lesser General Public
15 |    License along with the GNU C Library; if not, write to the Free
16 |    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 |    02111-1307 USA.  */
18 | 
19 | #ifndef _RE_COMP_H
20 | #define _RE_COMP_H	1
21 | 
22 | /* This is only a wrapper around the <regex.h> file.  XPG4.2 mentions
23 |    this name.  */
24 | #include <regex.h>
25 | 
26 | #endif /* re_comp.h */
27 | 


--------------------------------------------------------------------------------
/regex/regex.c:
--------------------------------------------------------------------------------
 1 | /* Extended regular expression matching and search library.
 2 |    Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
 3 |    This file is part of the GNU C Library.
 4 |    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 5 | 
 6 |    The GNU C Library is free software; you can redistribute it and/or
 7 |    modify it under the terms of the GNU Lesser General Public
 8 |    License as published by the Free Software Foundation; either
 9 |    version 2.1 of the License, or (at your option) any later version.
10 | 
11 |    The GNU C Library is distributed in the hope that it will be useful,
12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |    Lesser General Public License for more details.
15 | 
16 |    You should have received a copy of the GNU Lesser General Public
17 |    License along with the GNU C Library; if not, write to the Free
18 |    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 |    02111-1307 USA.  */
20 | 
21 | #ifdef HAVE_CONFIG_H
22 | #include "config.h"
23 | #endif
24 | 
/* Make sure no one compiles this code with a C++ compiler.  */
26 | #ifdef __cplusplus
27 | # error "This is C code, use a C compiler"
28 | #endif
29 | 
30 | #ifdef _LIBC
31 | /* We have to keep the namespace clean.  */
32 | # define regfree(preg) __regfree (preg)
33 | # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
34 | # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
35 | # define regerror(errcode, preg, errbuf, errbuf_size) \
36 | 	__regerror(errcode, preg, errbuf, errbuf_size)
37 | # define re_set_registers(bu, re, nu, st, en) \
38 | 	__re_set_registers (bu, re, nu, st, en)
39 | # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
40 | 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
41 | # define re_match(bufp, string, size, pos, regs) \
42 | 	__re_match (bufp, string, size, pos, regs)
43 | # define re_search(bufp, string, size, startpos, range, regs) \
44 | 	__re_search (bufp, string, size, startpos, range, regs)
45 | # define re_compile_pattern(pattern, length, bufp) \
46 | 	__re_compile_pattern (pattern, length, bufp)
47 | # define re_set_syntax(syntax) __re_set_syntax (syntax)
48 | # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
49 | 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
50 | # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
51 | 
52 | # include "../locale/localeinfo.h"
53 | #endif
54 | 
55 | /* On some systems, limits.h sets RE_DUP_MAX to a lower value than
56 |    GNU regex allows.  Include it before <regex.h>, which correctly
57 |    #undefs RE_DUP_MAX and sets it to the right value.  */
58 | #include <limits.h>
59 | 
60 | #include <regex.h>
61 | #include "regex_internal.h"
62 | 
63 | #include "regex_internal.c"
64 | #include "regcomp.c"
65 | #include "regexec.c"
66 | 
67 | /* Binary backward compatibility.  */
68 | #if _LIBC
69 | # include <shlib-compat.h>
70 | # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
71 | link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
72 | int re_max_failures = 2000;
73 | # endif
74 | #endif
75 | 


--------------------------------------------------------------------------------
/regex/regex.h:
--------------------------------------------------------------------------------
  1 | /* Definitions for data structures and routines for the regular
  2 |    expression library.
  3 |    Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
  4 |    Free Software Foundation, Inc.
  5 |    This file is part of the GNU C Library.
  6 | 
  7 |    The GNU C Library is free software; you can redistribute it and/or
  8 |    modify it under the terms of the GNU Lesser General Public
  9 |    License as published by the Free Software Foundation; either
 10 |    version 2.1 of the License, or (at your option) any later version.
 11 | 
 12 |    The GNU C Library is distributed in the hope that it will be useful,
 13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 15 |    Lesser General Public License for more details.
 16 | 
 17 |    You should have received a copy of the GNU Lesser General Public
 18 |    License along with the GNU C Library; if not, write to the Free
 19 |    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 20 |    02111-1307 USA.  */
 21 | 
 22 | #ifndef _REGEX_H
 23 | #define _REGEX_H 1
 24 | 
 25 | #include <sys/types.h>
 26 | 
/* Compiler specific spelling of the DLL import/export decoration.  */
#ifndef __GNUC__
# define __DLL_IMPORT__	__declspec(dllimport)
# define __DLL_EXPORT__	__declspec(dllexport)
#else
# define __DLL_IMPORT__	__attribute__((dllimport)) extern
# define __DLL_EXPORT__	__attribute__((dllexport)) extern
#endif 

/* REGEX_DLL_IMPEXP decorates the declarations of the library's public
   symbols.  On Windows: BUILD_REGEX_DLL selects export, REGEX_STATIC or
   USE_REGEX_STATIC selects no decoration, USE_REGEX_DLL (also the default
   if none of these is defined) selects import.  On other platforms the
   macro is always empty.  */
#if (defined __WIN32__) || (defined _WIN32)
# ifdef BUILD_REGEX_DLL
#  define REGEX_DLL_IMPEXP	__DLL_EXPORT__
# elif defined(REGEX_STATIC)
#  define REGEX_DLL_IMPEXP	 
# elif defined (USE_REGEX_DLL)
#  define REGEX_DLL_IMPEXP	__DLL_IMPORT__
# elif defined (USE_REGEX_STATIC)
#  define REGEX_DLL_IMPEXP 	 
# else /* assume USE_REGEX_DLL */
#  define REGEX_DLL_IMPEXP	__DLL_IMPORT__
# endif
#else /* __WIN32__ */
# define REGEX_DLL_IMPEXP	 
#endif
 50 | 
 51 | /* Allow the use in C++ code.  */
 52 | #ifdef __cplusplus
 53 | extern "C" {
 54 | #endif
 55 | 
 56 | /* The following two types have to be signed and unsigned integer type
 57 |    wide enough to hold a value of a pointer.  For most ANSI compilers
 58 |    ptrdiff_t and size_t should be likely OK.  Still size of these two
 59 |    types is 2 for Microsoft C.  Ugh... */
 60 | typedef long int s_reg_t;
 61 | typedef unsigned long int active_reg_t;
 62 | 
 63 | /* The following bits are used to determine the regexp syntax we
 64 |    recognize.  The set/not-set meanings are chosen so that Emacs syntax
 65 |    remains the value 0.  The bits are given in alphabetical order, and
 66 |    the definitions shifted by one from the previous bit; thus, when we
 67 |    add or remove a bit, only one other definition need change.  */
 68 | typedef unsigned long int reg_syntax_t;
 69 | 
 70 | /* If this bit is not set, then \ inside a bracket expression is literal.
 71 |    If set, then such a \ quotes the following character.  */
 72 | #define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
 73 | 
 74 | /* If this bit is not set, then + and ? are operators, and \+ and \? are
 75 |      literals.
 76 |    If set, then \+ and \? are operators and + and ? are literals.  */
 77 | #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
 78 | 
 79 | /* If this bit is set, then character classes are supported.  They are:
 80 |      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
 81 |      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
 82 |    If not set, then character classes are not supported.  */
 83 | #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
 84 | 
 85 | /* If this bit is set, then ^ and $ are always anchors (outside bracket
 86 |      expressions, of course).
 87 |    If this bit is not set, then it depends:
 88 |         ^  is an anchor if it is at the beginning of a regular
 89 |            expression or after an open-group or an alternation operator;
 90 |         $  is an anchor if it is at the end of a regular expression, or
 91 |            before a close-group or an alternation operator.
 92 | 
 93 |    This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
 94 |    POSIX draft 11.2 says that * etc. in leading positions is undefined.
 95 |    We already implemented a previous draft which made those constructs
 96 |    invalid, though, so we haven't changed the code back.  */
 97 | #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
 98 | 
 99 | /* If this bit is set, then special characters are always special
100 |      regardless of where they are in the pattern.
101 |    If this bit is not set, then special characters are special only in
102 |      some contexts; otherwise they are ordinary.  Specifically,
103 |      * + ? and intervals are only special when not after the beginning,
104 |      open-group, or alternation operator.  */
105 | #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
106 | 
107 | /* If this bit is set, then *, +, ?, and { cannot be first in an re or
108 |      immediately after an alternation or begin-group operator.  */
109 | #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
110 | 
111 | /* If this bit is set, then . matches newline.
112 |    If not set, then it doesn't.  */
113 | #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
114 | 
115 | /* If this bit is set, then . doesn't match NUL.
116 |    If not set, then it does.  */
117 | #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
118 | 
119 | /* If this bit is set, nonmatching lists [^...] do not match newline.
120 |    If not set, they do.  */
121 | #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
122 | 
123 | /* If this bit is set, either \{...\} or {...} defines an
124 |      interval, depending on RE_NO_BK_BRACES.
125 |    If not set, \{, \}, {, and } are literals.  */
126 | #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
127 | 
128 | /* If this bit is set, +, ? and | aren't recognized as operators.
129 |    If not set, they are.  */
130 | #define RE_LIMITED_OPS (RE_INTERVALS << 1)
131 | 
132 | /* If this bit is set, newline is an alternation operator.
133 |    If not set, newline is literal.  */
134 | #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
135 | 
136 | /* If this bit is set, then `{...}' defines an interval, and \{ and \}
137 |      are literals.
138 |   If not set, then `\{...\}' defines an interval.  */
139 | #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
140 | 
141 | /* If this bit is set, (...) defines a group, and \( and \) are literals.
142 |    If not set, \(...\) defines a group, and ( and ) are literals.  */
143 | #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
144 | 
145 | /* If this bit is set, then \<digit> matches <digit>.
146 |    If not set, then \<digit> is a back-reference.  */
147 | #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
148 | 
149 | /* If this bit is set, then | is an alternation operator, and \| is literal.
150 |    If not set, then \| is an alternation operator, and | is literal.  */
151 | #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
152 | 
153 | /* If this bit is set, then an ending range point collating higher
154 |      than the starting range point, as in [z-a], is invalid.
155 |    If not set, then when ending range point collates higher than the
156 |      starting range point, the range is ignored.  */
157 | #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
158 | 
159 | /* If this bit is set, then an unmatched ) is ordinary.
160 |    If not set, then an unmatched ) is invalid.  */
161 | #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
162 | 
163 | /* If this bit is set, succeed as soon as we match the whole pattern,
164 |    without further backtracking.  */
165 | #define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
166 | 
167 | /* If this bit is set, do not process the GNU regex operators.
168 |    If not set, then the GNU regex operators are recognized. */
169 | #define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
170 | 
171 | /* If this bit is set, turn on internal regex debugging.
172 |    If not set, and debugging was on, turn it off.
173 |    This only works if regex.c is compiled -DDEBUG.
174 |    We define this bit always, so that all that's needed to turn on
175 |    debugging is to recompile regex.c; the calling code can always have
176 |    this bit set, and it won't affect anything in the normal case. */
177 | #define RE_DEBUG (RE_NO_GNU_OPS << 1)
178 | 
179 | /* If this bit is set, a syntactically invalid interval is treated as
180 |    a string of ordinary characters.  For example, the ERE 'a{1' is
181 |    treated as 'a\{1'.  */
182 | #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
183 | 
184 | /* If this bit is set, then ignore case when matching.
185 |    If not set, then case is significant.  */
186 | #define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
187 | 
188 | /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
189 |    for ^, because it is difficult to scan the regex backwards to find
190 |    whether ^ should be special.  */
191 | #define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
192 | 
193 | /* If this bit is set, then \{ cannot be first in a BRE or
194 |    immediately after an alternation or begin-group operator.  */
195 | #define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
196 | 
197 | /* If this bit is set, then no_sub will be set to 1 during
198 |    re_compile_pattern.  */
199 | #define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
200 | 
201 | /* This global variable defines the particular regexp syntax to use (for
202 |    some interfaces).  When a regexp is compiled, the syntax used is
203 |    stored in the pattern buffer, so changing this does not affect
204 |    already-compiled regexps.  */
205 | REGEX_DLL_IMPEXP reg_syntax_t re_syntax_options;
206 | 
207 | /* Define combinations of the above bits for the standard possibilities.
208 |    (The [[[ comments delimit what gets put into the Texinfo file, so
209 |    don't delete them!)  */
210 | /* [[[begin syntaxes]]] */
211 | #define RE_SYNTAX_EMACS 0
212 | 
213 | #define RE_SYNTAX_AWK							\
214 |   (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
215 |    | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
216 |    | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
217 |    | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
218 |    | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
219 | 
220 | #define RE_SYNTAX_GNU_AWK						\
221 |   ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
222 |    & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
223 |        | RE_CONTEXT_INVALID_OPS ))
224 | 
225 | #define RE_SYNTAX_POSIX_AWK						\
226 |   (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
227 |    | RE_INTERVALS	    | RE_NO_GNU_OPS)
228 | 
229 | #define RE_SYNTAX_GREP							\
230 |   (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
231 |    | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
232 |    | RE_NEWLINE_ALT)
233 | 
234 | #define RE_SYNTAX_EGREP							\
235 |   (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
236 |    | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
237 |    | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
238 |    | RE_NO_BK_VBAR)
239 | 
240 | #define RE_SYNTAX_POSIX_EGREP						\
241 |   (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
242 |    | RE_INVALID_INTERVAL_ORD)
243 | 
244 | /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
245 | #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
246 | 
247 | #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
248 | 
249 | /* Syntax bits common to both basic and extended POSIX regex syntax.  */
250 | #define _RE_SYNTAX_POSIX_COMMON						\
251 |   (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
252 |    | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
253 | 
254 | #define RE_SYNTAX_POSIX_BASIC						\
255 |   (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
256 | 
257 | /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
258 |    RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
259 |    isn't minimal, since other operators, such as \`, aren't disabled.  */
260 | #define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
261 |   (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
262 | 
263 | #define RE_SYNTAX_POSIX_EXTENDED					\
264 |   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
265 |    | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
266 |    | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
267 |    | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
268 | 
269 | /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
270 |    removed and RE_NO_BK_REFS is added.  */
271 | #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
272 |   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
273 |    | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
274 |    | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
275 |    | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
276 | /* [[[end syntaxes]]] */
277 | 
278 | /* Maximum number of duplicates an interval can allow.  Some systems
279 |    (erroneously) define this in other header files, but we want our
280 |    value, so remove any previous define.  */
281 | #ifdef RE_DUP_MAX
282 | # undef RE_DUP_MAX
283 | #endif
284 | /* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
285 | #define RE_DUP_MAX (0x7fff)
286 | 
287 | 
288 | /* POSIX `cflags' bits (i.e., information for `regcomp').  */
289 | 
290 | /* If this bit is set, then use extended regular expression syntax.
291 |    If not set, then use basic regular expression syntax.  */
292 | #define REG_EXTENDED 1
293 | 
294 | /* If this bit is set, then ignore case when matching.
295 |    If not set, then case is significant.  */
296 | #define REG_ICASE (REG_EXTENDED << 1)
297 | 
298 | /* If this bit is set, then anchors do not match at newline
299 |      characters in the string.
300 |    If not set, then anchors do match at newlines.  */
301 | #define REG_NEWLINE (REG_ICASE << 1)
302 | 
303 | /* If this bit is set, then report only success or fail in regexec.
304 |    If not set, then returns differ between not matching and errors.  */
305 | #define REG_NOSUB (REG_NEWLINE << 1)
306 | 
307 | 
308 | /* POSIX `eflags' bits (i.e., information for regexec).  */
309 | 
310 | /* If this bit is set, then the beginning-of-line operator doesn't match
311 |      the beginning of the string (presumably because it's not the
312 |      beginning of a line).
313 |    If not set, then the beginning-of-line operator does match the
314 |      beginning of the string.  */
315 | #define REG_NOTBOL 1
316 | 
317 | /* Like REG_NOTBOL, except for the end-of-line.  */
318 | #define REG_NOTEOL (1 << 1)
319 | 
320 | /* Use PMATCH[0] to delimit the start and end of the search in the
321 |    buffer.  */
322 | #define REG_STARTEND (1 << 2)
323 | 
324 | 
325 | /* Status codes of the regex routines.  If any error codes are removed,
326 |    changed, or added, update the `re_error_msg' table in regex.c.  */
327 | typedef enum
328 | {
329 | #ifdef _XOPEN_SOURCE
330 |   REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
331 | #endif
332 | 
333 |   REG_NOERROR = 0,	/* Success.  */
334 |   REG_NOMATCH,		/* Didn't find a match (for regexec).  */
335 | 
336 |   /* POSIX regcomp return error codes.  (In the order listed in the
337 |      standard.)  */
338 |   REG_BADPAT,		/* Invalid pattern.  */
339 |   REG_ECOLLATE,		/* Invalid collating element.  */
340 |   REG_ECTYPE,		/* Invalid character class name.  */
341 |   REG_EESCAPE,		/* Trailing backslash.  */
342 |   REG_ESUBREG,		/* Invalid back reference.  */
343 |   REG_EBRACK,		/* Unmatched left bracket.  */
344 |   REG_EPAREN,		/* Parenthesis imbalance.  */
345 |   REG_EBRACE,		/* Unmatched \{.  */
346 |   REG_BADBR,		/* Invalid contents of \{\}.  */
347 |   REG_ERANGE,		/* Invalid range end.  */
348 |   REG_ESPACE,		/* Ran out of memory.  */
349 |   REG_BADRPT,		/* No preceding re for repetition op.  */
350 | 
351 |   /* Error codes we've added.  */
352 |   REG_EEND,		/* Premature end.  */
353 |   REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
354 |   REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
355 | } reg_errcode_t;
356 | 
357 | /* This data structure represents a compiled pattern.  Before calling
358 |    the pattern compiler, the fields `buffer', `allocated', `fastmap',
359 |    `translate', and `no_sub' can be set.  After the pattern has been
360 |    compiled, the `re_nsub' field is available.  All other fields are
361 |    private to the regex routines.  */
362 | 
363 | #ifndef RE_TRANSLATE_TYPE
364 | # define RE_TRANSLATE_TYPE unsigned char *
365 | #endif
366 | 
367 | struct re_pattern_buffer
368 | {
369 |   /* Space that holds the compiled pattern.  It is declared as
370 |      `unsigned char *' because its elements are sometimes used as
371 |      array indexes.  */
372 |   unsigned char *buffer;
373 | 
374 |   /* Number of bytes to which `buffer' points.  */
375 |   unsigned long int allocated;
376 | 
377 |   /* Number of bytes actually used in `buffer'.  */
378 |   unsigned long int used;
379 | 
380 |   /* Syntax setting with which the pattern was compiled.  */
381 |   reg_syntax_t syntax;
382 | 
383 |   /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
384 |      fastmap, if there is one, to skip over impossible starting points
385 |      for matches.  */
386 |   char *fastmap;
387 | 
388 |   /* Either a translate table to apply to all characters before
389 |      comparing them, or zero for no translation.  The translation is
390 |      applied to a pattern when it is compiled and to a string when it
391 |      is matched.  */
392 |   RE_TRANSLATE_TYPE translate;
393 | 
394 |   /* Number of subexpressions found by the compiler.  */
395 |   size_t re_nsub;
396 | 
397 |   /* Zero if this pattern cannot match the empty string, one otherwise.
398 |      Well, in truth it's used only in `re_search_2', to see whether or
399 |      not we should use the fastmap, so we don't set this absolutely
400 |      perfectly; see `re_compile_fastmap' (the `duplicate' case).  */
401 |   unsigned can_be_null : 1;
402 | 
403 |   /* If REGS_UNALLOCATED, allocate space in the `regs' structure
404 |      for `max (RE_NREGS, re_nsub + 1)' groups.
405 |      If REGS_REALLOCATE, reallocate space if necessary.
406 |      If REGS_FIXED, use what's there.  */
407 | #define REGS_UNALLOCATED 0
408 | #define REGS_REALLOCATE 1
409 | #define REGS_FIXED 2
410 |   unsigned regs_allocated : 2;  /* Holds one of the REGS_* values above.  */
411 | 
412 |   /* Set to zero when `regex_compile' compiles a pattern; set to one
413 |      by `re_compile_fastmap' if it updates the fastmap.  */
414 |   unsigned fastmap_accurate : 1;
415 | 
416 |   /* If set, `re_match_2' does not return information about
417 |      subexpressions.  */
418 |   unsigned no_sub : 1;
419 | 
420 |   /* If set, a beginning-of-line anchor doesn't match at the beginning
421 |      of the string.  */
422 |   unsigned not_bol : 1;
423 | 
424 |   /* Similarly, if set, an end-of-line anchor doesn't match at the end of the string.  */
425 |   unsigned not_eol : 1;
426 | 
427 |   /* If true, an anchor at a newline matches.  */
428 |   unsigned newline_anchor : 1;
429 | };
430 | 
431 | typedef struct re_pattern_buffer regex_t;
432 | 
433 | /* Type for byte offsets within the string.  POSIX mandates this.  */
434 | typedef int regoff_t;
435 | 
436 | 
437 | /* This is the structure we store register match data in.  See
438 |    regex.texinfo for a full description of what registers match.  */
439 | struct re_registers
440 | {
441 |   unsigned num_regs;  /* Presumably the number of entries in `start' and `end' -- TODO confirm.  */
442 |   regoff_t *start;    /* Presumably one start offset per register -- TODO confirm.  */
443 |   regoff_t *end;      /* Presumably one end offset per register -- TODO confirm.  */
444 | };
445 | 
446 | 
447 | /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
448 |    `re_match_2' returns information about at least this many registers
449 |    the first time a `regs' structure is passed.  */
450 | #ifndef RE_NREGS
451 | # define RE_NREGS 30
452 | #endif
453 | 
454 | 
455 | /* POSIX specification for registers.  Aside from using different names than
456 |    `re_registers', POSIX uses an array of structures, instead of a
457 |    structure of arrays.  */
458 | typedef struct
459 | {
460 |   regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
461 |   regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
462 | } regmatch_t;
463 | 
464 | /* Declarations for routines.  */
465 | 
466 | /* Sets the current default syntax to SYNTAX, and return the old syntax.
467 |    You can also simply assign to the `re_syntax_options' variable.  */
468 | REGEX_DLL_IMPEXP reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
469 | 
470 | /* Compile the regular expression PATTERN, with length LENGTH
471 |    and syntax given by the global `re_syntax_options', into the buffer
472 |    BUFFER.  Return NULL if successful, and an error string if not.  */
473 | REGEX_DLL_IMPEXP const char *re_compile_pattern (const char *__pattern, size_t __length,
474 | 				       struct re_pattern_buffer *__buffer);
475 | 
476 | 
477 | /* Compile a fastmap for the compiled pattern in BUFFER; used to
478 |    accelerate searches.  Return 0 if successful and -2 if there was an
479 |    internal error.  */
480 | REGEX_DLL_IMPEXP int re_compile_fastmap (struct re_pattern_buffer *__buffer);
481 | 
482 | 
483 | /* Search in the string STRING (with length LENGTH) for the pattern
484 |    compiled into BUFFER.  Start searching at position START, for RANGE
485 |    characters.  Return the starting position of the match, -1 for no
486 |    match, or -2 for an internal error.  Also return register
487 |    information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
488 | REGEX_DLL_IMPEXP int re_search (struct re_pattern_buffer *__buffer, const char *__string,
489 | 		      int __length, int __start, int __range,
490 | 		      struct re_registers *__regs);
491 | 
492 | 
493 | /* Like `re_search', but search in the concatenation of STRING1 and
494 |    STRING2.  Also, stop searching at index START + STOP.  */
495 | REGEX_DLL_IMPEXP int re_search_2 (struct re_pattern_buffer *__buffer,
496 | 			const char *__string1, int __length1,
497 | 			const char *__string2, int __length2, int __start,
498 | 			int __range, struct re_registers *__regs, int __stop);
499 | 
500 | 
501 | /* Like `re_search', but return how many characters in STRING the regexp
502 |    in BUFFER matched, starting at position START.  */
503 | REGEX_DLL_IMPEXP int re_match (struct re_pattern_buffer *__buffer, const char *__string,
504 | 		     int __length, int __start, struct re_registers *__regs);
505 | 
506 | 
507 | /* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
508 | REGEX_DLL_IMPEXP int re_match_2 (struct re_pattern_buffer *__buffer,
509 | 		       const char *__string1, int __length1,
510 | 		       const char *__string2, int __length2, int __start,
511 | 		       struct re_registers *__regs, int __stop);
512 | 
513 | 
514 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
515 |    ENDS.  Subsequent matches using BUFFER and REGS will use this memory
516 |    for recording register information.  STARTS and ENDS must be
517 |    allocated with malloc, and must each be at least `NUM_REGS * sizeof
518 |    (regoff_t)' bytes long.
519 | 
520 |    If NUM_REGS == 0, then subsequent matches should allocate their own
521 |    register data.
522 | 
523 |    Unless this function is called, the first search or match using
524 |    PATTERN_BUFFER will allocate its own register data, without
525 |    freeing the old data.  */
526 | REGEX_DLL_IMPEXP void re_set_registers (struct re_pattern_buffer *__buffer,
527 | 			      struct re_registers *__regs,
528 | 			      unsigned int __num_regs,
529 | 			      regoff_t *__starts, regoff_t *__ends);
530 | 
531 | #if defined _REGEX_RE_COMP || defined _LIBC
532 | # ifndef _CRAY
533 | /* 4.2 bsd compatibility.  */
534 | REGEX_DLL_IMPEXP char *re_comp (const char *);
535 | REGEX_DLL_IMPEXP int re_exec (const char *);
536 | # endif
537 | #endif
538 | 
539 | /* GCC 2.95 and later have "__restrict"; C99 compilers have
540 |    "restrict", and "configure" may have defined "restrict".  */
541 | #ifndef __restrict
542 | # if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
543 | #  if defined restrict || 199901L <= __STDC_VERSION__
544 | #   define __restrict restrict
545 | #  else
546 | #   define __restrict
547 | #  endif
548 | # endif
549 | #endif
550 | /* gcc 3.1 and up support the [restrict] syntax.  */
551 | #ifndef __restrict_arr
552 | # if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
553 |      && !defined __GNUG__
554 | #  define __restrict_arr __restrict
555 | # else
556 | #  define __restrict_arr
557 | # endif
558 | #endif
559 | 
560 | /* POSIX compatibility.  */
561 | REGEX_DLL_IMPEXP int regcomp (regex_t *__restrict __preg,
562 | 		    const char *__restrict __pattern,
563 | 		    int __cflags);
564 | 
565 | REGEX_DLL_IMPEXP int regexec (const regex_t *__restrict __preg,
566 | 		    const char *__restrict __string, size_t __nmatch,
567 | 		    regmatch_t __pmatch[__restrict_arr],
568 | 		    int __eflags);
569 | 
570 | REGEX_DLL_IMPEXP size_t regerror (int __errcode, const regex_t *__restrict __preg,
571 | 			char *__restrict __errbuf, size_t __errbuf_size);
572 | 
573 | REGEX_DLL_IMPEXP void regfree (regex_t *__preg);
574 | 
575 | 
576 | #ifdef __cplusplus
577 | }
578 | #endif	/* C++ */
579 | 
580 | #endif /* regex.h */
581 | 


--------------------------------------------------------------------------------
/regex/regex_internal.c:
--------------------------------------------------------------------------------
   1 | /* Extended regular expression matching and search library.
   2 |    Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   3 |    This file is part of the GNU C Library.
   4 |    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5 | 
   6 |    The GNU C Library is free software; you can redistribute it and/or
   7 |    modify it under the terms of the GNU Lesser General Public
   8 |    License as published by the Free Software Foundation; either
   9 |    version 2.1 of the License, or (at your option) any later version.
  10 | 
  11 |    The GNU C Library is distributed in the hope that it will be useful,
  12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 |    Lesser General Public License for more details.
  15 | 
  16 |    You should have received a copy of the GNU Lesser General Public
  17 |    License along with the GNU C Library; if not, write to the Free
  18 |    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19 |    02111-1307 USA.  */
  20 | 
  21 | static void re_string_construct_common (const char *str, int len,
  22 | 					re_string_t *pstr,
  23 | 					RE_TRANSLATE_TYPE trans, int icase,
  24 | 					const re_dfa_t *dfa) internal_function;
  25 | static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
  26 | 					  const re_node_set *nodes,
  27 | 					  unsigned int hash) internal_function;
  28 | static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
  29 | 					  const re_node_set *nodes,
  30 | 					  unsigned int context,
  31 | 					  unsigned int hash) internal_function;
  32 | 
  33 | /* Functions for string operation.  */
  34 | 
  35 | /* Point PSTR at STR (LEN bytes) and allocate its buffers with room for
  36 |    min (LEN + 1, max (INIT_LEN, DFA->mb_cur_max)) elements.  Returns REG_NOERROR, or the error from re_string_realloc_buffers.  The buffers are not built here: it is necessary to call re_string_reconstruct before using the object.
  37 |    NOTE(review): unlike re_string_construct, PSTR is not cleared first; presumably the caller zero-initializes it -- confirm.  */
  38 | static reg_errcode_t
  39 | internal_function
  40 | re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
  41 | 		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
  42 | {
  43 |   reg_errcode_t ret;
  44 |   int init_buf_len;
  45 | 
  46 |   /* Ensure at least one character fits into the buffers.  */
  47 |   if (init_len < dfa->mb_cur_max)
  48 |     init_len = dfa->mb_cur_max;
  49 |   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;  /* Never more than LEN + 1.  */
  50 |   re_string_construct_common (str, len, pstr, trans, icase, dfa);
  51 | 
  52 |   ret = re_string_realloc_buffers (pstr, init_buf_len);
  53 |   if (BE (ret != REG_NOERROR, 0))
  54 |     return ret;
  55 | 
  56 |   pstr->word_char = dfa->word_char;
  57 |   pstr->word_ops_used = dfa->word_ops_used;
  58 |   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;  /* Without a private copy, read STR itself.  */
  59 |   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;  /* STR is valid as is only when nothing has to be built.  */
  60 |   pstr->valid_raw_len = pstr->valid_len;
  61 |   return REG_NOERROR;
  62 | }
  63 | 
  64 | /* Clear PSTR, point it at STR (LEN bytes), allocate buffers of LEN + 1 elements when LEN > 0, and build their contents.
  65 |    Returns REG_NOERROR, or the error code of the helper that failed.  */
  66 | static reg_errcode_t
  67 | internal_function
  68 | re_string_construct (re_string_t *pstr, const char *str, int len,
  69 | 		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
  70 | {
  71 |   reg_errcode_t ret;
  72 |   memset (pstr, '\0', sizeof (re_string_t));
  73 |   re_string_construct_common (str, len, pstr, trans, icase, dfa);
  74 | 
  75 |   if (len > 0)
  76 |     {
  77 |       ret = re_string_realloc_buffers (pstr, len + 1);
  78 |       if (BE (ret != REG_NOERROR, 0))
  79 | 	return ret;
  80 |     }
  81 |   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;  /* Without a private copy, read STR itself.  */
  82 | 
  83 |   if (icase)
  84 |     {
  85 | #ifdef RE_ENABLE_I18N
  86 |       if (dfa->mb_cur_max > 1)
  87 | 	{
  88 | 	  while (1)
  89 | 	    {
  90 | 	      ret = build_wcs_upper_buffer (pstr);
  91 | 	      if (BE (ret != REG_NOERROR, 0))
  92 | 		return ret;
  93 | 	      if (pstr->valid_raw_len >= len)  /* The whole raw string has been consumed.  */
  94 | 		break;
  95 | 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)  /* More than mb_cur_max elements are still free.  */
  96 | 		break;
  97 | 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);  /* Otherwise double the buffers and build again.  */
  98 | 	      if (BE (ret != REG_NOERROR, 0))
  99 | 		return ret;
 100 | 	    }
 101 | 	}
 102 |       else
 103 | #endif /* RE_ENABLE_I18N  */
 104 | 	build_upper_buffer (pstr);
 105 |     }
 106 |   else
 107 |     {
 108 | #ifdef RE_ENABLE_I18N
 109 |       if (dfa->mb_cur_max > 1)
 110 | 	build_wcs_buffer (pstr);
 111 |       else
 112 | #endif /* RE_ENABLE_I18N  */
 113 | 	{
 114 | 	  if (trans != NULL)
 115 | 	    re_string_translate_buffer (pstr);
 116 | 	  else
 117 | 	    {
 118 | 	      pstr->valid_len = pstr->bufs_len;  /* NOTE(review): bufs_len is LEN + 1 here when LEN > 0 -- confirm the extra element is intended.  */
 119 | 	      pstr->valid_raw_len = pstr->bufs_len;
 120 | 	    }
 121 | 	}
 122 |     }
 123 | 
 124 |   return REG_NOERROR;
 125 | }
 126 | 
 127 | /* Helper functions for re_string_allocate, and re_string_construct.  */
 128 | /* Resize PSTR's buffers to NEW_BUF_LEN elements.  Returns REG_ESPACE if a reallocation fails; bufs_len is only updated on success.  NOTE(review): NEW_BUF_LEN is not range-checked here -- confirm callers bound it.  */
 129 | static reg_errcode_t
 130 | internal_function
 131 | re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
 132 | {
 133 | #ifdef RE_ENABLE_I18N
 134 |   if (pstr->mb_cur_max > 1)
 135 |     {
 136 |       wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
 137 |       if (BE (new_wcs == NULL, 0))
 138 | 	return REG_ESPACE;
 139 |       pstr->wcs = new_wcs;
 140 |       if (pstr->offsets != NULL)  /* Resized only if it has already been allocated.  */
 141 | 	{
 142 | 	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
 143 | 	  if (BE (new_offsets == NULL, 0))
 144 | 	    return REG_ESPACE;
 145 | 	  pstr->offsets = new_offsets;
 146 | 	}
 147 |     }
 148 | #endif /* RE_ENABLE_I18N  */
 149 |   if (pstr->mbs_allocated)  /* Otherwise the callers point mbs at the input string, which must not be reallocated.  */
 150 |     {
 151 |       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
 152 | 					   new_buf_len);
 153 |       if (BE (new_mbs == NULL, 0))
 154 | 	return REG_ESPACE;
 155 |       pstr->mbs = new_mbs;
 156 |     }
 157 |   pstr->bufs_len = new_buf_len;
 158 |   return REG_NOERROR;
 159 | }
 160 | 
 161 | /* Record STR, LEN, TRANS and ICASE in PSTR and copy the multibyte settings from DFA.  Only assigns fields; allocates nothing.  */
 162 | static void
 163 | internal_function
 164 | re_string_construct_common (const char *str, int len, re_string_t *pstr,
 165 | 			    RE_TRANSLATE_TYPE trans, int icase,
 166 | 			    const re_dfa_t *dfa)
 167 | {
 168 |   pstr->raw_mbs = (const unsigned char *) str;
 169 |   pstr->len = len;
 170 |   pstr->raw_len = len;
 171 |   pstr->trans = trans;
 172 |   pstr->icase = icase ? 1 : 0;  /* Normalized to 0 or 1.  */
 173 |   pstr->mbs_allocated = (trans != NULL || icase);  /* Set whenever TRANS or ICASE is in effect.  */
 174 |   pstr->mb_cur_max = dfa->mb_cur_max;
 175 |   pstr->is_utf8 = dfa->is_utf8;
 176 |   pstr->map_notascii = dfa->map_notascii;
 177 |   pstr->stop = pstr->len;  /* stop and raw_stop both start out equal to LEN.  */
 178 |   pstr->raw_stop = pstr->stop;
 179 | }
 180 | 
 181 | #ifdef RE_ENABLE_I18N
 182 | 
 183 | /* Build wide character buffer PSTR->WCS.
 184 |    If the byte sequence of the string is:
 185 |      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
 186 |    Then wide character buffer will be:
 187 |      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
 188 |    We use WEOF for padding; it indicates that the position isn't
 189 |    the first byte of a multibyte character.
 190 | 
 191 |    Note that this function assumes PSTR->VALID_LEN elements are already
 192 |    built and starts from PSTR->VALID_LEN.  On return PSTR->VALID_LEN and PSTR->VALID_RAW_LEN are both set to the index reached.  */
 193 | 
 194 | static void
 195 | internal_function
 196 | build_wcs_buffer (re_string_t *pstr)
 197 | {
 198 | #ifdef _LIBC
 199 |   unsigned char buf[MB_LEN_MAX];
 200 |   assert (MB_LEN_MAX >= pstr->mb_cur_max);
 201 | #else
 202 |   unsigned char buf[64];  /* NOTE(review): assumes mb_cur_max <= 64; not asserted in this branch.  */
 203 | #endif
 204 |   mbstate_t prev_st;
 205 |   int byte_idx, end_idx, remain_len;
 206 |   size_t mbclen;
 207 | 
 208 |   /* Build the buffers from pstr->valid_len to either pstr->len or
 209 |      pstr->bufs_len.  */
 210 |   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 211 |   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
 212 |     {
 213 |       wchar_t wc;
 214 |       const char *p;
 215 | 
 216 |       remain_len = end_idx - byte_idx;
 217 |       prev_st = pstr->cur_state;  /* Saved so the conversion state can be rolled back below.  */
 218 |       /* Apply the translation if we need.  */
 219 |       if (BE (pstr->trans != NULL, 0))
 220 | 	{
 221 | 	  int i, ch;
 222 | 
 223 | 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
 224 | 	    {
 225 | 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
 226 | 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];  /* Translated byte goes to both mbs and the scratch buffer.  */
 227 | 	    }
 228 | 	  p = (const char *) buf;
 229 | 	}
 230 |       else
 231 | 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
 232 |       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
 233 |       if (BE (mbclen == (size_t) -2, 0))
 234 | 	{
 235 | 	  /* The buffer doesn't have enough space, finish to build.  */
 236 | 	  pstr->cur_state = prev_st;
 237 | 	  break;
 238 | 	}
 239 |       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
 240 | 	{
 241 | 	  /* We treat these cases as a singlebyte character.  */
 242 | 	  mbclen = 1;
 243 | 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];  /* Use the raw byte itself as the character.  */
 244 | 	  if (BE (pstr->trans != NULL, 0))
 245 | 	    wc = pstr->trans[wc];
 246 | 	  pstr->cur_state = prev_st;  /* Restore the state saved before the mbrtowc call.  */
 247 | 	}
 248 | 
 249 |       /* Write wide character and padding.  */
 250 |       pstr->wcs[byte_idx++] = wc;
 251 |       /* Write paddings.  */
 252 |       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
 253 | 	pstr->wcs[byte_idx++] = WEOF;
 254 |     }
 255 |   pstr->valid_len = byte_idx;
 256 |   pstr->valid_raw_len = byte_idx;
 257 | }
 258 | 
 259 | /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
 260 |    but for REG_ICASE.  */
 261 | 
 262 | static reg_errcode_t
 263 | internal_function
 264 | build_wcs_upper_buffer (re_string_t *pstr)
 265 | {
 266 |   mbstate_t prev_st;
 267 |   int src_idx, byte_idx, end_idx, remain_len;
 268 |   size_t mbclen;
 269 | #ifdef _LIBC
 270 |   char buf[MB_LEN_MAX];
 271 |   assert (MB_LEN_MAX >= pstr->mb_cur_max);
 272 | #else
 273 |   char buf[64];
 274 | #endif
 275 | 
 276 |   byte_idx = pstr->valid_len;
 277 |   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 278 | 
 279 |   /* The following optimization assumes that ASCII characters can be
 280 |      mapped to wide characters with a simple cast.  */
 281 |   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
 282 |     {
 283 |       while (byte_idx < end_idx)
 284 | 	{
 285 | 	  wchar_t wc;
 286 | 
 287 | 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
 288 | 	      && mbsinit (&pstr->cur_state))
 289 | 	    {
 290 | 	      /* In case of a singlebyte character.  */
 291 | 	      pstr->mbs[byte_idx]
 292 | 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
 293 | 	      /* The next step uses the assumption that wchar_t is encoded
 294 | 		 ASCII-safe: all ASCII values can be converted like this.  */
 295 | 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
 296 | 	      ++byte_idx;
 297 | 	      continue;
 298 | 	    }
 299 | 
 300 | 	  remain_len = end_idx - byte_idx;
 301 | 	  prev_st = pstr->cur_state;
 302 | 	  mbclen = mbrtowc (&wc,
 303 | 			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
 304 | 			     + byte_idx), remain_len, &pstr->cur_state);
 305 | 	  if (BE (mbclen + 2 > 2, 1))
 306 | 	    {
 307 | 	      wchar_t wcu = wc;
 308 | 	      if (iswlower (wc))
 309 | 		{
 310 | 		  size_t mbcdlen;
 311 | 
 312 | 		  wcu = towupper (wc);
 313 | 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
 314 | 		  if (BE (mbclen == mbcdlen, 1))
 315 | 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
 316 | 		  else
 317 | 		    {
 318 | 		      src_idx = byte_idx;
 319 | 		      goto offsets_needed;
 320 | 		    }
 321 | 		}
 322 | 	      else
 323 | 		memcpy (pstr->mbs + byte_idx,
 324 | 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
 325 | 	      pstr->wcs[byte_idx++] = wcu;
 326 | 	      /* Write paddings.  */
 327 | 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
 328 | 		pstr->wcs[byte_idx++] = WEOF;
 329 | 	    }
 330 | 	  else if (mbclen == (size_t) -1 || mbclen == 0)
 331 | 	    {
 332 | 	      /* It is an invalid character or '\0'.  Just use the byte.  */
 333 | 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
 334 | 	      pstr->mbs[byte_idx] = ch;
 335 | 	      /* And also cast it to wide char.  */
 336 | 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
 337 | 	      if (BE (mbclen == (size_t) -1, 0))
 338 | 		pstr->cur_state = prev_st;
 339 | 	    }
 340 | 	  else
 341 | 	    {
 342 | 	      /* The buffer doesn't have enough space, finish to build.  */
 343 | 	      pstr->cur_state = prev_st;
 344 | 	      break;
 345 | 	    }
 346 | 	}
 347 |       pstr->valid_len = byte_idx;
 348 |       pstr->valid_raw_len = byte_idx;
 349 |       return REG_NOERROR;
 350 |     }
 351 |   else
 352 |     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
 353 |       {
 354 | 	wchar_t wc;
 355 | 	const char *p;
 356 |       offsets_needed:
 357 | 	remain_len = end_idx - byte_idx;
 358 | 	prev_st = pstr->cur_state;
 359 | 	if (BE (pstr->trans != NULL, 0))
 360 | 	  {
 361 | 	    int i, ch;
 362 | 
 363 | 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
 364 | 	      {
 365 | 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
 366 | 		buf[i] = pstr->trans[ch];
 367 | 	      }
 368 | 	    p = (const char *) buf;
 369 | 	  }
 370 | 	else
 371 | 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
 372 | 	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
 373 | 	if (BE (mbclen + 2 > 2, 1))
 374 | 	  {
 375 | 	    wchar_t wcu = wc;
 376 | 	    if (iswlower (wc))
 377 | 	      {
 378 | 		size_t mbcdlen;
 379 | 
 380 | 		wcu = towupper (wc);
 381 | 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
 382 | 		if (BE (mbclen == mbcdlen, 1))
 383 | 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
 384 | 		else if (mbcdlen != (size_t) -1)
 385 | 		  {
 386 | 		    size_t i;
 387 | 
 388 | 		    if (byte_idx + mbcdlen > pstr->bufs_len)
 389 | 		      {
 390 | 			pstr->cur_state = prev_st;
 391 | 			break;
 392 | 		      }
 393 | 
 394 | 		    if (pstr->offsets == NULL)
 395 | 		      {
 396 | 			pstr->offsets = re_malloc (int, pstr->bufs_len);
 397 | 
 398 | 			if (pstr->offsets == NULL)
 399 | 			  return REG_ESPACE;
 400 | 		      }
 401 | 		    if (!pstr->offsets_needed)
 402 | 		      {
 403 | 			for (i = 0; i < (size_t) byte_idx; ++i)
 404 | 			  pstr->offsets[i] = i;
 405 | 			pstr->offsets_needed = 1;
 406 | 		      }
 407 | 
 408 | 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
 409 | 		    pstr->wcs[byte_idx] = wcu;
 410 | 		    pstr->offsets[byte_idx] = src_idx;
 411 | 		    for (i = 1; i < mbcdlen; ++i)
 412 | 		      {
 413 | 			pstr->offsets[byte_idx + i]
 414 | 			  = src_idx + (i < mbclen ? i : mbclen - 1);
 415 | 			pstr->wcs[byte_idx + i] = WEOF;
 416 | 		      }
 417 | 		    pstr->len += mbcdlen - mbclen;
 418 | 		    if (pstr->raw_stop > src_idx)
 419 | 		      pstr->stop += mbcdlen - mbclen;
 420 | 		    end_idx = (pstr->bufs_len > pstr->len)
 421 | 			      ? pstr->len : pstr->bufs_len;
 422 | 		    byte_idx += mbcdlen;
 423 | 		    src_idx += mbclen;
 424 | 		    continue;
 425 | 		  }
 426 |                 else
 427 |                   memcpy (pstr->mbs + byte_idx, p, mbclen);
 428 | 	      }
 429 | 	    else
 430 | 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
 431 | 
 432 | 	    if (BE (pstr->offsets_needed != 0, 0))
 433 | 	      {
 434 | 		size_t i;
 435 | 		for (i = 0; i < mbclen; ++i)
 436 | 		  pstr->offsets[byte_idx + i] = src_idx + i;
 437 | 	      }
 438 | 	    src_idx += mbclen;
 439 | 
 440 | 	    pstr->wcs[byte_idx++] = wcu;
 441 | 	    /* Write paddings.  */
 442 | 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
 443 | 	      pstr->wcs[byte_idx++] = WEOF;
 444 | 	  }
 445 | 	else if (mbclen == (size_t) -1 || mbclen == 0)
 446 | 	  {
 447 | 	    /* It is an invalid character or '\0'.  Just use the byte.  */
 448 | 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
 449 | 
 450 | 	    if (BE (pstr->trans != NULL, 0))
 451 | 	      ch = pstr->trans [ch];
 452 | 	    pstr->mbs[byte_idx] = ch;
 453 | 
 454 | 	    if (BE (pstr->offsets_needed != 0, 0))
 455 | 	      pstr->offsets[byte_idx] = src_idx;
 456 | 	    ++src_idx;
 457 | 
 458 | 	    /* And also cast it to wide char.  */
 459 | 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
 460 | 	    if (BE (mbclen == (size_t) -1, 0))
 461 | 	      pstr->cur_state = prev_st;
 462 | 	  }
 463 | 	else
 464 | 	  {
 465 | 	    /* The buffer doesn't have enough space, finish to build.  */
 466 | 	    pstr->cur_state = prev_st;
 467 | 	    break;
 468 | 	  }
 469 |       }
 470 |   pstr->valid_len = byte_idx;
 471 |   pstr->valid_raw_len = src_idx;
 472 |   return REG_NOERROR;
 473 | }
 474 | 
 475 | /* Skip characters until the index becomes greater than NEW_RAW_IDX.
 476 |    Return the index.  */
 477 | 
 478 | static int
 479 | internal_function
 480 | re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
 481 | {
 482 |   mbstate_t prev_st;
 483 |   int rawbuf_idx;
 484 |   size_t mbclen;
 485 |   wchar_t wc = WEOF;
 486 | 
 487 |   /* Skip the characters which are not necessary to check.  */
 488 |   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
 489 |        rawbuf_idx < new_raw_idx;)
 490 |     {
 491 |       int remain_len;
 492 |       remain_len = pstr->len - rawbuf_idx;
 493 |       prev_st = pstr->cur_state;
 494 |       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
 495 | 			remain_len, &pstr->cur_state);
 496 |       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
 497 | 	{
 498 | 	  /* We treat these cases as a single byte character.  */
 499 | 	  if (mbclen == 0 || remain_len == 0)
 500 | 	    wc = L'\0';
 501 | 	  else
 502 | 	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
 503 | 	  mbclen = 1;
 504 | 	  pstr->cur_state = prev_st;
 505 | 	}
 506 |       /* Then proceed the next character.  */
 507 |       rawbuf_idx += mbclen;
 508 |     }
 509 |   *last_wc = (wint_t) wc;
 510 |   return rawbuf_idx;
 511 | }
 512 | #endif /* RE_ENABLE_I18N  */
 513 | 
 514 | /* Build the buffer PSTR->MBS, and apply the translation if we need.
 515 |    This function is used in case of REG_ICASE.  */
 516 | 
 517 | static void
 518 | internal_function
 519 | build_upper_buffer (re_string_t *pstr)
 520 | {
 521 |   int char_idx, end_idx;
 522 |   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 523 | 
 524 |   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
 525 |     {
 526 |       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
 527 |       if (BE (pstr->trans != NULL, 0))
 528 | 	ch = pstr->trans[ch];
 529 |       if (islower (ch))
 530 | 	pstr->mbs[char_idx] = toupper (ch);
 531 |       else
 532 | 	pstr->mbs[char_idx] = ch;
 533 |     }
 534 |   pstr->valid_len = char_idx;
 535 |   pstr->valid_raw_len = char_idx;
 536 | }
 537 | 
 538 | /* Apply TRANS to the buffer in PSTR.  */
 539 | 
 540 | static void
 541 | internal_function
 542 | re_string_translate_buffer (re_string_t *pstr)
 543 | {
 544 |   int buf_idx, end_idx;
 545 |   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 546 | 
 547 |   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
 548 |     {
 549 |       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
 550 |       pstr->mbs[buf_idx] = pstr->trans[ch];
 551 |     }
 552 | 
 553 |   pstr->valid_len = buf_idx;
 554 |   pstr->valid_raw_len = buf_idx;
 555 | }
 556 | 
 557 | /* This function re-construct the buffers.
 558 |    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
 559 |    convert to upper case in case of REG_ICASE, apply translation.  */
 560 | 
 561 | static reg_errcode_t
 562 | internal_function
 563 | re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
 564 | {
 565 |   int offset = idx - pstr->raw_mbs_idx;
 566 |   if (BE (offset < 0, 0))
 567 |     {
 568 |       /* Reset buffer.  */
 569 | #ifdef RE_ENABLE_I18N
 570 |       if (pstr->mb_cur_max > 1)
 571 | 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
 572 | #endif /* RE_ENABLE_I18N */
 573 |       pstr->len = pstr->raw_len;
 574 |       pstr->stop = pstr->raw_stop;
 575 |       pstr->valid_len = 0;
 576 |       pstr->raw_mbs_idx = 0;
 577 |       pstr->valid_raw_len = 0;
 578 |       pstr->offsets_needed = 0;
 579 |       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 580 | 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
 581 |       if (!pstr->mbs_allocated)
 582 | 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
 583 |       offset = idx;
 584 |     }
 585 | 
 586 |   if (BE (offset != 0, 1))
 587 |     {
 588 |       /* Should the already checked characters be kept?  */
 589 |       if (BE (offset < pstr->valid_raw_len, 1))
 590 | 	{
 591 | 	  /* Yes, move them to the front of the buffer.  */
 592 | #ifdef RE_ENABLE_I18N
 593 | 	  if (BE (pstr->offsets_needed, 0))
 594 | 	    {
 595 | 	      int low = 0, high = pstr->valid_len, mid;
 596 | 	      do
 597 | 		{
 598 | 		  mid = (high + low) / 2;
 599 | 		  if (pstr->offsets[mid] > offset)
 600 | 		    high = mid;
 601 | 		  else if (pstr->offsets[mid] < offset)
 602 | 		    low = mid + 1;
 603 | 		  else
 604 | 		    break;
 605 | 		}
 606 | 	      while (low < high);
 607 | 	      if (pstr->offsets[mid] < offset)
 608 | 		++mid;
 609 | 	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
 610 | 							eflags);
 611 | 	      /* This can be quite complicated, so handle specially
 612 | 		 only the common and easy case where the character with
 613 | 		 different length representation of lower and upper
 614 | 		 case is present at or after offset.  */
 615 | 	      if (pstr->valid_len > offset
 616 | 		  && mid == offset && pstr->offsets[mid] == offset)
 617 | 		{
 618 | 		  memmove (pstr->wcs, pstr->wcs + offset,
 619 | 			   (pstr->valid_len - offset) * sizeof (wint_t));
 620 | 		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
 621 | 		  pstr->valid_len -= offset;
 622 | 		  pstr->valid_raw_len -= offset;
 623 | 		  for (low = 0; low < pstr->valid_len; low++)
 624 | 		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
 625 | 		}
 626 | 	      else
 627 | 		{
 628 | 		  /* Otherwise, just find out how long the partial multibyte
 629 | 		     character at offset is and fill it with WEOF/255.  */
 630 | 		  pstr->len = pstr->raw_len - idx + offset;
 631 | 		  pstr->stop = pstr->raw_stop - idx + offset;
 632 | 		  pstr->offsets_needed = 0;
 633 | 		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
 634 | 		    --mid;
 635 | 		  while (mid < pstr->valid_len)
 636 | 		    if (pstr->wcs[mid] != WEOF)
 637 | 		      break;
 638 | 		    else
 639 | 		      ++mid;
 640 | 		  if (mid == pstr->valid_len)
 641 | 		    pstr->valid_len = 0;
 642 | 		  else
 643 | 		    {
 644 | 		      pstr->valid_len = pstr->offsets[mid] - offset;
 645 | 		      if (pstr->valid_len)
 646 | 			{
 647 | 			  for (low = 0; low < pstr->valid_len; ++low)
 648 | 			    pstr->wcs[low] = WEOF;
 649 | 			  memset (pstr->mbs, 255, pstr->valid_len);
 650 | 			}
 651 | 		    }
 652 | 		  pstr->valid_raw_len = pstr->valid_len;
 653 | 		}
 654 | 	    }
 655 | 	  else
 656 | #endif
 657 | 	    {
 658 | 	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
 659 | 							eflags);
 660 | #ifdef RE_ENABLE_I18N
 661 | 	      if (pstr->mb_cur_max > 1)
 662 | 		memmove (pstr->wcs, pstr->wcs + offset,
 663 | 			 (pstr->valid_len - offset) * sizeof (wint_t));
 664 | #endif /* RE_ENABLE_I18N */
 665 | 	      if (BE (pstr->mbs_allocated, 0))
 666 | 		memmove (pstr->mbs, pstr->mbs + offset,
 667 | 			 pstr->valid_len - offset);
 668 | 	      pstr->valid_len -= offset;
 669 | 	      pstr->valid_raw_len -= offset;
 670 | #if DEBUG
 671 | 	      assert (pstr->valid_len > 0);
 672 | #endif
 673 | 	    }
 674 | 	}
 675 |       else
 676 | 	{
 677 | 	  /* No, skip all characters until IDX.  */
 678 | 	  int prev_valid_len = pstr->valid_len;
 679 | 
 680 | #ifdef RE_ENABLE_I18N
 681 | 	  if (BE (pstr->offsets_needed, 0))
 682 | 	    {
 683 | 	      pstr->len = pstr->raw_len - idx + offset;
 684 | 	      pstr->stop = pstr->raw_stop - idx + offset;
 685 | 	      pstr->offsets_needed = 0;
 686 | 	    }
 687 | #endif
 688 | 	  pstr->valid_len = 0;
 689 | #ifdef RE_ENABLE_I18N
 690 | 	  if (pstr->mb_cur_max > 1)
 691 | 	    {
 692 | 	      int wcs_idx;
 693 | 	      wint_t wc = WEOF;
 694 | 
 695 | 	      if (pstr->is_utf8)
 696 | 		{
 697 | 		  const unsigned char *raw, *p, *q, *end;
 698 | 
 699 | 		  /* Special case UTF-8.  Multi-byte chars start with any
 700 | 		     byte other than 0x80 - 0xbf.  */
 701 | 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
 702 | 		  end = raw + (offset - pstr->mb_cur_max);
 703 | 		  if (end < pstr->raw_mbs)
 704 | 		    end = pstr->raw_mbs;
 705 | 		  p = raw + offset - 1;
 706 | #ifdef _LIBC
 707 | 		  /* We know the wchar_t encoding is UCS4, so for the simple
 708 | 		     case, ASCII characters, skip the conversion step.  */
 709 | 		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
 710 | 		    {
 711 | 		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
 712 | 		      /* pstr->valid_len = 0; */
 713 | 		      wc = (wchar_t) *p;
 714 | 		    }
 715 | 		  else
 716 | #endif
 717 | 		    for (; p >= end; --p)
 718 | 		      if ((*p & 0xc0) != 0x80)
 719 | 			{
 720 | 			  mbstate_t cur_state;
 721 | 			  wchar_t wc2;
 722 | 			  int mlen = raw + pstr->len - p;
 723 | 			  unsigned char buf[6];
 724 | 			  size_t mbclen;
 725 | 
 726 | 			  q = p;
 727 | 			  if (BE (pstr->trans != NULL, 0))
 728 | 			    {
 729 | 			      int i = mlen < 6 ? mlen : 6;
 730 | 			      while (--i >= 0)
 731 | 				buf[i] = pstr->trans[p[i]];
 732 | 			      q = buf;
 733 | 			    }
 734 | 			  /* XXX Don't use mbrtowc, we know which conversion
 735 | 			     to use (UTF-8 -> UCS4).  */
 736 | 			  memset (&cur_state, 0, sizeof (cur_state));
 737 | 			  mbclen = mbrtowc (&wc2, (const char *) p, mlen,
 738 | 					    &cur_state);
 739 | 			  if (raw + offset - p <= mbclen
 740 | 			      && mbclen < (size_t) -2)
 741 | 			    {
 742 | 			      memset (&pstr->cur_state, '\0',
 743 | 				      sizeof (mbstate_t));
 744 | 			      pstr->valid_len = mbclen - (raw + offset - p);
 745 | 			      wc = wc2;
 746 | 			    }
 747 | 			  break;
 748 | 			}
 749 | 		}
 750 | 
 751 | 	      if (wc == WEOF)
 752 | 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
 753 | 	      if (wc == WEOF)
 754 | 		pstr->tip_context
 755 | 		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
 756 | 	      else
 757 | 		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
 758 | 				      && IS_WIDE_WORD_CHAR (wc))
 759 | 				     ? CONTEXT_WORD
 760 | 				     : ((IS_WIDE_NEWLINE (wc)
 761 | 					 && pstr->newline_anchor)
 762 | 					? CONTEXT_NEWLINE : 0));
 763 | 	      if (BE (pstr->valid_len, 0))
 764 | 		{
 765 | 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
 766 | 		    pstr->wcs[wcs_idx] = WEOF;
 767 | 		  if (pstr->mbs_allocated)
 768 | 		    memset (pstr->mbs, 255, pstr->valid_len);
 769 | 		}
 770 | 	      pstr->valid_raw_len = pstr->valid_len;
 771 | 	    }
 772 | 	  else
 773 | #endif /* RE_ENABLE_I18N */
 774 | 	    {
 775 | 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
 776 | 	      pstr->valid_raw_len = 0;
 777 | 	      if (pstr->trans)
 778 | 		c = pstr->trans[c];
 779 | 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
 780 | 				   ? CONTEXT_WORD
 781 | 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
 782 | 				      ? CONTEXT_NEWLINE : 0));
 783 | 	    }
 784 | 	}
 785 |       if (!BE (pstr->mbs_allocated, 0))
 786 | 	pstr->mbs += offset;
 787 |     }
 788 |   pstr->raw_mbs_idx = idx;
 789 |   pstr->len -= offset;
 790 |   pstr->stop -= offset;
 791 | 
 792 |   /* Then build the buffers.  */
 793 | #ifdef RE_ENABLE_I18N
 794 |   if (pstr->mb_cur_max > 1)
 795 |     {
 796 |       if (pstr->icase)
 797 | 	{
 798 | 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
 799 | 	  if (BE (ret != REG_NOERROR, 0))
 800 | 	    return ret;
 801 | 	}
 802 |       else
 803 | 	build_wcs_buffer (pstr);
 804 |     }
 805 |   else
 806 | #endif /* RE_ENABLE_I18N */
 807 |     if (BE (pstr->mbs_allocated, 0))
 808 |       {
 809 | 	if (pstr->icase)
 810 | 	  build_upper_buffer (pstr);
 811 | 	else if (pstr->trans != NULL)
 812 | 	  re_string_translate_buffer (pstr);
 813 |       }
 814 |     else
 815 |       pstr->valid_len = pstr->len;
 816 | 
 817 |   pstr->cur_idx = 0;
 818 |   return REG_NOERROR;
 819 | }
 820 | 
 821 | static unsigned char
 822 | internal_function __attribute ((pure))
 823 | re_string_peek_byte_case (const re_string_t *pstr, int idx)
 824 | {
 825 |   int ch, off;
 826 | 
 827 |   /* Handle the common (easiest) cases first.  */
 828 |   if (BE (!pstr->mbs_allocated, 1))
 829 |     return re_string_peek_byte (pstr, idx);
 830 | 
 831 | #ifdef RE_ENABLE_I18N
 832 |   if (pstr->mb_cur_max > 1
 833 |       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
 834 |     return re_string_peek_byte (pstr, idx);
 835 | #endif
 836 | 
 837 |   off = pstr->cur_idx + idx;
 838 | #ifdef RE_ENABLE_I18N
 839 |   if (pstr->offsets_needed)
 840 |     off = pstr->offsets[off];
 841 | #endif
 842 | 
 843 |   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
 844 | 
 845 | #ifdef RE_ENABLE_I18N
 846 |   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
 847 |      this function returns CAPITAL LETTER I instead of first byte of
 848 |      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
 849 |      since peek_byte_case doesn't advance cur_idx in any way.  */
 850 |   if (pstr->offsets_needed && !isascii (ch))
 851 |     return re_string_peek_byte (pstr, idx);
 852 | #endif
 853 | 
 854 |   return ch;
 855 | }
 856 | 
 857 | static unsigned char
 858 | internal_function __attribute ((pure))
 859 | re_string_fetch_byte_case (re_string_t *pstr)
 860 | {
 861 |   if (BE (!pstr->mbs_allocated, 1))
 862 |     return re_string_fetch_byte (pstr);
 863 | 
 864 | #ifdef RE_ENABLE_I18N
 865 |   if (pstr->offsets_needed)
 866 |     {
 867 |       int off, ch;
 868 | 
 869 |       /* For tr_TR.UTF-8 [[:islower:]] there is
 870 | 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
 871 | 	 in that case the whole multi-byte character and return
 872 | 	 the original letter.  On the other side, with
 873 | 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
 874 | 	 anything else would complicate things too much.  */
 875 | 
 876 |       if (!re_string_first_byte (pstr, pstr->cur_idx))
 877 | 	return re_string_fetch_byte (pstr);
 878 | 
 879 |       off = pstr->offsets[pstr->cur_idx];
 880 |       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
 881 | 
 882 |       if (! isascii (ch))
 883 | 	return re_string_fetch_byte (pstr);
 884 | 
 885 |       re_string_skip_bytes (pstr,
 886 | 			    re_string_char_size_at (pstr, pstr->cur_idx));
 887 |       return ch;
 888 |     }
 889 | #endif
 890 | 
 891 |   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
 892 | }
 893 | 
 894 | static void
 895 | internal_function
 896 | re_string_destruct (re_string_t *pstr)
 897 | {
 898 | #ifdef RE_ENABLE_I18N
 899 |   re_free (pstr->wcs);
 900 |   re_free (pstr->offsets);
 901 | #endif /* RE_ENABLE_I18N  */
 902 |   if (pstr->mbs_allocated)
 903 |     re_free (pstr->mbs);
 904 | }
 905 | 
 906 | /* Return the context at IDX in INPUT.  */
 907 | 
 908 | static unsigned int
 909 | internal_function
 910 | re_string_context_at (const re_string_t *input, int idx, int eflags)
 911 | {
 912 |   int c;
 913 |   if (BE (idx < 0, 0))
 914 |     /* In this case, we use the value stored in input->tip_context,
 915 |        since we can't know the character in input->mbs[-1] here.  */
 916 |     return input->tip_context;
 917 |   if (BE (idx == input->len, 0))
 918 |     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
 919 | 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
 920 | #ifdef RE_ENABLE_I18N
 921 |   if (input->mb_cur_max > 1)
 922 |     {
 923 |       wint_t wc;
 924 |       int wc_idx = idx;
 925 |       while(input->wcs[wc_idx] == WEOF)
 926 | 	{
 927 | #ifdef DEBUG
 928 | 	  /* It must not happen.  */
 929 | 	  assert (wc_idx >= 0);
 930 | #endif
 931 | 	  --wc_idx;
 932 | 	  if (wc_idx < 0)
 933 | 	    return input->tip_context;
 934 | 	}
 935 |       wc = input->wcs[wc_idx];
 936 |       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
 937 | 	return CONTEXT_WORD;
 938 |       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
 939 | 	      ? CONTEXT_NEWLINE : 0);
 940 |     }
 941 |   else
 942 | #endif
 943 |     {
 944 |       c = re_string_byte_at (input, idx);
 945 |       if (bitset_contain (input->word_char, c))
 946 | 	return CONTEXT_WORD;
 947 |       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
 948 |     }
 949 | }
 950 | 
 951 | /* Functions for set operation.  */
 952 | 
 953 | static reg_errcode_t
 954 | internal_function
 955 | re_node_set_alloc (re_node_set *set, int size)
 956 | {
 957 |   set->alloc = size;
 958 |   set->nelem = 0;
 959 |   set->elems = re_malloc (int, size);
 960 |   if (BE (set->elems == NULL, 0))
 961 |     return REG_ESPACE;
 962 |   return REG_NOERROR;
 963 | }
 964 | 
 965 | static reg_errcode_t
 966 | internal_function
 967 | re_node_set_init_1 (re_node_set *set, int elem)
 968 | {
 969 |   set->alloc = 1;
 970 |   set->nelem = 1;
 971 |   set->elems = re_malloc (int, 1);
 972 |   if (BE (set->elems == NULL, 0))
 973 |     {
 974 |       set->alloc = set->nelem = 0;
 975 |       return REG_ESPACE;
 976 |     }
 977 |   set->elems[0] = elem;
 978 |   return REG_NOERROR;
 979 | }
 980 | 
 981 | static reg_errcode_t
 982 | internal_function
 983 | re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
 984 | {
 985 |   set->alloc = 2;
 986 |   set->elems = re_malloc (int, 2);
 987 |   if (BE (set->elems == NULL, 0))
 988 |     return REG_ESPACE;
 989 |   if (elem1 == elem2)
 990 |     {
 991 |       set->nelem = 1;
 992 |       set->elems[0] = elem1;
 993 |     }
 994 |   else
 995 |     {
 996 |       set->nelem = 2;
 997 |       if (elem1 < elem2)
 998 | 	{
 999 | 	  set->elems[0] = elem1;
1000 | 	  set->elems[1] = elem2;
1001 | 	}
1002 |       else
1003 | 	{
1004 | 	  set->elems[0] = elem2;
1005 | 	  set->elems[1] = elem1;
1006 | 	}
1007 |     }
1008 |   return REG_NOERROR;
1009 | }
1010 | 
1011 | static reg_errcode_t
1012 | internal_function
1013 | re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1014 | {
1015 |   dest->nelem = src->nelem;
1016 |   if (src->nelem > 0)
1017 |     {
1018 |       dest->alloc = dest->nelem;
1019 |       dest->elems = re_malloc (int, dest->alloc);
1020 |       if (BE (dest->elems == NULL, 0))
1021 | 	{
1022 | 	  dest->alloc = dest->nelem = 0;
1023 | 	  return REG_ESPACE;
1024 | 	}
1025 |       memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1026 |     }
1027 |   else
1028 |     re_node_set_init_empty (dest);
1029 |   return REG_NOERROR;
1030 | }
1031 | 
1032 | /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1033 |    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1034 |    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
1035 | 
1036 | static reg_errcode_t
1037 | internal_function
1038 | re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1039 | 			   const re_node_set *src2)
1040 | {
1041 |   int i1, i2, is, id, delta, sbase;
1042 |   if (src1->nelem == 0 || src2->nelem == 0)
1043 |     return REG_NOERROR;
1044 | 
1045 |   /* We need dest->nelem + 2 * elems_in_intersection; this is a
1046 |      conservative estimate.  */
1047 |   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1048 |     {
1049 |       int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1050 |       int *new_elems = re_realloc (dest->elems, int, new_alloc);
1051 |       if (BE (new_elems == NULL, 0))
1052 |         return REG_ESPACE;
1053 |       dest->elems = new_elems;
1054 |       dest->alloc = new_alloc;
1055 |     }
1056 | 
1057 |   /* Find the items in the intersection of SRC1 and SRC2, and copy
1058 |      into the top of DEST those that are not already in DEST itself.  */
1059 |   sbase = dest->nelem + src1->nelem + src2->nelem;
1060 |   i1 = src1->nelem - 1;
1061 |   i2 = src2->nelem - 1;
1062 |   id = dest->nelem - 1;
1063 |   for (;;)
1064 |     {
1065 |       if (src1->elems[i1] == src2->elems[i2])
1066 | 	{
1067 | 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
1068 | 	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
1069 | 	    --id;
1070 | 
1071 |           if (id < 0 || dest->elems[id] != src1->elems[i1])
1072 |             dest->elems[--sbase] = src1->elems[i1];
1073 | 
1074 | 	  if (--i1 < 0 || --i2 < 0)
1075 | 	    break;
1076 | 	}
1077 | 
1078 |       /* Lower the highest of the two items.  */
1079 |       else if (src1->elems[i1] < src2->elems[i2])
1080 | 	{
1081 | 	  if (--i2 < 0)
1082 | 	    break;
1083 | 	}
1084 |       else
1085 | 	{
1086 | 	  if (--i1 < 0)
1087 | 	    break;
1088 | 	}
1089 |     }
1090 | 
1091 |   id = dest->nelem - 1;
1092 |   is = dest->nelem + src1->nelem + src2->nelem - 1;
1093 |   delta = is - sbase + 1;
1094 | 
1095 |   /* Now copy.  When DELTA becomes zero, the remaining
1096 |      DEST elements are already in place; this is more or
1097 |      less the same loop that is in re_node_set_merge.  */
1098 |   dest->nelem += delta;
1099 |   if (delta > 0 && id >= 0)
1100 |     for (;;)
1101 |       {
1102 |         if (dest->elems[is] > dest->elems[id])
1103 |           {
1104 |             /* Copy from the top.  */
1105 |             dest->elems[id + delta--] = dest->elems[is--];
1106 |             if (delta == 0)
1107 |               break;
1108 |           }
1109 |         else
1110 |           {
1111 |             /* Slide from the bottom.  */
1112 |             dest->elems[id + delta] = dest->elems[id];
1113 |             if (--id < 0)
1114 |               break;
1115 |           }
1116 |       }
1117 | 
1118 |   /* Copy remaining SRC elements.  */
1119 |   memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1120 | 
1121 |   return REG_NOERROR;
1122 | }
1123 | 
1124 | /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1125 |    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1126 | 
1127 | static reg_errcode_t
1128 | internal_function
1129 | re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1130 | 			const re_node_set *src2)
1131 | {
1132 |   int i1, i2, id;
1133 |   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1134 |     {
1135 |       dest->alloc = src1->nelem + src2->nelem;
1136 |       dest->elems = re_malloc (int, dest->alloc);
1137 |       if (BE (dest->elems == NULL, 0))
1138 | 	return REG_ESPACE;
1139 |     }
1140 |   else
1141 |     {
1142 |       if (src1 != NULL && src1->nelem > 0)
1143 | 	return re_node_set_init_copy (dest, src1);
1144 |       else if (src2 != NULL && src2->nelem > 0)
1145 | 	return re_node_set_init_copy (dest, src2);
1146 |       else
1147 | 	re_node_set_init_empty (dest);
1148 |       return REG_NOERROR;
1149 |     }
1150 |   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1151 |     {
1152 |       if (src1->elems[i1] > src2->elems[i2])
1153 | 	{
1154 | 	  dest->elems[id++] = src2->elems[i2++];
1155 | 	  continue;
1156 | 	}
1157 |       if (src1->elems[i1] == src2->elems[i2])
1158 | 	++i2;
1159 |       dest->elems[id++] = src1->elems[i1++];
1160 |     }
1161 |   if (i1 < src1->nelem)
1162 |     {
1163 |       memcpy (dest->elems + id, src1->elems + i1,
1164 | 	     (src1->nelem - i1) * sizeof (int));
1165 |       id += src1->nelem - i1;
1166 |     }
1167 |   else if (i2 < src2->nelem)
1168 |     {
1169 |       memcpy (dest->elems + id, src2->elems + i2,
1170 | 	     (src2->nelem - i2) * sizeof (int));
1171 |       id += src2->nelem - i2;
1172 |     }
1173 |   dest->nelem = id;
1174 |   return REG_NOERROR;
1175 | }
1176 | 
1177 | /* Calculate the union set of the sets DEST and SRC. And store it to
1178 |    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1179 | 
1180 | static reg_errcode_t
1181 | internal_function
1182 | re_node_set_merge (re_node_set *dest, const re_node_set *src)
1183 | {
1184 |   int is, id, sbase, delta;
1185 |   if (src == NULL || src->nelem == 0)
1186 |     return REG_NOERROR;
1187 |   if (dest->alloc < 2 * src->nelem + dest->nelem)
1188 |     {
1189 |       int new_alloc = 2 * (src->nelem + dest->alloc);
1190 |       int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1191 |       if (BE (new_buffer == NULL, 0))
1192 | 	return REG_ESPACE;
1193 |       dest->elems = new_buffer;
1194 |       dest->alloc = new_alloc;
1195 |     }
1196 | 
1197 |   if (BE (dest->nelem == 0, 0))
1198 |     {
1199 |       dest->nelem = src->nelem;
1200 |       memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1201 |       return REG_NOERROR;
1202 |     }
1203 | 
1204 |   /* Copy into the top of DEST the items of SRC that are not
1205 |      found in DEST.  Maybe we could binary search in DEST?  */
1206 |   for (sbase = dest->nelem + 2 * src->nelem,
1207 |        is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1208 |     {
1209 |       if (dest->elems[id] == src->elems[is])
1210 |         is--, id--;
1211 |       else if (dest->elems[id] < src->elems[is])
1212 |         dest->elems[--sbase] = src->elems[is--];
1213 |       else /* if (dest->elems[id] > src->elems[is]) */
1214 |         --id;
1215 |     }
1216 | 
1217 |   if (is >= 0)
1218 |     {
1219 |       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
1220 |       sbase -= is + 1;
1221 |       memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1222 |     }
1223 | 
1224 |   id = dest->nelem - 1;
1225 |   is = dest->nelem + 2 * src->nelem - 1;
1226 |   delta = is - sbase + 1;
1227 |   if (delta == 0)
1228 |     return REG_NOERROR;
1229 | 
1230 |   /* Now copy.  When DELTA becomes zero, the remaining
1231 |      DEST elements are already in place.  */
1232 |   dest->nelem += delta;
1233 |   for (;;)
1234 |     {
1235 |       if (dest->elems[is] > dest->elems[id])
1236 |         {
1237 | 	  /* Copy from the top.  */
1238 |           dest->elems[id + delta--] = dest->elems[is--];
1239 | 	  if (delta == 0)
1240 | 	    break;
1241 | 	}
1242 |       else
1243 |         {
1244 |           /* Slide from the bottom.  */
1245 |           dest->elems[id + delta] = dest->elems[id];
1246 | 	  if (--id < 0)
1247 | 	    {
1248 | 	      /* Copy remaining SRC elements.  */
1249 | 	      memcpy (dest->elems, dest->elems + sbase,
1250 | 	              delta * sizeof (int));
1251 | 	      break;
1252 | 	    }
1253 | 	}
1254 |     }
1255 | 
1256 |   return REG_NOERROR;
1257 | }
1258 | 
1259 | /* Insert the new element ELEM to the re_node_set* SET.
1260 |    SET should not already have ELEM.
1261 |    return -1 if an error is occured, return 1 otherwise.  */
1262 | 
1263 | static int
1264 | internal_function
1265 | re_node_set_insert (re_node_set *set, int elem)
1266 | {
1267 |   int idx;
1268 |   /* In case the set is empty.  */
1269 |   if (set->alloc == 0)
1270 |     {
1271 |       if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1272 | 	return 1;
1273 |       else
1274 | 	return -1;
1275 |     }
1276 | 
1277 |   if (BE (set->nelem, 0) == 0)
1278 |     {
1279 |       /* We already guaranteed above that set->alloc != 0.  */
1280 |       set->elems[0] = elem;
1281 |       ++set->nelem;
1282 |       return 1;
1283 |     }
1284 | 
1285 |   /* Realloc if we need.  */
1286 |   if (set->alloc == set->nelem)
1287 |     {
1288 |       int *new_elems;
1289 |       set->alloc = set->alloc * 2;
1290 |       new_elems = re_realloc (set->elems, int, set->alloc);
1291 |       if (BE (new_elems == NULL, 0))
1292 | 	return -1;
1293 |       set->elems = new_elems;
1294 |     }
1295 | 
1296 |   /* Move the elements which follows the new element.  Test the
1297 |      first element separately to skip a check in the inner loop.  */
1298 |   if (elem < set->elems[0])
1299 |     {
1300 |       idx = 0;
1301 |       for (idx = set->nelem; idx > 0; idx--)
1302 |         set->elems[idx] = set->elems[idx - 1];
1303 |     }
1304 |   else
1305 |     {
1306 |       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1307 |         set->elems[idx] = set->elems[idx - 1];
1308 |     }
1309 | 
1310 |   /* Insert the new element.  */
1311 |   set->elems[idx] = elem;
1312 |   ++set->nelem;
1313 |   return 1;
1314 | }
1315 | 
1316 | /* Insert the new element ELEM to the re_node_set* SET.
1317 |    SET should not already have any element greater than or equal to ELEM.
1318 |    Return -1 if an error is occured, return 1 otherwise.  */
1319 | 
1320 | static int
1321 | internal_function
1322 | re_node_set_insert_last (re_node_set *set, int elem)
1323 | {
1324 |   /* Realloc if we need.  */
1325 |   if (set->alloc == set->nelem)
1326 |     {
1327 |       int *new_elems;
1328 |       set->alloc = (set->alloc + 1) * 2;
1329 |       new_elems = re_realloc (set->elems, int, set->alloc);
1330 |       if (BE (new_elems == NULL, 0))
1331 | 	return -1;
1332 |       set->elems = new_elems;
1333 |     }
1334 | 
1335 |   /* Insert the new element.  */
1336 |   set->elems[set->nelem++] = elem;
1337 |   return 1;
1338 | }
1339 | 
1340 | /* Compare two node sets SET1 and SET2.
1341 |    return 1 if SET1 and SET2 are equivalent, return 0 otherwise.  */
1342 | 
1343 | static int
1344 | internal_function __attribute ((pure))
1345 | re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1346 | {
1347 |   int i;
1348 |   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1349 |     return 0;
1350 |   for (i = set1->nelem ; --i >= 0 ; )
1351 |     if (set1->elems[i] != set2->elems[i])
1352 |       return 0;
1353 |   return 1;
1354 | }
1355 | 
1356 | /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
1357 | 
1358 | static int
1359 | internal_function __attribute ((pure))
1360 | re_node_set_contains (const re_node_set *set, int elem)
1361 | {
1362 |   unsigned int idx, right, mid;
1363 |   if (set->nelem <= 0)
1364 |     return 0;
1365 | 
1366 |   /* Binary search the element.  */
1367 |   idx = 0;
1368 |   right = set->nelem - 1;
1369 |   while (idx < right)
1370 |     {
1371 |       mid = (idx + right) / 2;
1372 |       if (set->elems[mid] < elem)
1373 | 	idx = mid + 1;
1374 |       else
1375 | 	right = mid;
1376 |     }
1377 |   return set->elems[idx] == elem ? idx + 1 : 0;
1378 | }
1379 | 
1380 | static void
1381 | internal_function
1382 | re_node_set_remove_at (re_node_set *set, int idx)
1383 | {
1384 |   if (idx < 0 || idx >= set->nelem)
1385 |     return;
1386 |   --set->nelem;
1387 |   for (; idx < set->nelem; idx++)
1388 |     set->elems[idx] = set->elems[idx + 1];
1389 | }
1390 | 
1391 | 
1392 | /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1393 |    Or return -1, if an error will be occured.  */
1394 | 
1395 | static int
1396 | internal_function
1397 | re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1398 | {
1399 |   int type = token.type;
1400 |   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1401 |     {
1402 |       size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1403 |       int *new_nexts, *new_indices;
1404 |       re_node_set *new_edests, *new_eclosures;
1405 |       re_token_t *new_nodes;
1406 | 
1407 |       /* Avoid overflows.  */
1408 |       if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
1409 | 	return -1;
1410 | 
1411 |       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1412 |       if (BE (new_nodes == NULL, 0))
1413 | 	return -1;
1414 |       dfa->nodes = new_nodes;
1415 |       new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1416 |       new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1417 |       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1418 |       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1419 |       if (BE (new_nexts == NULL || new_indices == NULL
1420 | 	      || new_edests == NULL || new_eclosures == NULL, 0))
1421 | 	return -1;
1422 |       dfa->nexts = new_nexts;
1423 |       dfa->org_indices = new_indices;
1424 |       dfa->edests = new_edests;
1425 |       dfa->eclosures = new_eclosures;
1426 |       dfa->nodes_alloc = new_nodes_alloc;
1427 |     }
1428 |   dfa->nodes[dfa->nodes_len] = token;
1429 |   dfa->nodes[dfa->nodes_len].constraint = 0;
1430 | #ifdef RE_ENABLE_I18N
1431 |   dfa->nodes[dfa->nodes_len].accept_mb =
1432 |     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1433 | #endif
1434 |   dfa->nexts[dfa->nodes_len] = -1;
1435 |   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1436 |   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1437 |   return dfa->nodes_len++;
1438 | }
1439 | 
1440 | static inline unsigned int
1441 | internal_function
1442 | calc_state_hash (const re_node_set *nodes, unsigned int context)
1443 | {
1444 |   unsigned int hash = nodes->nelem + context;
1445 |   int i;
1446 |   for (i = 0 ; i < nodes->nelem ; i++)
1447 |     hash += nodes->elems[i];
1448 |   return hash;
1449 | }
1450 | 
1451 | /* Search for the state whose node_set is equivalent to NODES.
1452 |    Return the pointer to the state, if we found it in the DFA.
1453 |    Otherwise create the new one and return it.  In case of an error
1454 |    return NULL and set the error code in ERR.
1455 |    Note: - We assume NULL as the invalid state, then it is possible that
1456 | 	   return value is NULL and ERR is REG_NOERROR.
1457 | 	 - We never return non-NULL value in case of any errors, it is for
1458 | 	   optimization.  */
1459 | 
1460 | static re_dfastate_t *
1461 | internal_function
1462 | re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1463 | 		  const re_node_set *nodes)
1464 | {
1465 |   unsigned int hash;
1466 |   re_dfastate_t *new_state;
1467 |   struct re_state_table_entry *spot;
1468 |   int i;
1469 |   if (BE (nodes->nelem == 0, 0))
1470 |     {
1471 |       *err = REG_NOERROR;
1472 |       return NULL;
1473 |     }
1474 |   hash = calc_state_hash (nodes, 0);
1475 |   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1476 | 
1477 |   for (i = 0 ; i < spot->num ; i++)
1478 |     {
1479 |       re_dfastate_t *state = spot->array[i];
1480 |       if (hash != state->hash)
1481 | 	continue;
1482 |       if (re_node_set_compare (&state->nodes, nodes))
1483 | 	return state;
1484 |     }
1485 | 
1486 |   /* There are no appropriate state in the dfa, create the new one.  */
1487 |   new_state = create_ci_newstate (dfa, nodes, hash);
1488 |   if (BE (new_state == NULL, 0))
1489 |     *err = REG_ESPACE;
1490 | 
1491 |   return new_state;
1492 | }
1493 | 
1494 | /* Search for the state whose node_set is equivalent to NODES and
1495 |    whose context is equivalent to CONTEXT.
1496 |    Return the pointer to the state, if we found it in the DFA.
1497 |    Otherwise create the new one and return it.  In case of an error
1498 |    return NULL and set the error code in ERR.
1499 |    Note: - We assume NULL as the invalid state, then it is possible that
1500 | 	   return value is NULL and ERR is REG_NOERROR.
1501 | 	 - We never return non-NULL value in case of any errors, it is for
1502 | 	   optimization.  */
1503 | 
1504 | static re_dfastate_t *
1505 | internal_function
1506 | re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1507 | 			  const re_node_set *nodes, unsigned int context)
1508 | {
1509 |   unsigned int hash;
1510 |   re_dfastate_t *new_state;
1511 |   struct re_state_table_entry *spot;
1512 |   int i;
1513 |   if (nodes->nelem == 0)
1514 |     {
1515 |       *err = REG_NOERROR;
1516 |       return NULL;
1517 |     }
1518 |   hash = calc_state_hash (nodes, context);
1519 |   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1520 | 
1521 |   for (i = 0 ; i < spot->num ; i++)
1522 |     {
1523 |       re_dfastate_t *state = spot->array[i];
1524 |       if (state->hash == hash
1525 | 	  && state->context == context
1526 | 	  && re_node_set_compare (state->entrance_nodes, nodes))
1527 | 	return state;
1528 |     }
1529 |   /* There are no appropriate state in `dfa', create the new one.  */
1530 |   new_state = create_cd_newstate (dfa, nodes, context, hash);
1531 |   if (BE (new_state == NULL, 0))
1532 |     *err = REG_ESPACE;
1533 | 
1534 |   return new_state;
1535 | }
1536 | 
1537 | /* Finish initialization of the new state NEWSTATE, and using its hash value
1538 |    HASH put in the appropriate bucket of DFA's state table.  Return value
1539 |    indicates the error code if failed.  */
1540 | 
1541 | static reg_errcode_t
1542 | register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1543 | 		unsigned int hash)
1544 | {
1545 |   struct re_state_table_entry *spot;
1546 |   reg_errcode_t err;
1547 |   int i;
1548 | 
1549 |   newstate->hash = hash;
1550 |   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1551 |   if (BE (err != REG_NOERROR, 0))
1552 |     return REG_ESPACE;
1553 |   for (i = 0; i < newstate->nodes.nelem; i++)
1554 |     {
1555 |       int elem = newstate->nodes.elems[i];
1556 |       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1557 |         re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1558 |     }
1559 | 
1560 |   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1561 |   if (BE (spot->alloc <= spot->num, 0))
1562 |     {
1563 |       int new_alloc = 2 * spot->num + 2;
1564 |       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1565 | 					      new_alloc);
1566 |       if (BE (new_array == NULL, 0))
1567 | 	return REG_ESPACE;
1568 |       spot->array = new_array;
1569 |       spot->alloc = new_alloc;
1570 |     }
1571 |   spot->array[spot->num++] = newstate;
1572 |   return REG_NOERROR;
1573 | }
1574 | 
1575 | static void
1576 | free_state (re_dfastate_t *state)
1577 | {
1578 |   re_node_set_free (&state->non_eps_nodes);
1579 |   re_node_set_free (&state->inveclosure);
1580 |   if (state->entrance_nodes != &state->nodes)
1581 |     {
1582 |       re_node_set_free (state->entrance_nodes);
1583 |       re_free (state->entrance_nodes);
1584 |     }
1585 |   re_node_set_free (&state->nodes);
1586 |   re_free (state->word_trtable);
1587 |   re_free (state->trtable);
1588 |   re_free (state);
1589 | }
1590 | 
1591 | /* Create the new state which is independ of contexts.
1592 |    Return the new state if succeeded, otherwise return NULL.  */
1593 | 
1594 | static re_dfastate_t *
1595 | internal_function
1596 | create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1597 | 		    unsigned int hash)
1598 | {
1599 |   int i;
1600 |   reg_errcode_t err;
1601 |   re_dfastate_t *newstate;
1602 | 
1603 |   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1604 |   if (BE (newstate == NULL, 0))
1605 |     return NULL;
1606 |   err = re_node_set_init_copy (&newstate->nodes, nodes);
1607 |   if (BE (err != REG_NOERROR, 0))
1608 |     {
1609 |       re_free (newstate);
1610 |       return NULL;
1611 |     }
1612 | 
1613 |   newstate->entrance_nodes = &newstate->nodes;
1614 |   for (i = 0 ; i < nodes->nelem ; i++)
1615 |     {
1616 |       re_token_t *node = dfa->nodes + nodes->elems[i];
1617 |       re_token_type_t type = node->type;
1618 |       if (type == CHARACTER && !node->constraint)
1619 | 	continue;
1620 | #ifdef RE_ENABLE_I18N
1621 |       newstate->accept_mb |= node->accept_mb;
1622 | #endif /* RE_ENABLE_I18N */
1623 | 
1624 |       /* If the state has the halt node, the state is a halt state.  */
1625 |       if (type == END_OF_RE)
1626 | 	newstate->halt = 1;
1627 |       else if (type == OP_BACK_REF)
1628 | 	newstate->has_backref = 1;
1629 |       else if (type == ANCHOR || node->constraint)
1630 | 	newstate->has_constraint = 1;
1631 |     }
1632 |   err = register_state (dfa, newstate, hash);
1633 |   if (BE (err != REG_NOERROR, 0))
1634 |     {
1635 |       free_state (newstate);
1636 |       newstate = NULL;
1637 |     }
1638 |   return newstate;
1639 | }
1640 | 
1641 | /* Create the new state which is depend on the context CONTEXT.
1642 |    Return the new state if succeeded, otherwise return NULL.  */
1643 | 
1644 | static re_dfastate_t *
1645 | internal_function
1646 | create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1647 | 		    unsigned int context, unsigned int hash)
1648 | {
1649 |   int i, nctx_nodes = 0;
1650 |   reg_errcode_t err;
1651 |   re_dfastate_t *newstate;
1652 | 
1653 |   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1654 |   if (BE (newstate == NULL, 0))
1655 |     return NULL;
1656 |   err = re_node_set_init_copy (&newstate->nodes, nodes);
1657 |   if (BE (err != REG_NOERROR, 0))
1658 |     {
1659 |       re_free (newstate);
1660 |       return NULL;
1661 |     }
1662 | 
1663 |   newstate->context = context;
1664 |   newstate->entrance_nodes = &newstate->nodes;
1665 | 
1666 |   for (i = 0 ; i < nodes->nelem ; i++)
1667 |     {
1668 |       unsigned int constraint = 0;
1669 |       re_token_t *node = dfa->nodes + nodes->elems[i];
1670 |       re_token_type_t type = node->type;
1671 |       if (node->constraint)
1672 | 	constraint = node->constraint;
1673 | 
1674 |       if (type == CHARACTER && !constraint)
1675 | 	continue;
1676 | #ifdef RE_ENABLE_I18N
1677 |       newstate->accept_mb |= node->accept_mb;
1678 | #endif /* RE_ENABLE_I18N */
1679 | 
1680 |       /* If the state has the halt node, the state is a halt state.  */
1681 |       if (type == END_OF_RE)
1682 | 	newstate->halt = 1;
1683 |       else if (type == OP_BACK_REF)
1684 | 	newstate->has_backref = 1;
1685 |       else if (type == ANCHOR)
1686 | 	constraint = node->opr.ctx_type;
1687 | 
1688 |       if (constraint)
1689 | 	{
1690 | 	  if (newstate->entrance_nodes == &newstate->nodes)
1691 | 	    {
1692 | 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
1693 | 	      if (BE (newstate->entrance_nodes == NULL, 0))
1694 | 		{
1695 | 		  free_state (newstate);
1696 | 		  return NULL;
1697 | 		}
1698 | 	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
1699 | 	      nctx_nodes = 0;
1700 | 	      newstate->has_constraint = 1;
1701 | 	    }
1702 | 
1703 | 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1704 | 	    {
1705 | 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1706 | 	      ++nctx_nodes;
1707 | 	    }
1708 | 	}
1709 |     }
1710 |   err = register_state (dfa, newstate, hash);
1711 |   if (BE (err != REG_NOERROR, 0))
1712 |     {
1713 |       free_state (newstate);
1714 |       newstate = NULL;
1715 |     }
1716 |   return  newstate;
1717 | }
1718 | 


--------------------------------------------------------------------------------
/regex/regex_internal.h:
--------------------------------------------------------------------------------
  1 | /* Extended regular expression matching and search library.
  2 |    Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
  3 |    This file is part of the GNU C Library.
  4 |    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
  5 | 
  6 |    The GNU C Library is free software; you can redistribute it and/or
  7 |    modify it under the terms of the GNU Lesser General Public
  8 |    License as published by the Free Software Foundation; either
  9 |    version 2.1 of the License, or (at your option) any later version.
 10 | 
 11 |    The GNU C Library is distributed in the hope that it will be useful,
 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 14 |    Lesser General Public License for more details.
 15 | 
 16 |    You should have received a copy of the GNU Lesser General Public
 17 |    License along with the GNU C Library; if not, write to the Free
 18 |    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 19 |    02111-1307 USA.  */
 20 | 
 21 | #ifndef _REGEX_INTERNAL_H
 22 | #define _REGEX_INTERNAL_H 1
 23 | 
 24 | #include <assert.h>
 25 | #include <ctype.h>
 26 | #include <stdio.h>
 27 | #include <stdlib.h>
 28 | #include <string.h>
 29 | 
 30 | #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
 31 | # include <langinfo.h>
 32 | #endif
 33 | #if defined HAVE_LOCALE_H || defined _LIBC
 34 | # include <locale.h>
 35 | #endif
 36 | #if defined HAVE_WCHAR_H || defined _LIBC
 37 | # include <wchar.h>
 38 | #endif /* HAVE_WCHAR_H || _LIBC */
 39 | #if defined HAVE_WCTYPE_H || defined _LIBC
 40 | # include <wctype.h>
 41 | #endif /* HAVE_WCTYPE_H || _LIBC */
 42 | #if defined HAVE_STDBOOL_H || defined _LIBC
 43 | # include <stdbool.h>
 44 | #endif /* HAVE_STDBOOL_H || _LIBC */
 45 | #if defined HAVE_STDINT_H || defined _LIBC
 46 | # include <stdint.h>
 47 | #endif /* HAVE_STDINT_H || _LIBC */
/* Locking primitives.  Inside glibc the real implementation is used;
   elsewhere the lock macros expand to nothing (the definition) or to an
   empty statement (init/lock/unlock).  */
#if defined _LIBC
# include <bits/libc-lock.h>
#else
# define __libc_lock_define(CLASS,NAME)
# define __libc_lock_init(NAME) do { } while (0)
# define __libc_lock_lock(NAME) do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif

/* Fallback for systems that do not provide isblank().  Note that the
   argument is evaluated twice.  */
#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
#endif

/* glibc-internal locale headers, pulled in at most once.  */
#ifdef _LIBC
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
#   include <locale/localeinfo.h>
#   include <locale/elem-hash.h>
#   include <locale/coll-lookup.h>
# endif
#endif
 70 | 
/* Message translation.  With libintl and NLS enabled (or inside glibc)
   gettext is the real thing; otherwise it returns its argument
   unchanged.  */
#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
# include <libintl.h>
# ifdef _LIBC
#  undef gettext
#  define gettext(msgid) \
  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
#endif

#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
   strings.  */
# define gettext_noop(String) String
#endif

/* Fallback for systems whose headers do not define SIZE_MAX.  */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Multibyte/wide-character support is compiled in only when all the
   required library facilities are available (always inside glibc).  */
#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
# define RE_ENABLE_I18N
#endif

/* BE (EXPR, VAL) evaluates to EXPR and, with GCC 3 or later, tells the
   compiler that EXPR is expected to equal VAL.  On other compilers it is
   plain EXPR, and the `inline' keyword is defined away as well.  */
#if __GNUC__ >= 3
# define BE(expr, val) __builtin_expect (expr, val)
#else
# define BE(expr, val) (expr)
# define inline
#endif
104 | 
/* Number of distinct single-byte characters.  */
#define SBC_MAX 256

/* NOTE(review): presumably the maximum length of a collating element;
   confirm against its users.  */
#define COLL_ELEM_LEN_MAX 8

/* The character which represents newline, in narrow and wide form.  */
#define NEWLINE_CHAR '\n'
#define WIDE_NEWLINE_CHAR L'\n'

/* Outside glibc, map the internal double-underscore names onto the
   standard API and make attribute_hidden expand to nothing.  */
#ifndef _LIBC
# define __wctype wctype
# define __iswctype iswctype
# define __btowc btowc
# define __mempcpy mempcpy
# define __wcrtomb wcrtomb
# define __regfree regfree
# define attribute_hidden
#endif /* not _LIBC */

/* __attribute (ARG) applies a GCC attribute, and vanishes on compilers
   other than GCC.  */
#ifdef __GNUC__
# define __attribute(arg) __attribute__ (arg)
#else
# define __attribute(arg)
#endif

/* Error message table and its index array; both are defined elsewhere.  */
extern const char __re_error_msgid[] attribute_hidden;
extern const size_t __re_error_msgid_idx[] attribute_hidden;
133 | 
/* An integer used to represent a set of bits.  It must be unsigned,
   and must be at least as wide as unsigned int.  */
typedef unsigned long int bitset_word_t;
/* All bits set in a bitset_word_t.  */
#define BITSET_WORD_MAX ULONG_MAX
/* Number of bits in a bitset_word_t.  */
#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
/* Number of bitset_word_t in a bit_set.  */
#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
/* A set with one bit per single-byte character (SBC_MAX bits in all).  */
typedef bitset_word_t bitset_t[BITSET_WORDS];
/* Pointers to the first word of a bitset_t, writable and read-only.  */
typedef bitset_word_t *re_bitset_ptr_t;
typedef const bitset_word_t *re_const_bitset_ptr_t;
146 | 
/* Operations on a bitset_t.  SET is a bitset_t (or a pointer to its first
   word) and I is a bit number in the range [0, SBC_MAX).

   bitset_set and bitset_clear turn bit I on and off; bitset_contain
   yields a non-zero word if bit I is on and 0 otherwise.  bitset_empty
   and bitset_set_all clear or set every bit, and bitset_copy copies SRC
   into DEST.

   Every parameter is parenthesized so that expression arguments such as
   `base + n' select the intended word and bit.  I is still evaluated
   twice, so it must not have side effects.  */
#define bitset_set(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] \
   |= (bitset_word_t) 1 << ((i) % BITSET_WORD_BITS))
#define bitset_clear(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] \
   &= ~((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
#define bitset_contain(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] \
   & ((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
156 | 
/* Bit flags describing what a node requires of the context around the
   current position.  The PREV_* bits are tested against the preceding
   context and the NEXT_* bits against the following one (see
   NOT_SATISFY_PREV_CONSTRAINT and NOT_SATISFY_NEXT_CONSTRAINT).  */
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
#define NEXT_WORD_CONSTRAINT 0x0004
#define NEXT_NOTWORD_CONSTRAINT 0x0008
#define PREV_NEWLINE_CONSTRAINT 0x0010
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
#define WORD_DELIM_CONSTRAINT 0x0100
#define NOT_WORD_DELIM_CONSTRAINT 0x0200

/* Anchor kinds, each expressed as a combination of the constraint bits
   above.  This is the type of re_token_t.opr.ctx_type.  */
typedef enum
{
  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
  WORD_DELIM = WORD_DELIM_CONSTRAINT,
  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
} re_context_type;
181 | 
/* A growable array of node indices.  */
typedef struct
{
  int alloc;	/* Number of ints ELEMS has room for.  */
  int nelem;	/* Number of ints in ELEMS that are in use.  */
  int *elems;	/* The elements themselves.  */
} re_node_set;
188 | 
/* Kinds of tokens, parse-tree nodes and DFA nodes.  Every value that has
   EPSILON_BIT set is recognized as an epsilon node by IS_EPSILON_NODE.  */
typedef enum
{
  NON_TYPE = 0,

  /* Node type, These are used by token, node, tree.  */
  CHARACTER = 1,
  END_OF_RE = 2,
  SIMPLE_BRACKET = 3,
  OP_BACK_REF = 4,
  OP_PERIOD = 5,
#ifdef RE_ENABLE_I18N
  COMPLEX_BRACKET = 6,
  OP_UTF8_PERIOD = 7,
#endif /* RE_ENABLE_I18N */

  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
     when the debugger shows values of this enum type.  */
#define EPSILON_BIT 8
  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
  OP_ALT = EPSILON_BIT | 2,
  OP_DUP_ASTERISK = EPSILON_BIT | 3,
  ANCHOR = EPSILON_BIT | 4,

  /* Tree type, these are used only by tree. */
  CONCAT = 16,
  SUBEXP = 17,

  /* Token type, these are used only by token.  The values after
     OP_DUP_PLUS are numbered consecutively by the compiler.  */
  OP_DUP_PLUS = 18,
  OP_DUP_QUESTION,
  OP_OPEN_BRACKET,
  OP_CLOSE_BRACKET,
  OP_CHARSET_RANGE,
  OP_OPEN_DUP_NUM,
  OP_CLOSE_DUP_NUM,
  OP_NON_MATCH_LIST,
  OP_OPEN_COLL_ELEM,
  OP_CLOSE_COLL_ELEM,
  OP_OPEN_EQUIV_CLASS,
  OP_CLOSE_EQUIV_CLASS,
  OP_OPEN_CHAR_CLASS,
  OP_CLOSE_CHAR_CLASS,
  OP_WORD,
  OP_NOTWORD,
  OP_SPACE,
  OP_NOTSPACE,
  BACK_SLASH

} re_token_type_t;
239 | 
#ifdef RE_ENABLE_I18N
/* Contents of a bracket expression as used by COMPLEX_BRACKET nodes
   (re_token_t.opr.mbcset).  Each array comes with a matching count
   further down; the collating-symbol and equivalence-class arrays exist
   only inside glibc.  */
typedef struct
{
  /* Multibyte characters.  */
  wchar_t *mbchars;

  /* Collating symbols.  */
# ifdef _LIBC
  int32_t *coll_syms;
# endif

  /* Equivalence classes. */
# ifdef _LIBC
  int32_t *equiv_classes;
# endif

  /* Range expressions: parallel arrays of range start and end values.  */
# ifdef _LIBC
  uint32_t *range_starts;
  uint32_t *range_ends;
# else /* not _LIBC */
  wchar_t *range_starts;
  wchar_t *range_ends;
# endif /* not _LIBC */

  /* Character classes. */
  wctype_t *char_classes;

  /* If this character set is the non-matching list.  */
  unsigned int non_match : 1;

  /* # of multibyte characters.  */
  int nmbchars;

  /* # of collating symbols.  */
  int ncoll_syms;

  /* # of equivalence classes. */
  int nequiv_classes;

  /* # of range expressions. */
  int nranges;

  /* # of character classes. */
  int nchar_classes;
} re_charset_t;
#endif /* RE_ENABLE_I18N */
287 | 
/* A token of the pattern, also used as a node of the parse tree and of
   the DFA.  Which member of OPR is meaningful depends on TYPE.  */
typedef struct
{
  union
  {
    unsigned char c;		/* for CHARACTER */
    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
#ifdef RE_ENABLE_I18N
    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
#endif /* RE_ENABLE_I18N */
    int idx;			/* for BACK_REF */
    re_context_type ctx_type;	/* for ANCHOR */
  } opr;
  /* With GCC the type is packed into 8 bits alongside the flag bits.  */
#if __GNUC__ >= 2
  re_token_type_t type : 8;
#else
  re_token_type_t type;
#endif
  unsigned int constraint : 10;	/* context constraint */
  unsigned int duplicated : 1;
  unsigned int opt_subexp : 1;
#ifdef RE_ENABLE_I18N
  /* Set when the node may accept a multibyte character.  */
  unsigned int accept_mb : 1;
  /* These 2 bits can be moved into the union if needed (e.g. if running out
     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
  unsigned int mb_partial : 1;
#endif
  unsigned int word_char : 1;
} re_token_t;

/* Non-zero if TYPE is one of the node types that carry EPSILON_BIT.  */
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
318 | 
/* The string being scanned, together with the working buffers and the
   cursor state used while reading it.  */
struct re_string_t
{
  /* Indicate the raw buffer which is the original string passed as an
     argument of regexec(), re_search(), etc..  */
  const unsigned char *raw_mbs;
  /* Store the multibyte string.  In case of "case insensitive mode" like
     REG_ICASE, upper cases of the string are stored, otherwise MBS points
     the same address that RAW_MBS points.  */
  unsigned char *mbs;
#ifdef RE_ENABLE_I18N
  /* Store the wide character string which is corresponding to MBS.  */
  wint_t *wcs;
  /* NOTE(review): presumably maps positions in MBS back to positions in
     RAW_MBS when the two differ (see offsets_needed) -- confirm.  */
  int *offsets;
  mbstate_t cur_state;
#endif
  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
     raw_mbs[raw_mbs_idx + i].  */
  int raw_mbs_idx;
  /* The length of the valid characters in the buffers.  */
  int valid_len;
  /* The corresponding number of bytes in raw_mbs array.  */
  int valid_raw_len;
  /* The length of the buffers MBS and WCS.  */
  int bufs_len;
  /* The index in MBS, which is updated by re_string_fetch_byte.  */
  int cur_idx;
  /* length of RAW_MBS array.  */
  int raw_len;
  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
  int len;
  /* End of the buffer may be shorter than its length in the cases such
     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
     instead of LEN.  */
  int raw_stop;
  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
  int stop;

  /* The context of mbs[0].  We store the context independently, since
     the context of mbs[0] may be different from raw_mbs[0], which is
     the beginning of the input string.  */
  unsigned int tip_context;
  /* The translation passed as a part of an argument of re_compile_pattern.  */
  RE_TRANSLATE_TYPE trans;
  /* Copy of re_dfa_t's word_char.  */
  re_const_bitset_ptr_t word_char;
  /* 1 if REG_ICASE.  */
  unsigned char icase;
  /* Boolean flags stored as bytes.  */
  unsigned char is_utf8;
  unsigned char map_notascii;
  unsigned char mbs_allocated;
  unsigned char offsets_needed;
  unsigned char newline_anchor;
  unsigned char word_ops_used;
  int mb_cur_max;
};
typedef struct re_string_t re_string_t;
375 | 
376 | 
/* The DFA structure is only declared here; its definition follows
   later.  */
struct re_dfa_t;
typedef struct re_dfa_t re_dfa_t;

/* Calling convention for internal functions.  Outside glibc it selects
   regparm(3)/stdcall on i386 and is empty on every other target.  */
#ifndef _LIBC
# ifdef __i386__
#  define internal_function   __attribute ((regparm (3), stdcall))
# else
#  define internal_function
# endif
#endif

/* Prototypes of the static re_string_t helpers; the definitions live in
   the accompanying source file.  */
#ifndef NOT_IN_libc
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
						int new_buf_len)
     internal_function;
# ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr) internal_function;
static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
# endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr) internal_function;
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
static unsigned int re_string_context_at (const re_string_t *input, int idx,
					  int eflags)
     internal_function __attribute ((pure));
#endif
/* Accessors for re_string_t.  PSTR is a pointer to the string object.

   re_string_peek_byte    byte OFFSET positions after the cursor
   re_string_fetch_byte   byte at the cursor; advances the cursor by one
   re_string_first_byte   non-zero if IDX equals valid_len or wcs[IDX] is
                          not WEOF
   re_string_is_single_byte_char
                          non-zero if wcs[IDX] is not WEOF and IDX is the
                          last valid position or wcs[IDX + 1] is not WEOF
   re_string_eoi          non-zero once the cursor has reached STOP
   re_string_cur_idx, re_string_get_buffer, re_string_length
                          the cursor, the MBS buffer and LEN
   re_string_byte_at      byte at absolute index IDX
   re_string_skip_bytes, re_string_set_index
                          move the cursor by IDX / to IDX

   The two macros that read WCS are usable only when re_string_t has
   that member (RE_ENABLE_I18N).  Every parameter is parenthesized, so
   expression arguments are safe; PSTR and IDX may be evaluated more than
   once and must not have side effects.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
#define re_string_fetch_byte(pstr) \
  ((pstr)->mbs[(pstr)->cur_idx++])
#define re_string_first_byte(pstr, idx) \
  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
#define re_string_is_single_byte_char(pstr, idx) \
  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
				|| (pstr)->wcs[(idx) + 1] != WEOF))
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
418 | 
#ifdef HAVE_ALLOCA_H
 #include <alloca.h>
#endif

/* __libc_use_alloca (N) tells whether an N-byte buffer may be taken from
   the stack.  Outside glibc the policy is defined here.  */
#ifndef _LIBC
# if HAVE_ALLOCA
/* The OS usually guarantees only one guard page at the bottom of the stack,
   and a page size can be as small as 4096 bytes.  So we cannot safely
   allocate anything larger than 4096 bytes.  Also care for the possibility
   of a few compiler-allocated temporary stack slots.  */
#  define __libc_use_alloca(n) ((n) < 4032)
# else
/* alloca is implemented with malloc, so just use malloc.  */
#  define __libc_use_alloca(n) 0
# endif
#endif

/* Typed allocation helpers: T is the element type, N the element count
   and P an existing allocation.  The byte count N * sizeof (T) is not
   checked for overflow here; callers must keep N in range.  */
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p) free (p)
439 | 
/* A node of the binary parse tree.  */
struct bin_tree_t
{
  struct bin_tree_t *parent;
  struct bin_tree_t *left;
  struct bin_tree_t *right;
  /* NOTE(review): FIRST and NEXT look like extra links used by the tree
     passes -- confirm against the compiler code.  */
  struct bin_tree_t *first;
  struct bin_tree_t *next;

  re_token_t token;

  /* `node_idx' is the index in dfa->nodes, if `type' == 0.
     Otherwise `type' indicate the type of this node.  */
  int node_idx;
};
typedef struct bin_tree_t bin_tree_t;

/* Number of tree nodes per storage chunk: as many as fit in 1024 bytes
   after reserving room for one pointer.  */
#define BIN_TREE_STORAGE_SIZE \
  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))

/* A chunk of tree nodes; chunks are chained through NEXT.  */
struct bin_tree_storage_t
{
  struct bin_tree_storage_t *next;
  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
};
typedef struct bin_tree_storage_t bin_tree_storage_t;
465 | 
/* Context flags: properties of the character next to the current
   position.  Several flags may be set at once; 0 is the ordinary
   context.  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

/* Tests on a context value C.  */
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)

/* Character classification, narrow and wide.  CH is evaluated twice.  */
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)

/* Non-zero if CONTEXT, the context before the current position, violates
   one of the PREV_* bits set in CONSTRAINT.  Both parameters are fully
   parenthesized so that expression arguments such as `a | b' are tested
   as a whole.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))

/* Non-zero if CONTEXT, the context after the current position, violates
   one of the NEXT_* bits set in CONSTRAINT.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
493 | 
/* A state of the DFA.  */
struct re_dfastate_t
{
  /* Hash value under which the state is stored in the state table.  */
  unsigned int hash;
  /* The nodes this state consists of.  */
  re_node_set nodes;
  /* The members of NODES that are not epsilon nodes.  */
  re_node_set non_eps_nodes;
  /* NOTE(review): presumably the inverse epsilon closure of NODES --
     confirm against the matcher.  */
  re_node_set inveclosure;
  /* Points at NODES, or at a separately allocated set when the state was
     created from nodes that carry constraints.  */
  re_node_set *entrance_nodes;
  /* NOTE(review): presumably the transition tables, the second one for
     word contexts -- confirm against the matcher.  */
  struct re_dfastate_t **trtable, **word_trtable;
  /* Context flags the state was created for.  */
  unsigned int context : 4;
  /* Set if the state contains the END_OF_RE node.  */
  unsigned int halt : 1;
  /* If this state can accept `multi byte'.
     Note that we refer to multibyte characters, and multi character
     collating elements as `multi byte'.  */
  unsigned int accept_mb : 1;
  /* If this state has backreference node(s).  */
  unsigned int has_backref : 1;
  /* If this state has node(s) with a context constraint.  */
  unsigned int has_constraint : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
513 | 
/* One entry of the state table referenced by re_dfa_t: an array of state
   pointers with its bookkeeping counters.
   NOTE(review): presumably NUM is the number of used slots and ALLOC the
   allocated capacity of ARRAY -- confirm.  */
struct re_state_table_entry
{
  int num;
  int alloc;
  re_dfastate_t **array;
};
520 | 
521 | /* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
522 | 
typedef struct
{
  /* NOTE(review): presumably the index of the next free slot in ARRAY and
     the allocated capacity of ARRAY, respectively -- confirm.  */
  int next_idx;
  int alloc;
  /* The stored state pointers.  */
  re_dfastate_t **array;
} state_array_t;
529 | 
530 | /* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
531 | 
typedef struct
{
  /* The OP_CLOSE_SUBEXP node this record describes.  */
  int node;
  int str_idx; /* The position NODE match at.  */
  /* Array of states associated with this record (embedded by value).  */
  state_array_t path;
} re_sub_match_last_t;
538 | 
539 | /* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
540 |    And information about the node, whose type is OP_CLOSE_SUBEXP,
541 |    corresponding to NODE is stored in LASTS.  */
542 | 
typedef struct
{
  /* NOTE(review): presumably the string position at which NODE matched,
     mirroring re_sub_match_last_t -- confirm.  */
  int str_idx;
  /* The OP_OPEN_SUBEXP node this record describes.  */
  int node;
  /* Array of states associated with this record (held by pointer, unlike
     the embedded array in re_sub_match_last_t).  */
  state_array_t *path;
  int alasts; /* Allocation size of LASTS.  */
  int nlasts; /* The number of LASTS.  */
  /* Records for the corresponding OP_CLOSE_SUBEXP nodes.  */
  re_sub_match_last_t **lasts;
} re_sub_match_top_t;
552 | 
/* One entry of the back reference cache kept in re_match_context_t
   (see its `bkref_ents' member).
   NOTE(review): field roles are inferred from the names -- NODE/STR_IDX
   presumably identify the back reference node and the string position,
   SUBEXP_FROM/SUBEXP_TO the span of the referenced sub-expression match;
   confirm against the matcher code.  */
struct re_backref_cache_entry
{
  int node;
  int str_idx;
  int subexp_from;
  int subexp_to;
  char more;
  /* Named `unused'; sits between MORE and the 16-bit map below.  */
  char unused;
  unsigned short int eps_reachable_subexps_map;
};
563 | 
/* Working state of one matching run: the input string object, the compiled
   automaton it is matched against, the state log, the back reference cache
   and the sub-expression match records.  */
typedef struct
{
  /* The string object corresponding to the input string.  */
  re_string_t input;
  /* The compiled automaton.  The pointer itself is additionally declared
     const when building inside libc or with a C99 (or later) compiler.  */
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
  const re_dfa_t *const dfa;
#else
  const re_dfa_t *dfa;
#endif
  /* EFLAGS of the argument of regexec.  */
  int eflags;
  /* Where the matching ends.  */
  int match_last;
  int last_node;
  /* The state log used by the matcher.  */
  re_dfastate_t **state_log;
  int state_log_top;
  /* Back reference cache.  */
  /* NOTE(review): presumably number of used / allocated entries of
     BKREF_ENTS -- confirm.  */
  int nbkref_ents;
  int abkref_ents;
  struct re_backref_cache_entry *bkref_ents;
  int max_mb_elem_len;
  /* NOTE(review): presumably number of used / allocated entries of
     SUB_TOPS -- confirm.  */
  int nsub_tops;
  int asub_tops;
  re_sub_match_top_t **sub_tops;
} re_match_context_t;
590 | 
/* Context used while sifting states.
   NOTE(review): roles inferred from names only -- two arrays of state
   pointers (sifted and limited), the last node / string index of the range
   being processed, and a node set of limits; confirm against the sifting
   code in regexec.c.  */
typedef struct
{
  re_dfastate_t **sifted_states;
  re_dfastate_t **limited_states;
  int last_node;
  int last_str_idx;
  re_node_set limits;
} re_sift_context_t;
599 | 
/* One entry of the fail stack (see struct re_fail_stack_t).
   NOTE(review): presumably a snapshot to resume from -- string index, node,
   the register array and the set of nodes reached via epsilon transitions;
   confirm against the code that pushes/pops entries.  */
struct re_fail_stack_ent_t
{
  int idx;
  int node;
  regmatch_t *regs;
  re_node_set eps_via_nodes;
};
607 | 
/* Stack of re_fail_stack_ent_t entries.
   NOTE(review): presumably NUM is the number of entries in use and ALLOC
   the allocated capacity of STACK -- confirm.  */
struct re_fail_stack_t
{
  int num;
  int alloc;
  struct re_fail_stack_ent_t *stack;
};
614 | 
/* The compiled form of a regular expression: the node array with its
   per-node tables, the state table and initial states, the parse tree
   storage, and assorted flags describing the pattern.  */
struct re_dfa_t
{
  /* Node array with its allocated size and used length.  */
  re_token_t *nodes;
  size_t nodes_alloc;
  size_t nodes_len;
  /* Per-node tables.  NOTE(review): presumably all indexed by node number
     -- confirm.  */
  int *nexts;
  int *org_indices;
  re_node_set *edests;
  re_node_set *eclosures;
  re_node_set *inveclosures;
  /* State table and the initial states; separate initial states are kept
     for the word, newline and begin-of-buffer variants.  */
  struct re_state_table_entry *state_table;
  re_dfastate_t *init_state;
  re_dfastate_t *init_state_word;
  re_dfastate_t *init_state_nl;
  re_dfastate_t *init_state_begbuf;
  /* Parse tree and the storage its nodes are allocated from.  */
  bin_tree_t *str_tree;
  bin_tree_storage_t *str_tree_storage;
  re_bitset_ptr_t sb_char;
  int str_tree_storage_idx;

  /* number of subexpressions `re_nsub' is in regex_t.  */
  unsigned int state_hash_mask;
  int init_node;
  int nbackref; /* The number of backreference in this dfa.  */

  /* Bitmap expressing which backreference is used.  */
  bitset_word_t used_bkref_map;
  bitset_word_t completed_bkref_map;

  unsigned int has_plural_match : 1;
  /* If this dfa has "multibyte node", which is a backreference or
     a node which can accept multibyte character or multi character
     collating element.  */
  unsigned int has_mb_node : 1;
  unsigned int is_utf8 : 1;
  unsigned int map_notascii : 1;
  unsigned int word_ops_used : 1;
  int mb_cur_max;
  bitset_t word_char;
  reg_syntax_t syntax;
  int *subexp_map;
  /* Present in DEBUG builds only.  */
#ifdef DEBUG
  char* re_str;
#endif
  /* Lock member, declared through the libc lock macro.  */
  __libc_lock_define (, lock)
};
661 | 
/* Convenience macros for re_node_set.  */
/* Zero-fill the whole set object SET points to.  */
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
/* Remove ID from SET: the value returned by re_node_set_contains, minus
   one, is handed to re_node_set_remove_at as the position to remove.
   NOTE(review): no check is made here for ID being absent -- confirm what
   re_node_set_contains returns in that case.  */
#define re_node_set_remove(set,id) \
  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
/* Reset the element count to zero; the element storage is left alone.  */
#define re_node_set_empty(p) ((p)->nelem = 0)
/* Release the element storage only, not the set object itself.  */
#define re_node_set_free(set) re_free ((set)->elems)
667 | 
668 | 
/* Kind of a single element of a bracket expression; used as the tag of
   bracket_elem_t.
   NOTE(review): meanings inferred from the names -- single-byte character,
   multibyte character, equivalence class, collating symbol, character
   class.  */
typedef enum
{
  SB_CHAR,
  MB_CHAR,
  EQUIV_CLASS,
  COLL_SYM,
  CHAR_CLASS
} bracket_elem_type;
677 | 
/* A single bracket expression element: a type tag plus the operand, stored
   in a union.
   NOTE(review): which union member is valid for which TYPE is not visible
   here -- presumably CH for SB_CHAR, WCH for MB_CHAR and NAME for the named
   kinds; confirm against the bracket parser.  */
typedef struct
{
  bracket_elem_type type;
  union
  {
    unsigned char ch;
    unsigned char *name;
    wchar_t wch;
  } opr;
} bracket_elem_t;
688 | 
689 | 
690 | /* Inline functions for bitset operation.  */
691 | static inline void
692 | bitset_not (bitset_t set)
693 | {
694 |   int bitset_i;
695 |   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
696 |     set[bitset_i] = ~set[bitset_i];
697 | }
698 | 
699 | static inline void
700 | bitset_merge (bitset_t dest, const bitset_t src)
701 | {
702 |   int bitset_i;
703 |   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
704 |     dest[bitset_i] |= src[bitset_i];
705 | }
706 | 
707 | static inline void
708 | bitset_mask (bitset_t dest, const bitset_t src)
709 | {
710 |   int bitset_i;
711 |   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
712 |     dest[bitset_i] &= src[bitset_i];
713 | }
714 | 
715 | #ifdef RE_ENABLE_I18N
716 | /* Inline functions for re_string.  */
717 | static inline int
718 | internal_function __attribute ((pure))
719 | re_string_char_size_at (const re_string_t *pstr, int idx)
720 | {
721 |   int byte_idx;
722 |   if (pstr->mb_cur_max == 1)
723 |     return 1;
724 |   for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
725 |     if (pstr->wcs[idx + byte_idx] != WEOF)
726 |       break;
727 |   return byte_idx;
728 | }
729 | 
730 | static inline wint_t
731 | internal_function __attribute ((pure))
732 | re_string_wchar_at (const re_string_t *pstr, int idx)
733 | {
734 |   if (pstr->mb_cur_max == 1)
735 |     return (wint_t) pstr->mbs[idx];
736 |   return (wint_t) pstr->wcs[idx];
737 | }
738 | 
739 | # ifndef NOT_IN_libc
/* Size, in bytes, of the element starting at byte IDX of PSTR.
   When built inside libc (_LIBC) and the current locale defines collation
   rules (nrules != 0), the size is the distance findidx moves the pointer
   P from its starting position.  In every other case the result is 1.  */
static int
internal_function __attribute ((pure))
re_string_elem_size_at (const re_string_t *pstr, int idx)
{
#  ifdef _LIBC
  const unsigned char *p, *extra;
  const int32_t *table, *indirect;
  int32_t tmp;
  /* NOTE(review): this textual include presumably supplies findidx, which
     in turn uses the TABLE / EXTRA / INDIRECT locals set up below --
     confirm against locale/weight.h.  */
#   include <locale/weight.h>
  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

  if (nrules != 0)
    {
      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
      extra = (const unsigned char *)
	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
						_NL_COLLATE_INDIRECTMB);
      p = pstr->mbs + idx;
      /* Only the new position of P is used; the value stored in TMP is
	 never read.  */
      tmp = findidx (&p);
      return p - pstr->mbs - idx;
    }
  else
#  endif /* _LIBC */
    return 1;
}
766 | # endif
767 | #endif /* RE_ENABLE_I18N */
768 | 
769 | #endif /*  _REGEX_INTERNAL_H */
770 | 


--------------------------------------------------------------------------------
/scriptext.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright (c) 2015 Piotr Stolarz
  3 |    scriptext: Various scripting utilities WinDbg extension
  4 | 
  5 |    Distributed under the GNU General Public License (the License)
  6 |    see accompanying file LICENSE for details.
  7 | 
  8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
  9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 10 |    See the License for more information.
 11 |  */
 12 | 
 13 | #include "common.h"
 14 | #include "string.h"
 15 | #include "file.h"
 16 | #include <errno.h>
 17 | 
 18 | /* DLL entry point */
 19 | BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
 20 | {
 21 |     BOOL ret=TRUE;
 22 | 
 23 |     switch(fdwReason)
 24 |     {
 25 |     case DLL_PROCESS_ATTACH:
 26 |         set_tls_i(TlsAlloc());
 27 |         ret = (get_tls_i()!=TLS_OUT_OF_INDEXES);
 28 |         break;
 29 | 
 30 |     case DLL_PROCESS_DETACH:
 31 |         if (get_tls_i()!=TLS_OUT_OF_INDEXES) TlsFree(get_tls_i());
 32 |         break;
 33 |     }
 34 | 
 35 |     return ret;
 36 | }
 37 | 
 38 | /* Extension initialization */
 39 | HRESULT CALLBACK
 40 | DebugExtensionInitialize(PULONG Version, PULONG Flags)
 41 | {
 42 |     *Version = DEBUG_EXTENSION_VERSION(1, 0);
 43 |     *Flags = 0;
 44 | 
 45 |     return S_OK;
 46 | }
 47 | 
 48 | #define MAX_PR_NAME     16
 49 | 
 50 | /* Get pseudo-reg name from flag arg and write it to the buffer 'pc_pr_name'
 51 |    (min MAX_PR_NAME long)
 52 |  */
 53 | static BOOL get_pr_name(const flag_desc_t *p_flags_dsc, char *pc_pr_name)
 54 | {
 55 |     BOOL ret=FALSE;
 56 | 
 57 |     memset(pc_pr_name, 0, MAX_PR_NAME);
 58 |     pc_pr_name[0]='$'; pc_pr_name[1]='t';
 59 | 
 60 |     memcpy(pc_pr_name+2,
 61 |         p_flags_dsc->pc_arg, min(p_flags_dsc->arg_len, MAX_PR_NAME-2));
 62 |     pc_pr_name[MAX_PR_NAME-1]=0;
 63 | 
 64 |     int pr_num = atoi(pc_pr_name+2);
 65 |     if (!(pr_num>=0 && pr_num<20)) {
 66 |         err_dbgprintf("Pseudo-reg number out of the range [0..19].\n");
 67 |         goto finish;
 68 |     }
 69 | 
 70 |     ret=TRUE;
 71 | finish:
 72 |     return ret;
 73 | }
 74 | 
 75 | /* sxtr input/pattern/replacement */
 76 | HRESULT CALLBACK sxtr(PDEBUG_CLIENT4 Client, PCSTR args)
 77 | {
 78 |     HRESULT ret=E_FAIL;
 79 |     set_client(Client);
 80 | 
 81 |     flag_desc_t flags_dsc[] = {{'t', TRUE}, {0}};
 82 |     size_t rd_sz = read_flags(args, flags_dsc);
 83 |     args += rd_sz;
 84 | 
 85 |     char pr_name[MAX_PR_NAME];
 86 |     if (flags_dsc[0].is_pres) {
 87 |         if (!get_pr_name(&flags_dsc[0], pr_name)) goto finish;
 88 |         str_extr(args, pr_name);
 89 |     } else {
 90 |         str_extr(args, NULL);
 91 |     }
 92 | 
 93 |     ret=S_OK;
 94 | finish:
 95 |     return ret;
 96 | }
 97 | 
 98 | /* fopn [-m mode] [-t num] fname */
 99 | HRESULT CALLBACK fopn(PDEBUG_CLIENT4 Client, PCSTR args)
100 | {
101 |     HRESULT ret=E_FAIL;
102 |     set_client(Client);
103 | 
104 |     flag_desc_t flags_dsc[] = {{'m', TRUE}, {'t', TRUE}, {0}};
105 |     size_t rd_sz = read_flags(args, flags_dsc);
106 |     args += rd_sz;
107 | 
108 |     char mode[16] = "r+";
109 |     if (flags_dsc[0].is_pres) {
110 |         memcpy(mode,
111 |             flags_dsc[0].pc_arg, min(flags_dsc[0].arg_len, sizeof(mode)));
112 |         mode[sizeof(mode)-1]=0;
113 |     }
114 | 
115 |     char pr_name[MAX_PR_NAME];
116 |     if (flags_dsc[1].is_pres) {
117 |         if (!get_pr_name(&flags_dsc[1], pr_name)) goto finish;
118 |     } else {
119 |         /* set default pseudo-reg */
120 |         strcpy(pr_name, "$t0");
121 |     }
122 | 
123 |     if (file_open(args, mode, pr_name)) ret=S_OK;
124 | 
125 | finish:
126 |     return ret;
127 | }
128 | 
129 | /* fwrt hndl input */
130 | HRESULT CALLBACK fwrt(PDEBUG_CLIENT4 Client, PCSTR args)
131 | {
132 |     HRESULT ret=E_FAIL;
133 |     set_client(Client);
134 | 
135 |     ULONG64 fh_val;
136 |     if (!get_expression(args, &fh_val, &args)) goto finish;
137 |     for (; *args && isspace(*args); args++);
138 | 
139 |     if (fh_val && file_wrtstr((FILE*)fh_val, args)) ret=S_OK;
140 | 
141 | finish:
142 |     return ret;
143 | }
144 | 
145 | /* frdl hndl */
146 | HRESULT CALLBACK frdl(PDEBUG_CLIENT4 Client, PCSTR args)
147 | {
148 |     HRESULT ret=E_FAIL;
149 |     set_client(Client);
150 | 
151 |     ULONG64 fh_val;
152 |     if (!get_expression(args, &fh_val, &args)) goto finish;
153 | 
154 |     if (fh_val) file_rdln((FILE*)fh_val);
155 | 
156 |     ret=S_OK;
157 | finish:
158 |     return ret;
159 | }
160 | 
161 | /* fcls hndl */
162 | HRESULT CALLBACK fcls(PDEBUG_CLIENT4 Client, PCSTR args)
163 | {
164 |     HRESULT ret=E_FAIL;
165 |     set_client(Client);
166 | 
167 |     ULONG64 fh_val;
168 |     if (!get_expression(args, &fh_val, NULL)) goto finish;
169 | 
170 |     if (!fh_val || fclose((FILE*)fh_val)) {
171 |         err_dbgprintf("File closure error\n");
172 |     } else ret=S_OK;
173 | finish:
174 |     return ret;
175 | }
176 | 
/* help info

   Prints the usage of all the extension commands via dbgprintf(). The
   'args' string is not used. Always returns S_OK.
 */
HRESULT CALLBACK help(PDEBUG_CLIENT4 Client, PCSTR args)
{
    set_client(Client);

    dbgprintf(
"scriptext: Various scripting utilities\n\n"
"sxtr [-t num] input/pattern/replacement\n"
"    Look for a substring of the input string matching the extended POSIX RE pattern.\n"
"    If the pattern matches, the substring is extracted and modified according to\n"
"    the replacement string. The input string may contain alphanumeric characters\n"
"    plus '_' only, or must be enclosed in '' or \"\". If it's enclosed in [], then\n"
"    the input string specifies an alias name containing a string to process.\n"
"    Delimiter character is recognized as the first one after the input string. All\n"
"    the strings may contain escaped characters.\n"
"    -t: If specified, provides a pseudo-reg $t number where the matching result\n"
"        will be set: 0 - not matched, 1 - matched.\n\n"
"fopn [-m mode] [-t num] fname\n"
"    Open a file with a name fname.\n"
"    -m: Open mode (C standard). \"r+\" by default.\n"
"    -t: Pseudo-reg $t number where a handle of the opened file will be written. In\n"
"        case of opening error zero will be written there. If not specified $t0 is\n"
"        taken.\n"
"fwrt hndl input\n"
"    Write the input string to the file with the handle hndl. If the input string\n"
"    is enclosed in [] then it specifies an alias name containing a string to write.\n"
"    The input string may contain escaped characters.\n"
"frdl hndl\n"
"    Read line from a file with the handle hndl. The file shall be opened for read\n"
"    in the text mode.\n"
"fcls hndl\n"
"    Close a file with a handle hndl.\n\n"
"help\n"
"    Display this help.\n");

    return S_OK;
}
214 | 


--------------------------------------------------------------------------------
/scriptext.def:
--------------------------------------------------------------------------------
; Symbols exported by the extension DLL: the extension initialization
; routine followed by the extension commands.
EXPORTS
    DebugExtensionInitialize
    sxtr
    fopn
    fwrt
    frdl
    fcls
    help
9 | 


--------------------------------------------------------------------------------
/string.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Copyright (c) 2015 Piotr Stolarz
  3 |    scriptext: Various scripting utilities WinDbg extension
  4 | 
  5 |    Distributed under the GNU General Public License (the License)
  6 |    see accompanying file LICENSE for details.
  7 | 
  8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
  9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 10 |    See the License for more information.
 11 |  */
 12 | 
 13 | #include "common.h"
 14 | #include "string.h"
 15 | #include "regex/regex.h"
 16 | 
 17 | #define RE_GROUPS       10
 18 | 
 19 | /* exported; see header for details */
 20 | void str_extr(const char *pc_in, const char *pc_prnm)
 21 | {
 22 |     BOOL b_match=FALSE;
 23 |     IDebugControl2 *DebugControl=NULL;
 24 |     IDebugRegisters2 *DebugRegisters=NULL;
 25 | 
 26 |     char buf[3072];     /* default buffer */
 27 |     char *pc_ebuf=NULL; /* extra buffer */
 28 | 
 29 |     if (get_client()->QueryInterface(
 30 |         __uuidof(IDebugControl2), (void **)&DebugControl)!=S_OK) goto finish;
 31 | 
 32 |     /* parse input arguments */
 33 |     char delim, lstc;
 34 |     char *pc_str=NULL, *pc_aname=NULL;
 35 |     char *pc_pttrn, *pc_replcmt;
 36 | 
 37 |     strncpy(buf, pc_in, sizeof(buf));
 38 |     buf[sizeof(buf)-1]=0;
 39 | 
 40 |     if (buf[0]=='\'' || buf[0]=='"')
 41 |     {
 42 |         /* string in apostrophes  */
 43 |         pc_str = &buf[1];
 44 |         pc_pttrn = pc_str+stresc(pc_str, buf[0], &lstc);
 45 |         if (!lstc || !*pc_str) goto finish;
 46 |     } else
 47 |     if (buf[0]=='[')
 48 |     {
 49 |         /* alias name */
 50 |         pc_aname = &buf[1];
 51 |         pc_pttrn = pc_aname+stresc(pc_aname, ']', &lstc);
 52 |         if (!lstc || !*pc_aname) goto finish;
 53 |     } else
 54 |     {
 55 |         /* string of alphanums + '_' */
 56 |         size_t i;
 57 |         for (i=0, pc_str=&buf[0]; isalnum(pc_str[i]) || pc_str[i]=='_'; i++);
 58 |         pc_pttrn=&pc_str[i];
 59 |         if (!i) goto finish;
 60 |     }
 61 | 
 62 |     /* delimiter char */
 63 |     delim = *pc_pttrn;
 64 |     *pc_pttrn++ = 0;
 65 |     if (!delim || delim=='\\') goto finish;
 66 | 
 67 |     /* RE pattern */
 68 |     pc_replcmt = pc_pttrn+stresc(pc_pttrn, delim, &lstc);
 69 |     if (!*pc_pttrn) goto finish;
 70 | 
 71 |     /* replacement string */
 72 |     if (!lstc) pc_replcmt--;
 73 |     stresc(pc_replcmt);
 74 | 
 75 |     if (pc_aname)
 76 |     {
 77 |         pc_str = pc_replcmt+strlen(pc_replcmt)+1;
 78 |         size_t rem_buf_sz = pc_str-&buf[0];
 79 | 
 80 |         /* get alias val size */
 81 |         ULONG aval_sz;
 82 |         if (DebugControl->GetTextReplacement(
 83 |             pc_aname, 0, NULL, 0, NULL, NULL, 0, &aval_sz)!=S_OK) goto finish;
 84 | 
 85 |         /* read value */
 86 |         aval_sz = RNDUP_DW(aval_sz+1);
 87 |         if (aval_sz > rem_buf_sz) {
 88 |             if (!(pc_str=pc_ebuf=(char*)malloc(aval_sz))) goto finish;
 89 |         }
 90 |         if (DebugControl->GetTextReplacement(
 91 |             pc_aname, 0, NULL, 0, NULL, pc_str, aval_sz, NULL)!=S_OK)
 92 |             goto finish;
 93 | 
 94 |         if (!*pc_str) goto finish;
 95 |     }
 96 | 
 97 |     regex_t re;
 98 |     regmatch_t rms[RE_GROUPS];
 99 | 
100 |     /* compile RE pattern and match with the input string */
101 |     if (regcomp(&re, pc_pttrn, REG_EXTENDED)) goto finish;
102 |     if (!regexec(&re, pc_str, RE_GROUPS, rms, 0))
103 |     {
104 |         b_match=TRUE;
105 | 
106 |         /* print the replacement string */
107 |         size_t i;
108 |         for (i=0; pc_replcmt[i]; i++)
109 |         {
110 |             if (pc_replcmt[i]!='\\') continue;
111 | 
112 |             char esc = pc_replcmt[i+1];
113 |             if (!('0'<=esc && esc<='9')) continue;
114 | 
115 |             /* RE group */
116 |             int grp = esc-'0';
117 |             if (rms[grp].rm_so==-1 || rms[grp].rm_eo==-1) continue;
118 | 
119 |             if (i) {
120 |                 /* print partial replacement */
121 |                 pc_replcmt[i] = 0;
122 |                 cdbgprintf("%s", pc_replcmt);
123 |             }
124 | 
125 |             /* print group */
126 |             char *pc_grp_start = &pc_str[rms[grp].rm_so];
127 |             char *pc_grp_end = &pc_str[rms[grp].rm_eo];
128 | 
129 |             char c = *pc_grp_end;
130 |             *pc_grp_end = 0;
131 |             cdbgprintf("%s", pc_grp_start);
132 |             *pc_grp_end = c;
133 | 
134 |             /* continue loop from the place after the group mark */
135 |             pc_replcmt = &pc_replcmt[i+2];
136 |             i=(size_t)-1;
137 |         }
138 | 
139 |         /* print last part of the replacement */
140 |         if (i) cdbgprintf("%s", pc_replcmt);
141 |     }
142 | 
143 |     regfree(&re);
144 | finish:
145 |     if (pc_prnm) {
146 |         /* if required set the result in pseudo-reg */
147 |         if ((get_client()->QueryInterface(
148 |             __uuidof(IDebugRegisters2), (void **)&DebugRegisters))==S_OK)
149 |         {
150 |             DEBUG_VALUE mtch_val;
151 |             mtch_val.Type = DEBUG_VALUE_INT32;
152 |             mtch_val.I32 = (ULONG)(b_match ? 1 : 0);
153 | 
154 |             ULONG pr_i;
155 |             if (DebugRegisters->GetPseudoIndexByName(pc_prnm, &pr_i)==S_OK) {
156 |                 DebugRegisters->SetPseudoValues(
157 |                     DEBUG_REGSRC_DEBUGGEE, 1, NULL, pr_i, &mtch_val);
158 |             }
159 |         }
160 |     }
161 |     if (pc_ebuf) free(pc_ebuf);
162 |     if (DebugRegisters) DebugRegisters->Release();
163 |     if (DebugControl) DebugControl->Release();
164 |     return;
165 | }
166 | 


--------------------------------------------------------------------------------
/string.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Copyright (c) 2015 Piotr Stolarz
 3 |    scriptext: Various scripting utilities WinDbg extension
 4 | 
 5 |    Distributed under the GNU General Public License (the License)
 6 |    see accompanying file LICENSE for details.
 7 | 
 8 |    This software is distributed WITHOUT ANY WARRANTY; without even the
 9 |    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 |    See the License for more information.
11 |  */
12 | 
13 | #ifndef __SCRIPTEXT_STRING_H__
14 | #define __SCRIPTEXT_STRING_H__
15 | 
/* Extract and modify a pattern from a string. 'pc_in' points to an input in
   the form of: input D pattern D replacement, where D is a delimiter char
   and the input is one of: a string of alphanumerics and '_', a string
   enclosed in '' or "", or an alias name enclosed in []. If the pattern
   matches, the processed replacement string is written to the client's
   output. If 'pc_prnm' is not NULL the match result (1 - matched, 0 - not
   matched) is set under the indicated pseudo-reg. The function returns no
   value.
 */
void str_extr(const char *pc_in, const char *pc_prnm);
23 | 
24 | #endif /* __SCRIPTEXT_STRING_H__ */
25 | 


--------------------------------------------------------------------------------