├── .gitignore
├── CHANGES
├── CMakeLists.txt
├── LICENSE
├── README.md
├── docs
    ├── Manual.txt
    ├── hack.txt
    └── regex-module.txt
└── src
    ├── BUILDING
    ├── CHANGES
    ├── COPYING
    ├── Manual.txt
    ├── MiniSCHEMETribute.txt
    ├── api.c
    ├── config.h
    ├── dynload.c
    ├── dynload.h
    ├── hack.txt
    ├── init.scm
    ├── main.c
    ├── opdefines.h
    ├── re
        ├── COPYRIGHT
        ├── Makefile.in
        ├── README
        ├── README.1st
        ├── WHATSNEW
        ├── cclass.h
        ├── cname.h
        ├── debug.c
        ├── debug.ih
        ├── engine.c
        ├── engine.ih
        ├── main.c
        ├── main.ih
        ├── mkh
        ├── re.c
        ├── re.makefile
        ├── re.scm
        ├── regcomp.c
        ├── regcomp.ih
        ├── regerror.c
        ├── regerror.ih
        ├── regex.001
        ├── regex.3
        ├── regex.7
        ├── regex.h
        ├── regex2.h
        ├── regexec.c
        ├── regfree.c
        ├── split.c
        ├── tests
        └── utils.h
    ├── scheme-private.h
    ├── scheme.c
    └── scheme.h


/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/windows,linux,cmake,c++,c
  3 | 
  4 | ### Windows ###
  5 | # Windows image file caches
  6 | Thumbs.db
  7 | ehthumbs.db
  8 | 
  9 | # Folder config file
 10 | Desktop.ini
 11 | 
 12 | # Recycle Bin used on file shares
 13 | $RECYCLE.BIN/
 14 | 
 15 | # Windows Installer files
 16 | *.cab
 17 | *.msi
 18 | *.msm
 19 | *.msp
 20 | 
 21 | # Windows shortcuts
 22 | *.lnk
 23 | 
 24 | 
 25 | ### Linux ###
 26 | *~
 27 | 
 28 | # temporary files which can be created if a process still has a handle open of a deleted file
 29 | .fuse_hidden*
 30 | 
 31 | # KDE directory preferences
 32 | .directory
 33 | 
 34 | # Linux trash folder which might appear on any partition or disk
 35 | .Trash-*
 36 | 
 37 | 
 38 | ### CMake ###
 39 | CMakeCache.txt
 40 | CMakeFiles
 41 | CMakeScripts
 42 | Makefile
 43 | cmake_install.cmake
 44 | install_manifest.txt
 45 | 
 46 | 
 47 | ### C++ ###
 48 | # Compiled Object files
 49 | *.slo
 50 | *.lo
 51 | *.o
 52 | *.obj
 53 | 
 54 | # Precompiled Headers
 55 | *.gch
 56 | *.pch
 57 | 
 58 | # Compiled Dynamic libraries
 59 | *.so
 60 | *.dylib
 61 | *.dll
 62 | 
 63 | # Fortran module files
 64 | *.mod
 65 | 
 66 | # Compiled Static libraries
 67 | *.lai
 68 | *.la
 69 | *.a
 70 | *.lib
 71 | 
 72 | # Executables
 73 | *.exe
 74 | *.out
 75 | *.app
 76 | 
 77 | 
 78 | ### C ###
 79 | # Object files
 80 | *.o
 81 | *.ko
 82 | *.obj
 83 | *.elf
 84 | 
 85 | # Precompiled Headers
 86 | *.gch
 87 | *.pch
 88 | 
 89 | # Libraries
 90 | *.lib
 91 | *.a
 92 | *.la
 93 | *.lo
 94 | 
 95 | # Shared objects (inc. Windows DLLs)
 96 | *.dll
 97 | *.so
 98 | *.so.*
 99 | *.dylib
100 | 
101 | # Executables
102 | *.exe
103 | *.out
104 | *.app
105 | *.i*86
106 | *.x86_64
107 | *.hex
108 | 
109 | # Debug files
110 | *.dSYM/
111 | 
112 | # -------------------------------------------------------
113 | # Project specific ignores
114 | 
115 | # out-of-source build directories
116 | build-*/
117 | 


--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
  1 | Change Log
  2 | ----------
  3 | 
  4 | Version 1.50
  5 |     - extended the public API and properly namespaced everything
  6 |     - separated the main tinyscheme shell into a separate file
  7 |     - made source compatible with Visual C++ (and consequently C89)
  8 |     - added regex module and updated it to use the new API
  9 | 
 10 | Version 1.41
 11 |     Bugs fixed:
 12 |         #3020389 - Added makefile section for Mac OS X  (SL)
 13 |         #3286135 - Fixed num_mod routine which caused errors in use of modulo
 14 |         #3290232 - Corrected version number shown on startup  (GM)
 15 |         #3394882 - Added missing #if in opdefines.h around get and put  (DC)
 16 |         #3395547 - Fix for the modulo procedure  (DC)
 17 |         #3400290 - Optimized append to make it an O(n) operation  (DC)
 18 |         #3493926 - Corrected flag used when building shared files on OSX (J)
 19 | 
 20 |     R5RS related changes:
 21 |         #2866196 - Parser does not handle delimiters correctly
 22 |         #3395548 - Add a decimal point to inexact numbers in atom2str  (DC)
 23 |         #3399331 - Make min/max return inexact when any argument is inexact
 24 |         #3399332 - Compatability fix for expt.
 25 |         #3399335 - Optional radix for string->number and number->string  (DC)
 26 |         #3400202 - Append with one argument should not return a list  (DC)
 27 |         #3400284 - Compatability fix for integer?
 28 | 
 29 |     Other changes:
 30 |         - Added flags to makefile for MinGW/MSYS (TC)
 31 |         - Moved variable declarations to avoid warnings with some compilers
 32 |         - Don't print space after initial #( when printing vectors.
 33 |         - Minor optimization for is_nonneg().
 34 |         - No need to round integers in OP_ROUND (#3400284)
 35 |         - Fixes to code that reports line number with error  (RC)
 36 | 
 37 |     Contributors:
 38 |         Kevin Cozens, Gordon McNutt, Doug Currie, Sean Long, Tim Cas, Joey,
 39 |         Richard Copley, and CMarinier.
 40 | 
 41 | Version 1.40
 42 |     Bugs fixed:
 43 |         #1964950 - Stop core dumps due to bad syntax in LET (and variants)
 44 |         #2826594 - allow reverse to work on empty list (Tony Garnock-Jones)
 45 |         Potential problem of arglist to foreign calls being wrongly GC'ed.
 46 |         Fixed bug that read could loop forever (tehom).
 47 | 
 48 |     API changes:
 49 |         Exposed is_list and list_length.
 50 |         Added scheme_register_foreign_func_list and declarations for it (tehom)
 51 |         Defined *compile-hook* (tehom)
 52 | 
 53 |     Other changes:
 54 |         Updated is_list and list_length to handle circular lists.
 55 |         Nested calling thru C has been made now safer (tehom)
 56 |         Peter Michaux cleaned up port_rep_from_file
 57 |         Added unwind-protect (tehom)
 58 |         Some cleanups to in/outport and Eval_Cycle by Peter Michaux
 59 |         Report error line number (Mostly by Sanel Zukan, back-compatibility by Tehom)
 60 | 
 61 |     Contributors:
 62 |         Kevin Cozens, Dimitrios Souflis, Tom Breton, Peter Michaux, Sanel Zukan,
 63 |         and Tony Garnock-Jones.
 64 | 
 65 | Version 1.39
 66 |     Bugs fixed:
 67 |         Fix for the load bug
 68 |         Fixed parsing of octal coded characters. Fixes bug #1818018.
 69 |         Added tests for when mk_vector is out of memory. Can't rely on sc->sink.
 70 |         Fix for bug #1794369
 71 |         Finished feature-request 1599947: scheme_apply0 etc return values.
 72 |         Partly provided feature-request 1599947: Expose list_length, eqv, etc
 73 |         Provided feature-request 1599945, Scheme->C->Scheme calling.
 74 |         Fix for bug 1593861 (behavior of is_integer)
 75 |         Fix for bug 1589711
 76 |         Error checking of binding spec syntax in LET and LETREC. The bad syntax
 77 |         was causing a segmentation fault in Linux. Complete fixes for bug #1817986.
 78 |         Error checking of binding spec syntax in LET*
 79 |         Bad syntax was causing core dump in Linux.
 80 |         Fix for nasty gc bug
 81 | 
 82 |     R5RS changes:
 83 |         R5RS requires numbers to be of equal value AND of the same type (ie. both
 84 |         exact or inexact) in order to return #t from eqv?. R5RS compliance fix.
 85 |         String output ports now conform to SRFI-6
 86 | 
 87 |     Other changes:
 88 |         Drew Yao fixed buffer overflow problems in mk_sharp_const.
 89 |         put OP_T0LVL in charge of reacting to EOF
 90 |         file_push checks array bounds (patch from Ray Lehtiniemi)
 91 |         Changed to always use snprintf (Patch due to Ramiro bsd1628)
 92 |         Updated usage information using text from the Manual.txt file.
 93 | 
 94 | Version 1.38
 95 |     Interim release until the rewrite, mostly incorporating modifications
 96 |     from Kevin Cozens. Small addition for Cygwin in the makefile, and
 97 |     modifications by Andrew Guenther for Apple platforms.
 98 | 
 99 | Version 1.37
100 |     Joe Buehler submitted reserve_cells.
101 | 
102 | Version 1.36
103 |     Joe Buehler fixed a patch in the allocator.
104 |     Alexander Shendi moved the comment handling in the scanner, which
105 |     fixed an obscure bug for which Mike E had provided a patch as well.
106 |     Kevin Cozens has submitted some fixes and modifications which have
107 |     not been incorporated yet in their entirety.
108 | 
109 | Version 1.35
110 |     Todd Showalter discovered that the number of free cells reported
111 |     after GC was incorrect, which could also cause unnecessary allocations.
112 | 
113 | Version 1.34
114 |     Long missing version. Lots of bugfixes have accumulated in my email, so
115 |     I had to start using them. In this version, Keenan Pepper has submitted
116 |     a bugfix for the string comparison library procedure, Wouter Boeke
117 |     modified some code that was casting to the wrong type and crashed on
118 |     some machines, "SheppardCo" submitted a replacement "modulo" code and
119 |     Scott Fenton submitted lots of corrections that shut up some compiler
120 |     warnings. Brian Maher submitted instructions on how to build on OS-X.
121 |     I have to dig deeper into my mailbox and find earlier emails, too.
122 | 
123 | Version 1.33
124 |     Charles Hayden fixed a nasty GC bug of the new stack frame, while in
125 |     the process of porting TinyScheme to C++. He also submitted other
126 |     changes, and other people also had comments or requests, but the GC
127 |     bug was so important that this version is put through the door to
128 |     correct it.
129 | 
130 | Version 1.32
131 |     Stephen Gildea put some quality time on TinyScheme again, and made
132 |     a whole lot of changes to the interpreter that made it noticeably
133 |     faster.
134 | 
135 | Version 1.31
136 |     Patches to the hastily-done version 1.30. Stephen Gildea fixed
137 |     some things done wrongly, and Richard Russo fixed the makefile
138 |     for building on Windows. Property lists (heritage from MiniScheme)
139 |     are now optional and have dissappeared from the interface. They
140 |     should be considered as deprecated.
141 | 
142 | Version 1.30
143 |     After many months, I followed Preston Bannister's advice of
144 |     using macros and a single source text to keep the enums and the
145 |     dispatch table in sync, and I used his contributed "opdefines.h".
146 |     Timothy Downs contributed a helpful function, "scheme_call".
147 |     Stephen Gildea contributed new versions of the makefile and
148 |     practically all other sources. He created a built-in STRING-APPEND,
149 |     and fixed a lot of other bugs.
150 |     Ruhi Bloodworth reported fixes necessary for OS X and a small
151 |     bug in dynload.c.
152 | 
153 | Version 1.29
154 |     The previous version contained a lot of corrections, but there
155 |     were a lot more that still wait on a sheet of paper lost in a
156 |     carton someplace after my house move... Manuel Heras-Gilsanz
157 |     noticed this and resent his own contribution, which relies on
158 |     another bugfix that v.1.28 was missing: a problem with string
159 |     output, that this version fixes. I hope other people will take
160 |     the time to resend their contributions, if they didn't make it
161 |     to v.1.28.
162 | 
163 | Version 1.28
164 |     Many people have contacted me with bugfixes or remarks in
165 |     the three months I was inactive. A lot of them spotted that
166 |     scheme_deinit crashed while reporting gc results. They suggested
167 |     that sc->outport be set to NIL in scheme_deinit, which I did.
168 |     Dennis Taylor remarked that OP_VALUEPRINT reset sc->value instead
169 |     of preserving it. He submitted a modification which I adopted
170 |     partially. David Hovemeyer sent me many little changes, that you
171 |     will find in version 1.28, and Partice Stoessel modified the
172 |     float reader to conform to R5RS.
173 | 
174 | Version 1.27
175 |     Version 1.27 is the successor of 1.25. Bug fixes only, but I had to
176 |     release them so that everybody can profit. 'Backchar' tried to write
177 |     back to the string, which obviously didn't work for const strings.
178 |     'Substring' didn't check for crossed start and end indices. Defines
179 |     changed to restore the ability to compile under MSVC.
180 | 
181 | Version 1.26
182 |     Version 1.26 was never released. I changed a lot of things, in fact
183 |     too much, even the garbage collector, and hell broke loose. I'll
184 |     try a more gradual approach next time.
185 | 
186 | Version 1.25
187 |     Types have been homogenized to be able to accommodate a different
188 |     representation. Plus, promises are no longer closures. Unfortunately,
189 |     I discovered that continuations and force/delay do not pass the SCM
190 |     test (and never did)... However, on the bright side, what little
191 |     modifications I did had a large impact on the footprint:
192 |     USE_NO_FEATURES now produces an object file of 63960 bytes on Linux!
193 | 
194 | Version 1.24
195 |     SCM tests now pass again after change in atom2str.
196 | 
197 | Version 1.23
198 |     Finally I managed to mess it up with my version control. Version
199 |     1.22 actually lacked some of the things I have been fixing in the
200 |     meantime. This should be considered as a complete replacement for
201 |     1.22.
202 | 
203 | Version 1.22
204 |     The new ports had a bug in LOAD. MK_CLOSURE is introduced.
205 |     Shawn Wagner inquired about string->number and number->string.
206 |     I added string->atom and atom->string and defined the number
207 |     functions from them. Doing that, I fixed WRITE applied to symbols
208 |     (it didn't quote them). Unfortunately, minimum build is now
209 |     slightly larger than 64k... I postpone action because Jason's idea
210 |     might solve it elegantly.
211 | 
212 | Version 1.21
213 |     Jason Felice submitted a radically different datatype representation
214 |     which he had implemented. While discussing its pros and cons, it
215 |     became apparent that the current implementation of ports suffered
216 |     from a grave fault: ports were not garbage-collected. I changed the
217 |     ports to be heap-allocated, which enabled the use of string ports
218 |     for loading. Jason also fixed errors in the garbage collection of
219 |     vectors. USE_VERBATIM is gone. "ssp_compiler.c" has a better solution
220 |     on HTML generation. A bug involving backslash notation in strings
221 |     has been fixed. '-c' flag now executes next argument as a stream of
222 |     Scheme commands. Foreign functions are now also heap allocated,
223 |     and scheme_define is used to define everything.
224 | 
225 | Version 1.20
226 |     Tracing has been added. The toplevel loop has been slightly
227 |     rearranged. Backquote reading for vector templates has been
228 |     sanitized. Symbol interning is now correct. Arithmetic functions
229 |     have been corrected. APPLY, MAP, FOR-EACH, numeric comparison
230 |     functions fixed. String reader/writer understands \xAA notation.
231 | 
232 | Version 1.19
233 |     Carriage Return now delimits identifiers. DOS-formatted Scheme files
234 |     can be used by Unix. Random number generator added to library.
235 |     Fixed some glitches of the new type-checking scheme. Fixed erroneous
236 |     (append '() 'a) behavior. Will continue with r4rstest.scm to
237 |     fix errors.
238 | 
239 | Version 1.18
240 |     The FFI has been extended. USE_VERBOSE_GC has gone. Anyone wanting
241 |     the same functionality can put (gcverbose #t) in init.scm.
242 |     print-width was removed, along with three corresponding op-codes.
243 |     Extended character constants with ASCII names were added.
244 |     mk_counted_string paves the way for full support of binary strings.
245 |     As much as possible of the type-checking chores were delegated
246 |     to the inner loop, thus reducing the code size to less than 4200 loc!
247 | 
248 | Version 1.17
249 |     Dynamically-loaded extensions are more fully integrated.
250 |     TinyScheme is now distributed under the BSD open-source license.
251 | 
252 | Version 1.16
253 |     Dynamically-loaded extensions introduced (USE_DL).
254 |     Santeri Paavolainen found a race condition: When a cons is executed,
255 |     and each of the two arguments is a constructing function,  GC could
256 |     happen before all arguments are evaluated and cons() is called, and
257 |     the evaluated arguments would all be reclaimed!
258 |     Fortunately, such a case was rare in the code, although it is
259 |     a pitfall in new code and code in foreign functions. Currently, only
260 |     one such case remains, when COLON_HOOK is defined.
261 | 
262 | Version 1.15
263 |     David Gould also contributed some changes that speed up operation.
264 |     Kirk Zurell fixed HASPROP.
265 |     The Garbage Collection didn't collect all the garbage...fixed.
266 | 
267 | Version 1.14
268 |     Unfortunately, after Andre fixed the GC it became obvious that the
269 |     algorithm was too slow... Fortunately, David Gould found a way to
270 |     speed it up.
271 | 
272 | Version 1.13
273 |     Silly bug involving division by zero resolved by Roland Kaufman.
274 |     Macintoch support from Shmulik Regev.
275 |     Float parser bug fixed by Alexander Shendi.
276 |     GC bug from Andru Luvisi.
277 | 
278 | Version 1.12
279 |     Cis* incorrectly called isalpha() instead of isascii()
280 |     Added USE_CHAR_CLASSIFIERS, USE_STRING_PORTS.
281 | 
282 | Version 1.11
283 |     BSDI defines isnumber... changed all similar functions to is_*
284 |     EXPT now has correct definition. Added FLOOR,CEILING,TRUNCATE
285 |     and ROUND, courtesy of Bengt Kleberg. Preprocessor symbols now
286 |     have values 1 or 0, and can be set as compiler defines (proposed
287 |     by Andy Ganor *months* ago). 'prompt' and 'InitFile' can now be
288 |     defined during compilation, too.
289 | 
290 | Version 1.10
291 |     Another bug when file ends with comment!
292 |     Added DEFINE-MACRO in init.scm, courtesy of Andy Gaynor.
293 | 
294 | Version 1.09
295 |     Removed bug when READ met EOF. lcm.
296 | 
297 | Version 1.08
298 |     quotient,remainder and modulo. gcd.
299 | 
300 | Version 1.07
301 |     '=>' in cond now exists
302 |     list? now checks for circularity
303 |     some reader bugs removed
304 |     Reader is more consistent wrt vectors
305 |     Quote and Quasiquote work with vectors
306 | 
307 | Version 1.06
308 |     #! is now skipped
309 |     generic-assoc bug removed
310 |     strings are now managed differently, hack.txt is removed
311 |     various delicate points fixed
312 | 
313 | Version 1.05
314 |     Support for scripts, *args*, "-1" option.
315 |     Various R5RS procedures.
316 |     *sharp-hook*
317 |     Handles unmatched parentheses.
318 |     New architecture for procedures.
319 | 
320 | Version 1.04
321 |     Added missing T_ATOM bits...
322 |     Added vectors
323 |     Free-list is sorted by address, since vectors need consecutive cells.
324 |     (quit <exitcode>) for use with scripts
325 | 
326 | Version 1.03 (26 Aug 1998):
327 |     Extended .h with useful functions for FFI
328 |     Library: with-input-* etc.
329 |     Finished R5RS I/O, added string ports.
330 | 
331 | Version 1.02 (25 Aug 1998):
332 |     First part of R5RS I/O.
333 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required (VERSION 3.1)
 2 | project (tinyscheme)
 3 | 
 4 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 5 | # set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 6 | # set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 7 | 
 8 | # remove 'lib' prefix for dlls 
 9 | if(WIN32 AND CMAKE_COMPILER_IS_GNUCXX)
10 | 	set(CMAKE_SHARED_LIBRARY_PREFIX "")
11 | endif()
12 | 
13 | # ----------------------------------------------------------------------------
14 | 
15 | set (TINYSCHEME_DIR "src")
16 | set (RE_MODULE_DIR "src/re")
17 | 
18 | # ----------------------------------------------------------------------------
19 | 
20 | set (TINYSCHEME_SRCS
21 | 	${TINYSCHEME_DIR}/scheme.c
22 | 	${TINYSCHEME_DIR}/api.c
23 | 	${TINYSCHEME_DIR}/dynload.c
24 | )
25 | 
26 | set(TINYSCHEME_APP_SRCS
27 | 	${TINYSCHEME_DIR}/main.c
28 | )
29 | 
30 | if (WIN32)
31 | 
32 | 	set(TINYSCHEME_DEFS
33 | 		USE_STRLWR=0 USE_DL=1 USE_MATH=1 USE_ASCII_NAMES=0
34 | 	)
35 | 
36 | if (MSVC)
37 | 	set(TINYSCHEME_DEFS ${TINYSCHEME_DEFS} _CRT_SECURE_NO_WARNINGS _CRT_NONSTDC_NO_WARNINGS)
38 | endif(MSVC)
39 | 
40 | else(WIN32)
41 | 	set(TINYSCHEME_DEFS
42 | 		SUN_DL=1 USE_DL=1 USE_MATH=1 USE_ASCII_NAMES=0
43 | 	)
44 | endif(WIN32)
45 | 
46 | # ----------------------------------------------------------------------------
47 | 
48 | add_library (tinyscheme SHARED ${TINYSCHEME_SRCS})
49 | target_compile_definitions(tinyscheme PUBLIC ${TINYSCHEME_DEFS})
50 | 
51 | add_executable(tinyscheme-app ${TINYSCHEME_APP_SRCS})
52 | target_link_libraries(tinyscheme-app tinyscheme)
53 | target_compile_definitions(tinyscheme-app PUBLIC ${TINYSCHEME_DEFS})
54 | set_target_properties (tinyscheme-app PROPERTIES OUTPUT_NAME tinyscheme)
55 | 
56 | # ----------------------------------------------------------------------------
57 | 
58 | set(RE_MODULE_SRCS
59 | 	${RE_MODULE_DIR}/re.c 
60 | 	${RE_MODULE_DIR}/debug.c 
61 | 	${RE_MODULE_DIR}/regcomp.c 
62 | 	${RE_MODULE_DIR}/regerror.c 
63 | 	${RE_MODULE_DIR}/regexec.c 
64 | 	${RE_MODULE_DIR}/regfree.c 
65 | 	${RE_MODULE_DIR}/split.c
66 | )
67 | 
68 | add_library(tinyscheme-re SHARED ${RE_MODULE_SRCS})
69 | target_link_libraries(tinyscheme-re tinyscheme)
70 | target_include_directories (tinyscheme-re PUBLIC ${TINYSCHEME_DIR} PUBLIC ${RE_MODULE_DIR})
71 | target_compile_definitions(tinyscheme-re PUBLIC ${TINYSCHEME_DEFS})
72 | set_target_properties (tinyscheme-re PROPERTIES OUTPUT_NAME re)
73 | 
74 | # ----------------------------------------------------------------------------
75 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |                          LICENSE TERMS
 2 | 
 3 | Copyright (c) 2000, Dimitrios Souflis
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 | Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 | 
13 | Redistributions in binary form must reproduce the above copyright
14 | notice, this list of conditions and the following disclaimer in the
15 | documentation and/or other materials provided with the distribution.
16 | 
17 | Neither the name of Dimitrios Souflis nor the names of the
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TinyScheme
 2 | 
 3 | ## Notes by armornick
 4 | 
 5 | This is a sort-of continuation of the TinyScheme project, which seems to have been abandoned.
 6 | 
 7 | It started with just trying to get it to compile via CMake before deciding to separate the shell into its own file, and things escalated from there.
 8 | 
 9 | **About support**: while everyone is free to report any issues, this is a only a minor hobby project for me. That means there's about a 50/50 chance that I'll do something about it. The TinyScheme source code seems to be fairly understandable, however, so an intermediate programmer should be able to make changes without too much difficulty.
10 | 
11 | As a final note, I don't need any personal credits since I haven't actually written the original project source. Be sure to properly credit the original author, though.
12 | 
13 | ## Description by original author
14 | 
15 | TinyScheme is a lightweight Scheme interpreter that implements as large a subset of R5RS as was possible without getting very large and complicated. It is meant to be used as an embedded scripting interpreter for other programs. As such, it does not offer IDEs or extensive toolkits although it does sport a small top-level loop, included conditionally. A lot of functionality in TinyScheme is included conditionally, to allow developers freedom in balancing features and footprint.
16 | 
17 | As an embedded interpreter, it allows multiple interpreter states to coexist in the same program, without any interference between them. Programmatically, foreign functions in C can be added and values can be defined in the Scheme environment. Being a quite small program, it is easy to comprehend, get to grips with, and use. TinyScheme was grown out of the MiniScheme distribution during the development of Ovrimos.
18 | 
19 | The TinyScheme project has slowly acquired foothold in many open-source projects over the years, notably GIMP. It is evident that it is in need of constant attention, since any bug potentially affects a lot of installations. I am very grateful to the people that have helped with TinyScheme all this time, and I hope they will continue to do so in the future.
20 | 
21 | There is an issue that is better put forth, rather than stashed under the carpet. The same qualities that have made TinyScheme suitable for many embedded or small-footprint jobs, have earned it an unwanted place in less desirable software. I hope it is clear that TinyScheme itself is domain-agnostic and I, in particular, am not a supporter of malware or adware.
22 | 
23 | ## Licensing
24 | 
25 | This software is open source, covered by a BSD-style license. See the LICENSE file in this directory for the full license terms.


--------------------------------------------------------------------------------
/docs/hack.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                               How to hack TinyScheme
  3 |                               ----------------------
  4 | 
  5 |      TinyScheme is easy to learn and modify. It is structured like a
  6 |      meta-interpreter, only it is written in C. All data are Scheme
  7 |      objects, which facilitates both understanding/modifying the
  8 |      code and reifying the interpreter workings.
  9 | 
 10 |      In place of a dry description, we will pace through the addition
 11 |      of a useful new datatype: garbage-collected memory blocks.
 12 |      The interface will be:
 13 | 
 14 |           (make-block <n> [<fill>]) makes a new block of the specified size
 15 |                optionally filling it with a specified byte
 16 |           (block? <obj>)
 17 |           (block-length <block>)
 18 |           (block-ref <block> <index>) retrieves byte at location
 19 |           (block-set! <block> <index> <byte>) modifies byte at location
 20 |      
 21 |      In the sequel, lines that begin with '>' denote lines to add to the
 22 |      code. Lines that begin with '|' are just citations of existing code.
 23 |      Lines that begin with X denote lines to be removed from the code.
 24 | 
 25 |      First of all, we need to assign a typeid to our new type. Typeids
 26 |      in TinyScheme are small integers declared in the scheme_types enum
 27 |      located near the top of the scheme.c file; it begins with T_STRING.
 28 |      Add a new entry at the end, say T_MEMBLOCK. Remember to adjust the
 29 |      value of T_LAST_SYTEM_TYPE when adding new entries. There can be at
 30 |      most 31 types, but you don't have to worry about that limit yet.
 31 | 
 32 | |       T_ENVIRONMENT=14,
 33 | X       T_LAST_SYSTEM_TYPE=14
 34 | >       T_MEMBLOCK=15,
 35 | >       T_LAST_SYSTEM_TYPE=15
 36 | |     };
 37 | 
 38 | 
 39 |      Then, some helper macros would be useful. Go to where is_string()
 40 |      and the rest are defined and add:
 41 | 
 42 | >     INTERFACE INLINE int is_memblock(pointer p)     { return (type(p)==T_MEMBLOCK); }
 43 | 
 44 |      This actually is a function, because it is meant to be exported by
 45 |      scheme.h. If no foreign function will ever manipulate a memory block,
 46 |      you can instead define it as a macro:
 47 | 
 48 | >     #define is_memblock(p) (type(p)==T_MEMBLOCK)
 49 | 
 50 |      Then we make space for the new type in the main data structure:
 51 |      struct cell. As it happens, the _string part of the union _object
 52 |      (that is used to hold character strings) has two fields that suit us:
 53 | 
 54 | |         struct {
 55 | |              char   *_svalue;
 56 | |              int   _keynum;
 57 | |         } _string;
 58 | 
 59 |      We can use _svalue to hold the actual pointer and _keynum to hold its
 60 |      length. If we couln't reuse existing fields, we could always add other
 61 |      alternatives in union _object.
 62 | 
 63 |      We then proceed to write the function that actually makes a new block.
 64 |      For conformance reasons, we name it mk_memblock
 65 | 
 66 | >     static pointer mk_memblock(scheme *sc, int len, char fill) {
 67 | >          pointer x;
 68 | >          char *p=(char*)sc->malloc(len);
 69 | >
 70 | >          if(p==0) {
 71 | >               return sc->NIL;
 72 | >          }
 73 | >          x = get_cell(sc, sc->NIL, sc->NIL);
 74 | >
 75 | >          typeflag(x) = T_MEMBLOCK|T_ATOM;
 76 | >          strvalue(x)=p;
 77 | >          keynum(x)=len;
 78 | >          memset(p,fill,len);
 79 | >          return (x);
 80 | >     }
 81 | 
 82 |      The memory used by the MEMBLOCK will have to be freed when the cell
 83 |      is reclaimed during garbage collection. There is a placeholder for
 84 |      that staff, function finalize_cell(), currently handling strings only.
 85 | 
 86 | |     static void finalize_cell(scheme *sc, pointer a) {
 87 | |       if(is_string(a)) {
 88 | |          sc->free(strvalue(a));
 89 | >       } else if(is_memblock(a)) {
 90 | >          sc->free(strvalue(a));
 91 | |       } else if(is_port(a)) {
 92 | 
 93 |      There are no MEMBLOCK literals, so we don't concern ourselves with
 94 |      the READER part (yet!). We must cater to the PRINTER, though. We
 95 |      add one case more in atom2str().
 96 | 
 97 | |     } else if (iscontinuation(l)) {
 98 | |          p = "#<CONTINUATION>";
 99 | >     } else if (is_memblock(l)) {
100 | >          p = "#<MEMORY BLOCK>";
101 | |     } else {
102 | 
103 |      Whenever a MEMBLOCK is displayed, it will look like that.
104 |      Now, we must add the interface functions: constructor, predicate,
105 |      accessor, modifier. We must in fact create new op-codes for the virtual
106 |      machine underlying TinyScheme. Since version 1.30, TinyScheme uses
107 |      macros and a single source text to keep the enums and the dispatch table
108 |      in sync. The op-codes are defined in the opdefines.h file with one line
109 |      for each op-code. The lines in the file have six columns between the
110 |      starting _OPDEF( and ending ): A, B, C, D, E, and OP.
111 |      Note that this file uses unusually long lines to accomodate all the
112 |      information; adjust your editor to handle this.
113 | 
114 |      The purpose of the columns is:
115 |        - Column A is the name of the subroutine that handles the op-code.
116 |        - Column B is the name of the op-code function.
117 |        - Columns C and D are the minimum and maximum number of arguments
118 |          that are accepted by the op-code.
119 |        - Column E is a set of flags that tells the interpreter the type of
120 |          each of the arguments expected by the op-code.
121 |        - Column OP is used in the scheme_opcodes enum located in the
122 |          scheme-private.h file.
123 | 
124 |      Op-codes are really just tags for a huge C switch, only this switch
125 |      is broken up in to a number of different opexe_X functions. The
126 |      correspondence is made in table "dispatch_table". There, we assign
127 |      the new op-codes to opexe_2, where the equivalent ones for vectors
128 |      are situated. We also assign a name for them, and specify the minimum
129 |      and maximum arity (number of expected arguments). INF_ARG as a maximum
130 |      arity means "unlimited".
131 | 
132 |      For reasons of consistency, we add the new op-codes right after those
133 |      for vectors:
134 | 
135 | |     _OP_DEF(opexe_2, "vector-set!",                    3,  3,       TST_VECTOR TST_NATURAL TST_ANY,  OP_VECSET           )
136 | >     _OP_DEF(opexe_2, "make-block",                     1,  2,       TST_NATURAL TST_CHAR,            OP_MKBLOCK          )
137 | >     _OP_DEF(opexe_2, "block-length",                   1,  1,       T_MEMBLOCK,                      OP_BLOCKLEN         )
138 | >     _OP_DEF(opexe_2, "block-ref",                      2,  2,       T_MEMBLOCK TST_NATURAL,          OP_BLOCKREF         )
139 | >     _OP_DEF(opexe_2, "block-set!",                     1,  1,       T_MEMBLOCK TST_NATURAL TST_CHAR, OP_BLOCKSET         )
140 | |     _OP_DEF(opexe_3, "not",                            1,  1,       TST_NONE,                        OP_NOT              )
141 | 
142 |      We add the predicate along with the other predicates in opexe_3:
143 | 
144 | |     _OP_DEF(opexe_3, "vector?",                        1,  1,       TST_ANY,                         OP_VECTORP          )
145 | >     _OP_DEF(opexe_3, "block?",                         1,  1,       TST_ANY,                         OP_BLOCKP           )
146 | |     _OP_DEF(opexe_3, "eq?",                            2,  2,       TST_ANY,                         OP_EQ               )
147 | 
148 |      All that remains is to write the actual code to do the processing and
149 |      add it to the switch statement in opexe_2, after the OP_VECSET case.
150 | 
151 | >     case OP_MKBLOCK: { /* make-block */
152 | >          int fill=0;
153 | >          int len;
154 | >
155 | >          if(!isnumber(car(sc->args))) {
156 | >               Error_1(sc,"make-block: not a number:",car(sc->args));
157 | >          }
158 | >          len=ivalue(car(sc->args));
159 | >          if(len<=0) {
160 | >               Error_1(sc,"make-block: not positive:",car(sc->args));
161 | >          }
162 | >
163 | >          if(cdr(sc->args)!=sc->NIL) {
164 | >               if(!isnumber(cadr(sc->args)) || ivalue(cadr(sc->args))<0) {
165 | >                    Error_1(sc,"make-block: not a positive number:",cadr(sc->args));
166 | >               }
167 | >               fill=charvalue(cadr(sc->args))%255;
168 | >          }
169 | >          s_return(sc,mk_memblock(sc,len,(char)fill));
170 | >     }
171 | >
172 | >     case OP_BLOCKLEN:  /* block-length */
173 | >          if(!ismemblock(car(sc->args))) {
174 | >               Error_1(sc,"block-length: not a memory block:",car(sc->args));
175 | >          }
176 | >          s_return(sc,mk_integer(sc,keynum(car(sc->args))));
177 | >
178 | >     case OP_BLOCKREF: { /* block-ref */
179 | >          char *str;
180 | >          int index;
181 | >
182 | >          if(!ismemblock(car(sc->args))) {
183 | >               Error_1(sc,"block-ref: not a memory block:",car(sc->args));
184 | >          }
185 | >          str=strvalue(car(sc->args));
186 | >
187 | >          if(cdr(sc->args)==sc->NIL) {
188 | >               Error_0(sc,"block-ref: needs two arguments");
189 | >          }
190 | >          if(!isnumber(cadr(sc->args))) {
191 | >               Error_1(sc,"block-ref: not a number:",cadr(sc->args));
192 | >          }
193 | >          index=ivalue(cadr(sc->args));
194 | >
195 | >          if(index<0 || index>=keynum(car(sc->args))) {
196 | >               Error_1(sc,"block-ref: out of bounds:",cadr(sc->args));
197 | >          }
198 | >
199 | >          s_return(sc,mk_integer(sc,str[index]));
200 | >     }
201 | >
202 | >     case OP_BLOCKSET: { /* block-set! */
203 | >          char *str;
204 | >          int index;
205 | >          int c;
206 | >
207 | >          if(!ismemblock(car(sc->args))) {
208 | >               Error_1(sc,"block-set!: not a memory block:",car(sc->args));
209 | >          }
210 | >          if(isimmutable(car(sc->args))) {
211 | >               Error_1(sc,"block-set!: unable to alter immutable memory block:",car(sc->args));
212 | >          }
213 | >          str=strvalue(car(sc->args));
214 | >
215 | >          if(cdr(sc->args)==sc->NIL) {
216 | >               Error_0(sc,"block-set!: needs three arguments");
217 | >          }
218 | >          if(!isnumber(cadr(sc->args))) {
219 | >               Error_1(sc,"block-set!: not a number:",cadr(sc->args));
220 | >          }
221 | >          index=ivalue(cadr(sc->args));
222 | >          if(index<0 || index>=keynum(car(sc->args))) {
223 | >               Error_1(sc,"block-set!: out of bounds:",cadr(sc->args));
224 | >          }
225 | >
226 | >          if(cddr(sc->args)==sc->NIL) {
227 | >               Error_0(sc,"block-set!: needs three arguments");
228 | >          }
229 | >          if(!isinteger(caddr(sc->args))) {
230 | >               Error_1(sc,"block-set!: not an integer:",caddr(sc->args));
231 | >          }
232 | >          c=ivalue(caddr(sc->args))%255;
233 | >
234 | >          str[index]=(char)c;
235 | >          s_return(sc,car(sc->args));
236 | >     }
237 | 
238 |      Finally, do the same for the predicate in opexe_3.
239 | 
240 | |     case OP_VECTORP:     /* vector? */
241 | |          s_retbool(is_vector(car(sc->args)));
242 | >     case OP_BLOCKP:     /* block? */
243 | >          s_retbool(is_memblock(car(sc->args)));
244 | |     case OP_EQ:         /* eq? */
245 | 


--------------------------------------------------------------------------------
/docs/regex-module.txt:
--------------------------------------------------------------------------------
 1 |              TinyScheme RE (Regular Expressions) extension
 2 |              ---------------------------------------------
 3 | Version 1.2, August 2002
 4 | 
 5 | The bulk of this directory is the regular expression library written
 6 | by Henry Spencer (see file README and COPYRIGHT).
 7 | 
 8 | Two files were added to produce the TinyScheme regular expression
 9 | library, re.so: re.c and re.makefile. The included re.makefile was contributed
10 | initially by Stephen Gildea and should be adaptable to all Unix systems.
11 | 
12 | The makefile produces a DLL named re.so. For now, it contains just
13 | a single foreign function (re-match <pattern> <string>). It returns
14 | true (string matches pattern) or false. If it is called with an
15 | extra parameter, which should be a vector, overwrites as many elements
16 | of the vector as needed with the strings that matched the corresponding
17 | parenthesized subexpressions inside <pattern>.
18 | 
19 | It is not fully tested, so use with caution.
20 | 
21 | Load the extension from inside TinyScheme using
22 | (load-extension "re/re")
23 | assuming that re.so is in the directory "re".
24 | 
25 | Load "re.scm" if you wish to use v.1.1 behavior.
26 |  
27 | dsouflis@acm.org
28 | 


--------------------------------------------------------------------------------
/src/BUILDING:
--------------------------------------------------------------------------------
  1 |         Building TinyScheme
  2 |         -------------------
  3 | 
  4 | The included makefile includes logic for Linux, Solaris and Win32, and can
  5 | readily serve as an example for other OSes, especially Unixes. There are
  6 | a lot of compile-time flags in TinyScheme (preprocessor defines) that can trim
  7 | unwanted features. See next section. 'make all' and 'make clean' function as
  8 | expected.
  9 | 
 10 | Autoconfing TinyScheme was once proposed, but the distribution would not be
 11 | so small anymore. There are few platform dependencies in TinyScheme, and in
 12 | general compiles out of the box.
 13 | 
 14 |      Customizing
 15 |      -----------
 16 | 
 17 |      The following symbols are defined to default values in scheme.h.
 18 |      Use the -D flag of cc to set to either 1 or 0.
 19 | 
 20 |      STANDALONE
 21 |      Define this to produce a standalone interpreter.
 22 | 
 23 |      USE_MATH
 24 |      Includes math routines.
 25 | 
 26 |      USE_CHAR_CLASSIFIERS
 27 |      Includes character classifier procedures.
 28 | 
 29 |      USE_ASCII_NAMES
 30 |      Enable extended character notation based on ASCII names.
 31 | 
 32 |      USE_STRING_PORTS
 33 |      Enables string ports.
 34 | 
 35 |      USE_ERROR_HOOK
 36 |      To force system errors through user-defined error handling.
 37 |      (see "Error handling")
 38 | 
 39 |      USE_TRACING
 40 |      To enable use of TRACING.
 41 | 
 42 |      USE_COLON_HOOK
 43 |      Enable use of qualified identifiers. (see "Colon Qualifiers - Packages")
 44 |      Defining this as 0 has the rather drastic consequence that any code using
 45 |      packages will stop working, and will have to be modified. It should only
 46 |      be used if you *absolutely* need to use '::' in identifiers.
 47 | 
 48 |      USE_STRCASECMP
 49 |      Defines stricmp as strcasecmp, for Unix.
 50 | 
 51 |      STDIO_ADDS_CR
 52 |      Informs TinyScheme that stdio translates "\n" to "\r\n". For DOS/Windows.
 53 | 
 54 |      USE_DL
 55 |      Enables dynamically loaded routines. If you define this symbol, you
 56 |      should also include dynload.c in your compile.
 57 | 
 58 |      USE_PLIST
 59 |      Enables property lists (not Standard Scheme stuff). Off by default.
 60 |      
 61 |      USE_NO_FEATURES
 62 |      Shortcut to disable USE_MATH, USE_CHAR_CLASSIFIERS, USE_ASCII_NAMES,
 63 |      USE_STRING_PORTS, USE_ERROR_HOOK, USE_TRACING, USE_COLON_HOOK,
 64 |      USE_DL.
 65 | 
 66 |      USE_SCHEME_STACK
 67 |      Enables 'cons' stack (the alternative is a faster calling scheme, which 
 68 |      breaks continuations). Undefine it if you don't care about strict compatibility
 69 |      but you do care about faster execution.
 70 | 
 71 | 
 72 |      OS-X tip
 73 |      --------
 74 |      I don't have access to OS-X, but Brian Maher submitted the following tip:
 75 | 
 76 | [1] Download and install fink (I installed fink in
 77 | /usr/local/fink)
 78 | [2] Install the 'dlcompat' package using fink as such:
 79 | > fink install dlcompat
 80 | [3] Make the following changes to the
 81 | tinyscheme-1.32.tar.gz
 82 | 
 83 | diff -r tinyscheme-1.32/dynload.c
 84 | tinyscheme-1.32-new/dynload.c
 85 | 24c24
 86 | < #define SUN_DL
 87 | ---
 88 | > 
 89 | Only in tinyscheme-1.32-new/: dynload.o
 90 | Only in tinyscheme-1.32-new/: libtinyscheme.a Only in tinyscheme-1.32-new/: libtinyscheme.so diff -r tinyscheme-1.32/makefile tinyscheme-1.32-new/makefile
 91 | 33,34c33,43
 92 | < LD = gcc
 93 | < LDFLAGS = -shared
 94 | ---
 95 | > #LD = gcc
 96 | > #LDFLAGS = -shared
 97 | > #DEBUG=-g -Wno-char-subscripts -O
 98 | > #SYS_LIBS= -ldl
 99 | > #PLATFORM_FEATURES= -DSUN_DL=1
100 | > 
101 | > # Mac OS X
102 | > CC = gcc
103 | > CFLAGS = -I/usr/local/fink/include
104 | > LD = gcc
105 | > LDFLAGS = -L/usr/local/fink/lib
106 | 37c46
107 | < PLATFORM_FEATURES= -DSUN_DL=1
108 | ---
109 | > PLATFORM_FEATURES= -DSUN_DL=1 -DOSX
110 | 60c69
111 | <       $(CC) -I. -c $(DEBUG) $(FEATURES)
112 | $(DL_FLAGS) $<
113 | ---
114 | >       $(CC) $(CFLAGS) -I. -c $(DEBUG)
115 | $(FEATURES) $(DL_FLAGS) $<
116 | 66c75
117 | <       $(CC) -o $@ $(DEBUG) $(OBJS) $(SYS_LIBS) 
118 | ---
119 | >       $(CC) $(LDFLAGS) -o $@ $(DEBUG) $(OBJS)
120 | $(SYS_LIBS)
121 | Only in tinyscheme-1.32-new/: scheme
122 | diff -r tinyscheme-1.32/scheme.c
123 | tinyscheme-1.32-new/scheme.c
124 | 60,61c60,61
125 | < #ifndef macintosh
126 | < # include <malloc.h>
127 | ---
128 | > #ifdef OSX
129 | > /* Do nothing */
130 | 62a63,65
131 | > # ifndef macintosh
132 | > #  include <malloc.h>
133 | > # else
134 | 77c80,81
135 | < #endif /* macintosh */
136 | ---
137 | > # endif /* macintosh */
138 | > #endif /* !OSX */
139 | Only in tinyscheme-1.32-new/: scheme.o
140 | 


--------------------------------------------------------------------------------
/src/CHANGES:
--------------------------------------------------------------------------------
  1 | Change Log
  2 | ----------
  3 | 
  4 | Version 1.41
  5 |     Bugs fixed:
  6 |         #3020389 - Added makefile section for Mac OS X  (SL)
  7 |         #3286135 - Fixed num_mod routine which caused errors in use of modulo
  8 |         #3290232 - Corrected version number shown on startup  (GM)
  9 |         #3394882 - Added missing #if in opdefines.h around get and put  (DC)
 10 |         #3395547 - Fix for the modulo procedure  (DC)
 11 |         #3400290 - Optimized append to make it an O(n) operation  (DC)
 12 |         #3493926 - Corrected flag used when building shared files on OSX (J)
 13 | 
 14 |     R5RS related changes:
 15 |         #2866196 - Parser does not handle delimiters correctly
 16 |         #3395548 - Add a decimal point to inexact numbers in atom2str  (DC)
 17 |         #3399331 - Make min/max return inexact when any argument is inexact
 18 |         #3399332 - Compatability fix for expt.
 19 |         #3399335 - Optional radix for string->number and number->string  (DC)
 20 |         #3400202 - Append with one argument should not return a list  (DC)
 21 |         #3400284 - Compatability fix for integer?
 22 | 
 23 |     Other changes:
 24 |         - Added flags to makefile for MinGW/MSYS (TC)
 25 |         - Moved variable declarations to avoid warnings with some compilers
 26 |         - Don't print space after initial #( when printing vectors.
 27 |         - Minor optimization for is_nonneg().
 28 |         - No need to round integers in OP_ROUND (#3400284)
 29 |         - Fixes to code that reports line number with error  (RC)
 30 | 
 31 |     Contributors:
 32 |         Kevin Cozens, Gordon McNutt, Doug Currie, Sean Long, Tim Cas, Joey,
 33 |         Richard Copley, and CMarinier.
 34 | 
 35 | Version 1.40
 36 |     Bugs fixed:
 37 |         #1964950 - Stop core dumps due to bad syntax in LET (and variants)
 38 |         #2826594 - allow reverse to work on empty list (Tony Garnock-Jones)
 39 |         Potential problem of arglist to foreign calls being wrongly GC'ed.
 40 |         Fixed bug that read could loop forever (tehom).
 41 | 
 42 |     API changes:
 43 |         Exposed is_list and list_length.
 44 |         Added scheme_register_foreign_func_list and declarations for it (tehom)
 45 |         Defined *compile-hook* (tehom)
 46 | 
 47 |     Other changes:
 48 |         Updated is_list and list_length to handle circular lists.
 49 |         Nested calling thru C has been made now safer (tehom)
 50 |         Peter Michaux cleaned up port_rep_from_file
 51 |         Added unwind-protect (tehom)
 52 |         Some cleanups to in/outport and Eval_Cycle by Peter Michaux
 53 |         Report error line number (Mostly by Sanel Zukan, back-compatibility by Tehom)
 54 | 
 55 |     Contributors:
 56 |         Kevin Cozens, Dimitrios Souflis, Tom Breton, Peter Michaux, Sanel Zukan,
 57 |         and Tony Garnock-Jones.
 58 | 
 59 | Version 1.39
 60 |     Bugs fixed:
 61 |         Fix for the load bug
 62 |         Fixed parsing of octal coded characters. Fixes bug #1818018.
 63 |         Added tests for when mk_vector is out of memory. Can't rely on sc->sink.
 64 |         Fix for bug #1794369
 65 |         Finished feature-request 1599947: scheme_apply0 etc return values.
 66 |         Partly provided feature-request 1599947: Expose list_length, eqv, etc
 67 |         Provided feature-request 1599945, Scheme->C->Scheme calling.
 68 |         Fix for bug 1593861 (behavior of is_integer)
 69 |         Fix for bug 1589711
 70 |         Error checking of binding spec syntax in LET and LETREC. The bad syntax
 71 |         was causing a segmentation fault in Linux. Complete fixes for bug #1817986.
 72 |         Error checking of binding spec syntax in LET*
 73 |         Bad syntax was causing core dump in Linux.
 74 |         Fix for nasty gc bug
 75 | 
 76 |     R5RS changes:
 77 |         R5RS requires numbers to be of equal value AND of the same type (ie. both
 78 |         exact or inexact) in order to return #t from eqv?. R5RS compliance fix.
 79 |         String output ports now conform to SRFI-6
 80 | 
 81 |     Other changes:
 82 |         Drew Yao fixed buffer overflow problems in mk_sharp_const.
 83 |         put OP_T0LVL in charge of reacting to EOF
 84 |         file_push checks array bounds (patch from Ray Lehtiniemi)
 85 |         Changed to always use snprintf (Patch due to Ramiro bsd1628)
 86 |         Updated usage information using text from the Manual.txt file.
 87 | 
 88 | Version 1.38
 89 |     Interim release until the rewrite, mostly incorporating modifications
 90 |     from Kevin Cozens. Small addition for Cygwin in the makefile, and
 91 |     modifications by Andrew Guenther for Apple platforms.
 92 | 
 93 | Version 1.37
 94 |     Joe Buehler submitted reserve_cells.
 95 | 
 96 | Version 1.36
 97 |     Joe Buehler fixed a patch in the allocator.
 98 |     Alexander Shendi moved the comment handling in the scanner, which
 99 |     fixed an obscure bug for which Mike E had provided a patch as well.
100 |     Kevin Cozens has submitted some fixes and modifications which have
101 |     not been incorporated yet in their entirety.
102 | 
103 | Version 1.35
104 |     Todd Showalter discovered that the number of free cells reported
105 |     after GC was incorrect, which could also cause unnecessary allocations.
106 | 
107 | Version 1.34
108 |     Long missing version. Lots of bugfixes have accumulated in my email, so
109 |     I had to start using them. In this version, Keenan Pepper has submitted
110 |     a bugfix for the string comparison library procedure, Wouter Boeke
111 |     modified some code that was casting to the wrong type and crashed on
112 |     some machines, "SheppardCo" submitted a replacement "modulo" code and
113 |     Scott Fenton submitted lots of corrections that shut up some compiler
114 |     warnings. Brian Maher submitted instructions on how to build on OS-X.
115 |     I have to dig deeper into my mailbox and find earlier emails, too.
116 | 
117 | Version 1.33
118 |     Charles Hayden fixed a nasty GC bug of the new stack frame, while in
119 |     the process of porting TinyScheme to C++. He also submitted other
120 |     changes, and other people also had comments or requests, but the GC
121 |     bug was so important that this version is put through the door to
122 |     correct it.
123 | 
124 | Version 1.32
125 |     Stephen Gildea put some quality time on TinyScheme again, and made
126 |     a whole lot of changes to the interpreter that made it noticeably
127 |     faster.
128 | 
129 | Version 1.31
130 |     Patches to the hastily-done version 1.30. Stephen Gildea fixed
131 |     some things done wrongly, and Richard Russo fixed the makefile
132 |     for building on Windows. Property lists (heritage from MiniScheme)
133 |     are now optional and have dissappeared from the interface. They
134 |     should be considered as deprecated.
135 | 
136 | Version 1.30
137 |     After many months, I followed Preston Bannister's advice of
138 |     using macros and a single source text to keep the enums and the
139 |     dispatch table in sync, and I used his contributed "opdefines.h".
140 |     Timothy Downs contributed a helpful function, "scheme_call".
141 |     Stephen Gildea contributed new versions of the makefile and
142 |     practically all other sources. He created a built-in STRING-APPEND,
143 |     and fixed a lot of other bugs.
144 |     Ruhi Bloodworth reported fixes necessary for OS X and a small
145 |     bug in dynload.c.
146 | 
147 | Version 1.29
148 |     The previous version contained a lot of corrections, but there
149 |     were a lot more that still wait on a sheet of paper lost in a
150 |     carton someplace after my house move... Manuel Heras-Gilsanz
151 |     noticed this and resent his own contribution, which relies on
152 |     another bugfix that v.1.28 was missing: a problem with string
153 |     output, that this version fixes. I hope other people will take
154 |     the time to resend their contributions, if they didn't make it
155 |     to v.1.28.
156 | 
157 | Version 1.28
158 |     Many people have contacted me with bugfixes or remarks in
159 |     the three months I was inactive. A lot of them spotted that
160 |     scheme_deinit crashed while reporting gc results. They suggested
161 |     that sc->outport be set to NIL in scheme_deinit, which I did.
162 |     Dennis Taylor remarked that OP_VALUEPRINT reset sc->value instead
163 |     of preserving it. He submitted a modification which I adopted
164 |     partially. David Hovemeyer sent me many little changes, that you
165 |     will find in version 1.28, and Partice Stoessel modified the
166 |     float reader to conform to R5RS.
167 | 
168 | Version 1.27
169 |     Version 1.27 is the successor of 1.25. Bug fixes only, but I had to
170 |     release them so that everybody can profit. 'Backchar' tried to write
171 |     back to the string, which obviously didn't work for const strings.
172 |     'Substring' didn't check for crossed start and end indices. Defines
173 |     changed to restore the ability to compile under MSVC.
174 | 
175 | Version 1.26
176 |     Version 1.26 was never released. I changed a lot of things, in fact
177 |     too much, even the garbage collector, and hell broke loose. I'll
178 |     try a more gradual approach next time.
179 | 
180 | Version 1.25
181 |     Types have been homogenized to be able to accommodate a different
182 |     representation. Plus, promises are no longer closures. Unfortunately,
183 |     I discovered that continuations and force/delay do not pass the SCM
184 |     test (and never did)... However, on the bright side, what little
185 |     modifications I did had a large impact on the footprint:
186 |     USE_NO_FEATURES now produces an object file of 63960 bytes on Linux!
187 | 
188 | Version 1.24
189 |     SCM tests now pass again after change in atom2str.
190 | 
191 | Version 1.23
192 |     Finally I managed to mess it up with my version control. Version
193 |     1.22 actually lacked some of the things I have been fixing in the
194 |     meantime. This should be considered as a complete replacement for
195 |     1.22.
196 | 
197 | Version 1.22
198 |     The new ports had a bug in LOAD. MK_CLOSURE is introduced.
199 |     Shawn Wagner inquired about string->number and number->string.
200 |     I added string->atom and atom->string and defined the number
201 |     functions from them. Doing that, I fixed WRITE applied to symbols
202 |     (it didn't quote them). Unfortunately, minimum build is now
203 |     slightly larger than 64k... I postpone action because Jason's idea
204 |     might solve it elegantly.
205 | 
206 | Version 1.21
207 |     Jason Felice submitted a radically different datatype representation
208 |     which he had implemented. While discussing its pros and cons, it
209 |     became apparent that the current implementation of ports suffered
210 |     from a grave fault: ports were not garbage-collected. I changed the
211 |     ports to be heap-allocated, which enabled the use of string ports
212 |     for loading. Jason also fixed errors in the garbage collection of
213 |     vectors. USE_VERBATIM is gone. "ssp_compiler.c" has a better solution
214 |     on HTML generation. A bug involving backslash notation in strings
215 |     has been fixed. '-c' flag now executes next argument as a stream of
216 |     Scheme commands. Foreign functions are now also heap allocated,
217 |     and scheme_define is used to define everything.
218 | 
219 | Version 1.20
220 |     Tracing has been added. The toplevel loop has been slightly
221 |     rearranged. Backquote reading for vector templates has been
222 |     sanitized. Symbol interning is now correct. Arithmetic functions
223 |     have been corrected. APPLY, MAP, FOR-EACH, numeric comparison
224 |     functions fixed. String reader/writer understands \xAA notation.
225 | 
226 | Version 1.19
227 |     Carriage Return now delimits identifiers. DOS-formatted Scheme files
228 |     can be used by Unix. Random number generator added to library.
229 |     Fixed some glitches of the new type-checking scheme. Fixed erroneous
230 |     (append '() 'a) behavior. Will continue with r4rstest.scm to
231 |     fix errors.
232 | 
233 | Version 1.18
234 |     The FFI has been extended. USE_VERBOSE_GC has gone. Anyone wanting
235 |     the same functionality can put (gcverbose #t) in init.scm.
236 |     print-width was removed, along with three corresponding op-codes.
237 |     Extended character constants with ASCII names were added.
238 |     mk_counted_string paves the way for full support of binary strings.
239 |     As much as possible of the type-checking chores were delegated
240 |     to the inner loop, thus reducing the code size to less than 4200 loc!
241 | 
242 | Version 1.17
243 |     Dynamically-loaded extensions are more fully integrated.
244 |     TinyScheme is now distributed under the BSD open-source license.
245 | 
246 | Version 1.16
247 |     Dynamically-loaded extensions introduced (USE_DL).
248 |     Santeri Paavolainen found a race condition: When a cons is executed,
249 |     and each of the two arguments is a constructing function,  GC could
250 |     happen before all arguments are evaluated and cons() is called, and
251 |     the evaluated arguments would all be reclaimed!
252 |     Fortunately, such a case was rare in the code, although it is
253 |     a pitfall in new code and code in foreign functions. Currently, only
254 |     one such case remains, when COLON_HOOK is defined.
255 | 
256 | Version 1.15
257 |     David Gould also contributed some changes that speed up operation.
258 |     Kirk Zurell fixed HASPROP.
259 |     The Garbage Collection didn't collect all the garbage...fixed.
260 | 
261 | Version 1.14
262 |     Unfortunately, after Andre fixed the GC it became obvious that the
263 |     algorithm was too slow... Fortunately, David Gould found a way to
264 |     speed it up.
265 | 
266 | Version 1.13
267 |     Silly bug involving division by zero resolved by Roland Kaufman.
268 |     Macintoch support from Shmulik Regev.
269 |     Float parser bug fixed by Alexander Shendi.
270 |     GC bug from Andru Luvisi.
271 | 
272 | Version 1.12
273 |     Cis* incorrectly called isalpha() instead of isascii()
274 |     Added USE_CHAR_CLASSIFIERS, USE_STRING_PORTS.
275 | 
276 | Version 1.11
277 |     BSDI defines isnumber... changed all similar functions to is_*
278 |     EXPT now has correct definition. Added FLOOR,CEILING,TRUNCATE
279 |     and ROUND, courtesy of Bengt Kleberg. Preprocessor symbols now
280 |     have values 1 or 0, and can be set as compiler defines (proposed
281 |     by Andy Ganor *months* ago). 'prompt' and 'InitFile' can now be
282 |     defined during compilation, too.
283 | 
284 | Version 1.10
285 |     Another bug when file ends with comment!
286 |     Added DEFINE-MACRO in init.scm, courtesy of Andy Gaynor.
287 | 
288 | Version 1.09
289 |     Removed bug when READ met EOF. lcm.
290 | 
291 | Version 1.08
292 |     quotient,remainder and modulo. gcd.
293 | 
294 | Version 1.07
295 |     '=>' in cond now exists
296 |     list? now checks for circularity
297 |     some reader bugs removed
298 |     Reader is more consistent wrt vectors
299 |     Quote and Quasiquote work with vectors
300 | 
301 | Version 1.06
302 |     #! is now skipped
303 |     generic-assoc bug removed
304 |     strings are now managed differently, hack.txt is removed
305 |     various delicate points fixed
306 | 
307 | Version 1.05
308 |     Support for scripts, *args*, "-1" option.
309 |     Various R5RS procedures.
310 |     *sharp-hook*
311 |     Handles unmatched parentheses.
312 |     New architecture for procedures.
313 | 
314 | Version 1.04
315 |     Added missing T_ATOM bits...
316 |     Added vectors
317 |     Free-list is sorted by address, since vectors need consecutive cells.
318 |     (quit <exitcode>) for use with scripts
319 | 
320 | Version 1.03 (26 Aug 1998):
321 |     Extended .h with useful functions for FFI
322 |     Library: with-input-* etc.
323 |     Finished R5RS I/O, added string ports.
324 | 
325 | Version 1.02 (25 Aug 1998):
326 |     First part of R5RS I/O.
327 | 


--------------------------------------------------------------------------------
/src/COPYING:
--------------------------------------------------------------------------------
 1 |                          LICENSE TERMS
 2 | 
 3 | Copyright (c) 2000, Dimitrios Souflis
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 | Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 | 
13 | Redistributions in binary form must reproduce the above copyright
14 | notice, this list of conditions and the following disclaimer in the
15 | documentation and/or other materials provided with the distribution.
16 | 
17 | Neither the name of Dimitrios Souflis nor the names of the
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/src/MiniSCHEMETribute.txt:
--------------------------------------------------------------------------------
 1 |      TinyScheme would not exist if it wasn't for MiniScheme. I had just
 2 |      written the HTTP server for Ovrimos SQL Server, and I was lamenting the
 3 |      lack of a scripting language. Server-side Javascript would have been the
 4 |      preferred solution, had there been a Javascript interpreter I could
 5 |      lay my hands on. But there weren't. Perl would have been another solution,
 6 |      but it was probably ten times bigger that the program it was supposed to
 7 |      be embedded in. There would also be thorny licencing issues. 
 8 |      
 9 |      So, the obvious thing to do was find a trully small interpreter. Forth
10 |      was a language I had once quasi-implemented, but the difficulty of
11 |      handling dynamic data and the weirdness of the language put me off. I then
12 |      looked around for a LISP interpreter, the next thing I knew was easy to
13 |      implement. Alas, the LeLisp I knew from my days in UPMC (Universite Pierre
14 |      et Marie Curie) had given way to Common Lisp, a megalith of a language!
15 |      Then my search lead me to Scheme, a language I knew was very orthogonal
16 |      and clean. When I found Mini-Scheme, a single C file of some 2400 loc, I 
17 |      fell in love with it! What if it lacked floating-point numbers and 
18 |      strings! The rest, as they say, is history.
19 |      
20 |      Below  are the original credits. Don't email Akira KIDA, the address has
21 |      changed.
22 |      
23 |      ---------- Mini-Scheme Interpreter Version 0.85 ----------
24 | 
25 |                 coded by Atsushi Moriwaki (11/5/1989)
26 | 
27 |             E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
28 | 
29 |                THIS SOFTWARE IS IN THE PUBLIC DOMAIN
30 |                ------------------------------------
31 |  This software is completely free to copy, modify and/or re-distribute.
32 |  But I would appreciate it if you left my name on the code as the author.
33 | 
34 |   This version has been modified by R.C. Secrist.
35 | 
36 |   Mini-Scheme is now maintained by Akira KIDA.
37 | 
38 |   This is a revised and modified version by Akira KIDA.
39 |    current version is 0.85k4 (15 May 1994)
40 | 
41 |   Please send suggestions, bug reports and/or requests to:
42 |         <SDI00379@niftyserve.or.jp>
43 | 
44 | 
45 |      Features compared to MiniSCHEME
46 |      -------------------------------
47 | 
48 |      All code is now reentrant. Interpreter state is held in a 'scheme'
49 |      struct, and many interpreters can coexist in the same program, possibly
50 |      in different threads. The user can specify user-defined memory allocation
51 |      primitives. (see "Programmer's Reference")
52 | 
53 |      The reader is more consistent.
54 | 
55 |      Strings, characters and flonums are supported. (see "Types")
56 | 
57 |      Files being loaded can be nested up to some depth.
58 | 
59 |      R5RS I/O is there, plus String Ports. (see "Scheme Reference","I/O")
60 | 
61 |      Vectors exist.
62 | 
63 |      As a standalone application, it supports command-line arguments.
64 |      (see "Standalone")
65 | 
66 |      Running out of memory is now handled.
67 | 
68 |      The user can add foreign functions in C. (see "Foreign Functions")
69 | 
70 |      The code has been changed slightly, core functions have been moved
71 |      to the library, behavior has been aligned with R5RS etc.
72 | 
73 |      Support has been added for user-defined error recovery.
74 |      (see "Error Handling")
75 | 
76 |      Support has been added for modular programming.
77 |      (see "Colon Qualifiers - Packages")
78 | 
79 |      To enable this, EVAL has changed internally, and can
80 |      now take two arguments, as per R5RS. Environments are supported.
81 |      (see "Colon Qualifiers - Packages")
82 | 
83 |      Promises are now evaluated once only.
84 | 
85 |      (macro (foo form) ...) is now equivalent to (macro foo (lambda(form) ...))
86 | 
87 |      The reader can be extended using new #-expressions
88 |      (see "Reader extensions")
89 | 


--------------------------------------------------------------------------------
/src/api.c:
--------------------------------------------------------------------------------
  1 | /* T I N Y S C H E M E    1 . 50
  2 |  *   Continued by armornick (March 2016)
  3 |  *   Original work by Dimitrios Souflis (dsouflis@acm.org)
  4 |  *   
  5 |  *   Based on MiniScheme (original credits follow)
  6 |  * (MINISCM)               coded by Atsushi Moriwaki (11/5/1989)
  7 |  * (MINISCM)           E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
  8 |  * (MINISCM) This version has been modified by R.C. Secrist.
  9 |  * (MINISCM)
 10 |  * (MINISCM) Mini-Scheme is now maintained by Akira KIDA.
 11 |  * (MINISCM)
 12 |  * (MINISCM) This is a revised and modified version by Akira KIDA.
 13 |  * (MINISCM)    current version is 0.85k4 (15 May 1994)
 14 |  * (MINISCM)
 15 |  *
 16 |  */
 17 | 
 18 |  #define _SCHEME_SOURCE
 19 | #include "scheme.h"
 20 | #include "scheme-private.h"
 21 | 
 22 | /*------------------ API Wrappers -------------------------------------*/
 23 | 
 24 | int scheme_retcode(scheme *sc) {
 25 |   return sc->retcode;
 26 | }
 27 | 
 28 | pointer scheme_global_env(scheme *sc) {
 29 |   return sc->global_env;
 30 | }
 31 | 
 32 | pointer scheme_nil(scheme *sc) {
 33 |   return sc->NIL;
 34 | }
 35 | 
 36 | pointer scheme_true(scheme *sc) {
 37 |   return sc->T;
 38 | }
 39 | 
 40 | pointer scheme_false(scheme *sc) {
 41 |   return sc->F;
 42 | }
 43 | 
 44 | pointer scheme_reverse(scheme *sc, pointer a) {
 45 |   return reverse(sc, a);
 46 | }
 47 | 
 48 | pointer scheme_reverse_in_place(scheme *sc, pointer term, pointer list) {
 49 |   return reverse_in_place(sc, term, list);
 50 | }
 51 | 
 52 | pointer scheme_cons(scheme *sc, pointer a, pointer b, int immutable) {
 53 | 	return _cons(sc, a, b, immutable);
 54 | }
 55 | 
 56 | pointer scheme_integer(scheme *sc, long num) {
 57 | 	return mk_integer(sc, num);
 58 | }
 59 | 
 60 | pointer scheme_real(scheme *sc, double num) {
 61 | 	return mk_real(sc, num);
 62 | }
 63 | 
 64 | pointer scheme_symbol(scheme *sc, const char *name) {
 65 | 	return mk_symbol(sc, name);
 66 | }
 67 | 
 68 | pointer scheme_gensym(scheme *sc) {
 69 | 	return gensym(sc);
 70 | }
 71 | 
 72 | pointer scheme_string(scheme *sc, const char *str) {
 73 | 	return mk_string(sc, str);
 74 | }
 75 | 
 76 | pointer scheme_counted_string(scheme *sc, const char *str, int len) {
 77 | 	return mk_counted_string(sc, str, len);
 78 | }
 79 | 
 80 | pointer scheme_empty_string(scheme *sc, int len, char fill) {
 81 | 	return mk_empty_string(sc, len, fill);
 82 | }
 83 | 
 84 | pointer scheme_character(scheme *sc, int c) {
 85 | 	return mk_character(sc, c);
 86 | }
 87 | 
 88 | pointer scheme_foreign_func(scheme *sc, foreign_func f) {
 89 | 	return mk_foreign_func(sc, f);
 90 | }
 91 | 
 92 | void scheme_putstr(scheme *sc, const char *s) {
 93 | 	putstr(sc, s);
 94 | }
 95 | 
 96 | int scheme_list_length(scheme *sc, pointer a) {
 97 | 	return list_length(sc, a);
 98 | }
 99 | 
100 | int scheme_eqv(pointer a, pointer b) {
101 | 	return eqv(a, b);
102 | }
103 | 
104 | int scheme_is_string(pointer p) {
105 | 	return is_string(p);
106 | }
107 | 
108 | char* scheme_string_value(pointer p) {
109 | 	return string_value(p);
110 | }
111 | 
112 | int scheme_is_number(pointer p) {
113 | 	return is_number(p);
114 | }
115 | 
116 | num scheme_nvalue(pointer p) {
117 | 	return nvalue(p);
118 | }
119 | 
120 | long scheme_ivalue(pointer p) {
121 | 	return ivalue(p);
122 | }
123 | 
124 | double scheme_rvalue(pointer p) {
125 | 	return rvalue(p);
126 | }
127 | 
128 | int scheme_is_integer(pointer p) {
129 | 	return is_integer(p);
130 | }
131 | 
132 | int scheme_is_real(pointer p) {
133 | 	return is_real(p);
134 | }
135 | 
136 | int scheme_is_character(pointer p) {
137 | 	return is_character(p);
138 | }
139 | 
140 | long scheme_charvalue(pointer p) {
141 | 	return charvalue(p);
142 | }
143 | 
144 | int scheme_is_vector(pointer p) {
145 | 	return is_vector(p);
146 | }
147 | 
148 | int scheme_is_port(pointer p) {
149 | 	return is_port(p);
150 | }
151 | 
152 | int scheme_is_pair(pointer p) {
153 | 	return is_pair(p);
154 | }
155 | 
156 | pointer scheme_pair_car(pointer p) {
157 | 	return pair_car(p);
158 | }
159 | 
160 | pointer scheme_pair_cdr(pointer p) {
161 | 	return pair_cdr(p);
162 | }
163 | 
164 | pointer scheme_set_car(pointer p, pointer q) {
165 | 	return set_car(p, q);
166 | }
167 | 
168 | pointer scheme_set_cdr(pointer p, pointer q) {
169 | 	return set_cdr(p, q);
170 | }
171 | 
172 | int scheme_is_symbol(pointer p) {
173 | 	return is_symbol(p);
174 | }
175 | 
176 | char* scheme_symname(pointer p) {
177 | 	return symname(p);
178 | }
179 | 
180 | int scheme_is_syntax(pointer p) {
181 | 	return is_syntax(p);
182 | }
183 | 
184 | int scheme_is_proc(pointer p) {
185 | 	return is_proc(p);
186 | }
187 | 
188 | int scheme_is_foreign(pointer p) {
189 | 	return is_foreign(p);
190 | }
191 | 
192 | char* scheme_syntaxname(pointer p) {
193 | 	return syntaxname(p);
194 | }
195 | 
196 | int scheme_is_closure(pointer p) {
197 | 	return is_closure(p);
198 | }
199 | 
200 | #ifdef USE_MACRO
201 | 
202 | SCHEME_EXPORT int scheme_is_macro(pointer p) {
203 | 	return is_macro(p);
204 | }
205 | 
206 | #endif
207 | 
208 | int scheme_is_continuation(pointer p) {
209 | 	return is_continuation(p);
210 | }
211 | 
212 | int scheme_is_promise(pointer p) {
213 | 	return is_promise(p);
214 | }
215 | 
216 | int scheme_is_environment(pointer p) {
217 | 	return is_environment(p);
218 | }
219 | 
220 | int scheme_is_immutable(pointer p) {
221 | 	return is_immutable(p);
222 | }
223 | 
224 | void scheme_setimmutable(pointer p) {
225 | 	setimmutable(p);
226 | }
227 | 
228 | void scheme_fill_vector(pointer vec, pointer obj) {
229 | 	fill_vector(vec, obj);
230 | }
231 | 
232 | pointer scheme_vector_elem(pointer vec, int ielem) {
233 | 	return vector_elem(vec, ielem);
234 | }
235 | 
236 | pointer scheme_set_vector_elem(pointer vec, int ielem, pointer a) {
237 | 	return set_vector_elem(vec, ielem, a);
238 | }
239 | 
240 | void scheme_memory_error(scheme *sc) {
241 | 	sc->no_memory=1;
242 | }


--------------------------------------------------------------------------------
/src/config.h:
--------------------------------------------------------------------------------
  1 | /* T I N Y S C H E M E    1 . 50
  2 |  *   Continued by armornick (March 2016)
  3 |  *   Original work by Dimitrios Souflis (dsouflis@acm.org)
  4 |  *   
  5 |  *   Based on MiniScheme (original credits follow)
  6 |  * (MINISCM)               coded by Atsushi Moriwaki (11/5/1989)
  7 |  * (MINISCM)           E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
  8 |  * (MINISCM) This version has been modified by R.C. Secrist.
  9 |  * (MINISCM)
 10 |  * (MINISCM) Mini-Scheme is now maintained by Akira KIDA.
 11 |  * (MINISCM)
 12 |  * (MINISCM) This is a revised and modified version by Akira KIDA.
 13 |  * (MINISCM)    current version is 0.85k4 (15 May 1994)
 14 |  * (MINISCM)
 15 |  *
 16 |  */
 17 | 
 18 | #ifndef _CONFIG_H
 19 | #define _CONFIG_H
 20 | 
 21 | #if defined _WIN32 && !defined SCHEME_STATIC
 22 | # if defined _SCHEME_SOURCE || defined SCHEME_MODULE
 23 | #  define SCHEME_EXPORT __declspec(dllexport)
 24 | # else
 25 | #  define SCHEME_EXPORT __declspec(dllimport)
 26 | # endif
 27 | #else
 28 | # define SCHEME_EXPORT
 29 | #endif
 30 | 
 31 | 
 32 | #ifndef _MSC_VER
 33 | # define USE_STRCASECMP 1
 34 | # ifndef USE_STRLWR
 35 | #   define USE_STRLWR 1
 36 | # endif
 37 | #else
 38 | # define USE_STRCASECMP 0
 39 | # define USE_STRLWR 0
 40 | #endif
 41 | 
 42 | #if USE_NO_FEATURES
 43 | # define USE_MATH 0
 44 | # define USE_CHAR_CLASSIFIERS 0
 45 | # define USE_ASCII_NAMES 0
 46 | # define USE_STRING_PORTS 0
 47 | # define USE_ERROR_HOOK 0
 48 | # define USE_TRACING 0
 49 | # define USE_COLON_HOOK 0
 50 | # define USE_DL 0
 51 | # define USE_PLIST 0
 52 | #endif
 53 | 
 54 | /*
 55 |  * Leave it defined if you want continuations, and also for the Sharp Zaurus.
 56 |  * Undefine it if you only care about faster speed and not strict Scheme compatibility.
 57 |  */
 58 | #define USE_SCHEME_STACK
 59 | 
 60 | #if USE_DL
 61 | # define USE_INTERFACE 1
 62 | #endif
 63 | 
 64 | 
 65 | #ifndef USE_MATH         /* If math support is needed */
 66 | # define USE_MATH 1
 67 | #endif
 68 | 
 69 | #ifndef USE_CHAR_CLASSIFIERS  /* If char classifiers are needed */
 70 | # define USE_CHAR_CLASSIFIERS 1
 71 | #endif
 72 | 
 73 | #ifndef USE_ASCII_NAMES  /* If extended escaped characters are needed */
 74 | # define USE_ASCII_NAMES 1
 75 | #endif
 76 | 
 77 | #ifndef USE_STRING_PORTS      /* Enable string ports */
 78 | # define USE_STRING_PORTS 1
 79 | #endif
 80 | 
 81 | #ifndef USE_TRACING
 82 | # define USE_TRACING 1
 83 | #endif
 84 | 
 85 | #ifndef USE_PLIST
 86 | # define USE_PLIST 0
 87 | #endif
 88 | 
 89 | /* To force system errors through user-defined error handling (see *error-hook*) */
 90 | #ifndef USE_ERROR_HOOK
 91 | # define USE_ERROR_HOOK 1
 92 | #endif
 93 | 
 94 | #ifndef USE_COLON_HOOK   /* Enable qualified qualifier */
 95 | # define USE_COLON_HOOK 1
 96 | #endif
 97 | 
 98 | #ifndef USE_STRCASECMP   /* stricmp for Unix */
 99 | # define USE_STRCASECMP 0
100 | #endif
101 | 
102 | #ifndef USE_STRLWR
103 | # define USE_STRLWR 1
104 | #endif
105 | 
106 | #ifndef STDIO_ADDS_CR    /* Define if DOS/Windows */
107 | # define STDIO_ADDS_CR 0
108 | #endif
109 | 
110 | #ifndef INLINE
111 | # define INLINE
112 | #endif
113 | 
114 | #ifndef USE_INTERFACE
115 | # define USE_INTERFACE 0
116 | #endif
117 | 
118 | #ifndef SHOW_ERROR_LINE   /* Show error line in file */
119 | # define SHOW_ERROR_LINE 1
120 | #endif
121 | 
122 | #endif


--------------------------------------------------------------------------------
/src/dynload.c:
--------------------------------------------------------------------------------
  1 | /* dynload.c Dynamic Loader for TinyScheme */
  2 | /* Original Copyright (c) 1999 Alexander Shendi     */
  3 | /* Modifications for NT and dl_* interface, scm_load_ext: D. Souflis */
  4 | /* Refurbished by Stephen Gildea */
  5 | 
  6 | #define _SCHEME_SOURCE
  7 | #include "dynload.h"
  8 | #include "scheme-private.h"
  9 | #include <string.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | 
 13 | #ifndef MAXPATHLEN
 14 | # define MAXPATHLEN 1024
 15 | #endif
 16 | 
 17 | static void make_filename(const char *name, char *filename);
 18 | static void make_init_fn(const char *name, char *init_fn);
 19 | 
 20 | #ifdef _WIN32
 21 | # include <windows.h>
 22 | #else
 23 | typedef void *HMODULE;
 24 | typedef void (*FARPROC)();
 25 | #define SUN_DL
 26 | #include <dlfcn.h>
 27 | #endif
 28 | 
 29 | #ifdef _WIN32
 30 | 
 31 | #define PREFIX ""
 32 | #define SUFFIX ".dll"
 33 | 
 34 |  static void display_w32_error_msg(const char *additional_message)
 35 |  {
 36 |    LPVOID msg_buf;
 37 | 
 38 |    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER,
 39 | 		 NULL, GetLastError(), 0,
 40 | 		 (LPTSTR)&msg_buf, 0, NULL);
 41 |    fprintf(stderr, "scheme load-extension: %s: %s", additional_message, msg_buf);
 42 |    LocalFree(msg_buf);
 43 |  }
 44 | 
 45 | static HMODULE dl_attach(const char *module) {
 46 |   HMODULE dll = LoadLibrary(module);
 47 |   if (!dll) display_w32_error_msg(module);
 48 |   return dll;
 49 | }
 50 | 
 51 | static FARPROC dl_proc(HMODULE mo, const char *proc) {
 52 |   FARPROC procedure = GetProcAddress(mo,proc);
 53 |   if (!procedure) display_w32_error_msg(proc);
 54 |   return procedure;
 55 | }
 56 | 
 57 | static void dl_detach(HMODULE mo) {
 58 |  (void)FreeLibrary(mo);
 59 | }
 60 | 
 61 | #elif defined(SUN_DL)
 62 | 
 63 | #include <dlfcn.h>
 64 | 
 65 | #define PREFIX "lib"
 66 | #define SUFFIX ".so"
 67 | 
 68 | static HMODULE dl_attach(const char *module) {
 69 |   HMODULE so=dlopen(module,RTLD_LAZY);
 70 |   if(!so) {
 71 |     fprintf(stderr, "Error loading scheme extension \"%s\": %s\n", module, dlerror());
 72 |   }
 73 |   return so;
 74 | }
 75 | 
 76 | static FARPROC dl_proc(HMODULE mo, const char *proc) {
 77 |   const char *errmsg;
 78 |   FARPROC fp=(FARPROC)dlsym(mo,proc);
 79 |   if ((errmsg = dlerror()) == 0) {
 80 |     return fp;
 81 |   }
 82 |   fprintf(stderr, "Error initializing scheme module \"%s\": %s\n", proc, errmsg);
 83 |  return 0;
 84 | }
 85 | 
 86 | static void dl_detach(HMODULE mo) {
 87 |  (void)dlclose(mo);
 88 | }
 89 | #endif
 90 | 
 91 | pointer scm_load_ext(scheme *sc, pointer args)
 92 | {
 93 |    pointer first_arg;
 94 |    pointer retval;
 95 |    char filename[MAXPATHLEN], init_fn[MAXPATHLEN+6];
 96 |    char *name;
 97 |    HMODULE dll_handle;
 98 |    void (*module_init)(scheme *sc);
 99 | 
100 |    if ((args != sc->NIL) && is_string((first_arg = pair_car(args)))) {
101 |       name = string_value(first_arg);
102 |       make_filename(name,filename);
103 |       make_init_fn(name,init_fn);
104 |       dll_handle = dl_attach(filename);
105 |       if (dll_handle == 0) {
106 |          retval = sc -> F;
107 |       }
108 |       else {
109 |          module_init = (void(*)(scheme *))dl_proc(dll_handle, init_fn);
110 |          if (module_init != 0) {
111 |             (*module_init)(sc);
112 |             retval = sc -> T;
113 |          }
114 |          else {
115 |             retval = sc->F;
116 |          }
117 |       }
118 |    }
119 |    else {
120 |       retval = sc -> F;
121 |    }
122 | 
123 |   return(retval);
124 | }
125 | 
126 | static void make_filename(const char *name, char *filename) {
127 |  strcpy(filename,name);
128 |  strcat(filename,SUFFIX);
129 | }
130 | 
131 | static void make_init_fn(const char *name, char *init_fn) {
132 |  const char *p=strrchr(name,'/');
133 |  if(p==0) {
134 |      p=name;
135 |  } else {
136 |      p++;
137 |  }
138 |  strcpy(init_fn,"init_");
139 |  strcat(init_fn,p);
140 | }
141 | 
142 | 
143 | /*
144 | Local variables:
145 | c-file-style: "k&r"
146 | End:
147 | */
148 | 


--------------------------------------------------------------------------------
/src/dynload.h:
--------------------------------------------------------------------------------
 1 | /* dynload.h */
 2 | /* Original Copyright (c) 1999 Alexander Shendi     */
 3 | /* Modifications for NT and dl_* interface: D. Souflis */
 4 | 
 5 | #ifndef DYNLOAD_H
 6 | #define DYNLOAD_H
 7 | 
 8 | #include "scheme.h"
 9 | 
10 | SCHEME_EXPORT pointer scm_load_ext(scheme *sc, pointer arglist);
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/src/hack.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                               How to hack TinyScheme
  3 |                               ----------------------
  4 | 
  5 |      TinyScheme is easy to learn and modify. It is structured like a
  6 |      meta-interpreter, only it is written in C. All data are Scheme
  7 |      objects, which facilitates both understanding/modifying the
  8 |      code and reifying the interpreter workings.
  9 | 
 10 |      In place of a dry description, we will pace through the addition
 11 |      of a useful new datatype: garbage-collected memory blocks.
 12 |      The interface will be:
 13 | 
 14 |           (make-block <n> [<fill>]) makes a new block of the specified size
 15 |                optionally filling it with a specified byte
 16 |           (block? <obj>)
 17 |           (block-length <block>)
 18 |           (block-ref <block> <index>) retrieves byte at location
 19 |           (block-set! <block> <index> <byte>) modifies byte at location
 20 |      
 21 |      In the sequel, lines that begin with '>' denote lines to add to the
 22 |      code. Lines that begin with '|' are just citations of existing code.
 23 |      Lines that begin with X denote lines to be removed from the code.
 24 | 
 25 |      First of all, we need to assign a typeid to our new type. Typeids
 26 |      in TinyScheme are small integers declared in the scheme_types enum
 27 |      located near the top of the scheme.c file; it begins with T_STRING.
 28 |      Add a new entry at the end, say T_MEMBLOCK. Remember to adjust the
 29 |      value of T_LAST_SYTEM_TYPE when adding new entries. There can be at
 30 |      most 31 types, but you don't have to worry about that limit yet.
 31 | 
 32 | |       T_ENVIRONMENT=14,
 33 | X       T_LAST_SYSTEM_TYPE=14
 34 | >       T_MEMBLOCK=15,
 35 | >       T_LAST_SYSTEM_TYPE=15
 36 | |     };
 37 | 
 38 | 
 39 |      Then, some helper macros would be useful. Go to where is_string()
 40 |      and the rest are defined and add:
 41 | 
 42 | >     INTERFACE INLINE int is_memblock(pointer p)     { return (type(p)==T_MEMBLOCK); }
 43 | 
 44 |      This actually is a function, because it is meant to be exported by
 45 |      scheme.h. If no foreign function will ever manipulate a memory block,
 46 |      you can instead define it as a macro:
 47 | 
 48 | >     #define is_memblock(p) (type(p)==T_MEMBLOCK)
 49 | 
 50 |      Then we make space for the new type in the main data structure:
 51 |      struct cell. As it happens, the _string part of the union _object
 52 |      (that is used to hold character strings) has two fields that suit us:
 53 | 
 54 | |         struct {
 55 | |              char   *_svalue;
 56 | |              int   _keynum;
 57 | |         } _string;
 58 | 
 59 |      We can use _svalue to hold the actual pointer and _keynum to hold its
 60 |      length. If we couln't reuse existing fields, we could always add other
 61 |      alternatives in union _object.
 62 | 
 63 |      We then proceed to write the function that actually makes a new block.
 64 |      For conformance reasons, we name it mk_memblock
 65 | 
 66 | >     static pointer mk_memblock(scheme *sc, int len, char fill) {
 67 | >          pointer x;
 68 | >          char *p=(char*)sc->malloc(len);
 69 | >
 70 | >          if(p==0) {
 71 | >               return sc->NIL;
 72 | >          }
 73 | >          x = get_cell(sc, sc->NIL, sc->NIL);
 74 | >
 75 | >          typeflag(x) = T_MEMBLOCK|T_ATOM;
 76 | >          strvalue(x)=p;
 77 | >          keynum(x)=len;
 78 | >          memset(p,fill,len);
 79 | >          return (x);
 80 | >     }
 81 | 
 82 |      The memory used by the MEMBLOCK will have to be freed when the cell
 83 |      is reclaimed during garbage collection. There is a placeholder for
 84 |      that staff, function finalize_cell(), currently handling strings only.
 85 | 
 86 | |     static void finalize_cell(scheme *sc, pointer a) {
 87 | |       if(is_string(a)) {
 88 | |          sc->free(strvalue(a));
 89 | >       } else if(is_memblock(a)) {
 90 | >          sc->free(strvalue(a));
 91 | |       } else if(is_port(a)) {
 92 | 
 93 |      There are no MEMBLOCK literals, so we don't concern ourselves with
 94 |      the READER part (yet!). We must cater to the PRINTER, though. We
 95 |      add one case more in atom2str().
 96 | 
 97 | |     } else if (iscontinuation(l)) {
 98 | |          p = "#<CONTINUATION>";
 99 | >     } else if (is_memblock(l)) {
100 | >          p = "#<MEMORY BLOCK>";
101 | |     } else {
102 | 
103 |      Whenever a MEMBLOCK is displayed, it will look like that.
104 |      Now, we must add the interface functions: constructor, predicate,
105 |      accessor, modifier. We must in fact create new op-codes for the virtual
106 |      machine underlying TinyScheme. Since version 1.30, TinyScheme uses
107 |      macros and a single source text to keep the enums and the dispatch table
108 |      in sync. The op-codes are defined in the opdefines.h file with one line
109 |      for each op-code. The lines in the file have six columns between the
110 |      starting _OPDEF( and ending ): A, B, C, D, E, and OP.
111 |      Note that this file uses unusually long lines to accomodate all the
112 |      information; adjust your editor to handle this.
113 | 
114 |      The purpose of the columns is:
115 |        - Column A is the name of the subroutine that handles the op-code.
116 |        - Column B is the name of the op-code function.
117 |        - Columns C and D are the minimum and maximum number of arguments
118 |          that are accepted by the op-code.
119 |        - Column E is a set of flags that tells the interpreter the type of
120 |          each of the arguments expected by the op-code.
121 |        - Column OP is used in the scheme_opcodes enum located in the
122 |          scheme-private.h file.
123 | 
124 |      Op-codes are really just tags for a huge C switch, only this switch
125 |      is broken up in to a number of different opexe_X functions. The
126 |      correspondence is made in table "dispatch_table". There, we assign
127 |      the new op-codes to opexe_2, where the equivalent ones for vectors
128 |      are situated. We also assign a name for them, and specify the minimum
129 |      and maximum arity (number of expected arguments). INF_ARG as a maximum
130 |      arity means "unlimited".
131 | 
132 |      For reasons of consistency, we add the new op-codes right after those
133 |      for vectors:
134 | 
135 | |     _OP_DEF(opexe_2, "vector-set!",                    3,  3,       TST_VECTOR TST_NATURAL TST_ANY,  OP_VECSET           )
136 | >     _OP_DEF(opexe_2, "make-block",                     1,  2,       TST_NATURAL TST_CHAR,            OP_MKBLOCK          )
137 | >     _OP_DEF(opexe_2, "block-length",                   1,  1,       T_MEMBLOCK,                      OP_BLOCKLEN         )
138 | >     _OP_DEF(opexe_2, "block-ref",                      2,  2,       T_MEMBLOCK TST_NATURAL,          OP_BLOCKREF         )
139 | >     _OP_DEF(opexe_2, "block-set!",                     1,  1,       T_MEMBLOCK TST_NATURAL TST_CHAR, OP_BLOCKSET         )
140 | |     _OP_DEF(opexe_3, "not",                            1,  1,       TST_NONE,                        OP_NOT              )
141 | 
142 |      We add the predicate along with the other predicates in opexe_3:
143 | 
144 | |     _OP_DEF(opexe_3, "vector?",                        1,  1,       TST_ANY,                         OP_VECTORP          )
145 | >     _OP_DEF(opexe_3, "block?",                         1,  1,       TST_ANY,                         OP_BLOCKP           )
146 | |     _OP_DEF(opexe_3, "eq?",                            2,  2,       TST_ANY,                         OP_EQ               )
147 | 
148 |      All that remains is to write the actual code to do the processing and
149 |      add it to the switch statement in opexe_2, after the OP_VECSET case.
150 | 
151 | >     case OP_MKBLOCK: { /* make-block */
152 | >          int fill=0;
153 | >          int len;
154 | >
155 | >          if(!isnumber(car(sc->args))) {
156 | >               Error_1(sc,"make-block: not a number:",car(sc->args));
157 | >          }
158 | >          len=ivalue(car(sc->args));
159 | >          if(len<=0) {
160 | >               Error_1(sc,"make-block: not positive:",car(sc->args));
161 | >          }
162 | >
163 | >          if(cdr(sc->args)!=sc->NIL) {
164 | >               if(!isnumber(cadr(sc->args)) || ivalue(cadr(sc->args))<0) {
165 | >                    Error_1(sc,"make-block: not a positive number:",cadr(sc->args));
166 | >               }
167 | >               fill=charvalue(cadr(sc->args))%255;
168 | >          }
169 | >          s_return(sc,mk_memblock(sc,len,(char)fill));
170 | >     }
171 | >
172 | >     case OP_BLOCKLEN:  /* block-length */
173 | >          if(!ismemblock(car(sc->args))) {
174 | >               Error_1(sc,"block-length: not a memory block:",car(sc->args));
175 | >          }
176 | >          s_return(sc,mk_integer(sc,keynum(car(sc->args))));
177 | >
178 | >     case OP_BLOCKREF: { /* block-ref */
179 | >          char *str;
180 | >          int index;
181 | >
182 | >          if(!ismemblock(car(sc->args))) {
183 | >               Error_1(sc,"block-ref: not a memory block:",car(sc->args));
184 | >          }
185 | >          str=strvalue(car(sc->args));
186 | >
187 | >          if(cdr(sc->args)==sc->NIL) {
188 | >               Error_0(sc,"block-ref: needs two arguments");
189 | >          }
190 | >          if(!isnumber(cadr(sc->args))) {
191 | >               Error_1(sc,"block-ref: not a number:",cadr(sc->args));
192 | >          }
193 | >          index=ivalue(cadr(sc->args));
194 | >
195 | >          if(index<0 || index>=keynum(car(sc->args))) {
196 | >               Error_1(sc,"block-ref: out of bounds:",cadr(sc->args));
197 | >          }
198 | >
199 | >          s_return(sc,mk_integer(sc,str[index]));
200 | >     }
201 | >
202 | >     case OP_BLOCKSET: { /* block-set! */
203 | >          char *str;
204 | >          int index;
205 | >          int c;
206 | >
207 | >          if(!ismemblock(car(sc->args))) {
208 | >               Error_1(sc,"block-set!: not a memory block:",car(sc->args));
209 | >          }
210 | >          if(isimmutable(car(sc->args))) {
211 | >               Error_1(sc,"block-set!: unable to alter immutable memory block:",car(sc->args));
212 | >          }
213 | >          str=strvalue(car(sc->args));
214 | >
215 | >          if(cdr(sc->args)==sc->NIL) {
216 | >               Error_0(sc,"block-set!: needs three arguments");
217 | >          }
218 | >          if(!isnumber(cadr(sc->args))) {
219 | >               Error_1(sc,"block-set!: not a number:",cadr(sc->args));
220 | >          }
221 | >          index=ivalue(cadr(sc->args));
222 | >          if(index<0 || index>=keynum(car(sc->args))) {
223 | >               Error_1(sc,"block-set!: out of bounds:",cadr(sc->args));
224 | >          }
225 | >
226 | >          if(cddr(sc->args)==sc->NIL) {
227 | >               Error_0(sc,"block-set!: needs three arguments");
228 | >          }
229 | >          if(!isinteger(caddr(sc->args))) {
230 | >               Error_1(sc,"block-set!: not an integer:",caddr(sc->args));
231 | >          }
232 | >          c=ivalue(caddr(sc->args))%255;
233 | >
234 | >          str[index]=(char)c;
235 | >          s_return(sc,car(sc->args));
236 | >     }
237 | 
238 |      Finally, do the same for the predicate in opexe_3.
239 | 
240 | |     case OP_VECTORP:     /* vector? */
241 | |          s_retbool(is_vector(car(sc->args)));
242 | >     case OP_BLOCKP:     /* block? */
243 | >          s_retbool(is_memblock(car(sc->args)));
244 | |     case OP_EQ:         /* eq? */
245 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | /* T I N Y S C H E M E    1 . 50
  2 |  *   Continued by armornick (March 2016)
  3 |  *   Original work by Dimitrios Souflis (dsouflis@acm.org)
  4 |  *   
  5 |  *   Based on MiniScheme (original credits follow)
  6 |  * (MINISCM)               coded by Atsushi Moriwaki (11/5/1989)
  7 |  * (MINISCM)           E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
  8 |  * (MINISCM) This version has been modified by R.C. Secrist.
  9 |  * (MINISCM)
 10 |  * (MINISCM) Mini-Scheme is now maintained by Akira KIDA.
 11 |  * (MINISCM)
 12 |  * (MINISCM) This is a revised and modified version by Akira KIDA.
 13 |  * (MINISCM)    current version is 0.85k4 (15 May 1994)
 14 |  * (MINISCM)
 15 |  *
 16 |  */
 17 | 
 18 | #include "scheme.h"
 19 | #include "dynload.h"
 20 | 
 21 | #include <string.h>
 22 | #include <stdlib.h>
 23 | 
 24 | 
 25 | #ifndef WIN32
 26 | # include <unistd.h>
 27 | #endif
 28 | #ifdef WIN32
 29 | # include <io.h> /* access */
 30 | # include <sys/types.h> /* off_t */
 31 | #define snprintf _snprintf
 32 | #endif
 33 | 
 34 | #define banner "TinyScheme 1.50"
 35 | 
 36 | #ifndef InitFile
 37 | # define InitFile "init.scm"
 38 | #endif
 39 | 
 40 | 
 41 | #if defined(__APPLE__) && !defined (OSX)
 42 | int main()
 43 | {
 44 |      extern MacTS_main(int argc, char **argv);
 45 |      char**    argv;
 46 |      int argc = ccommand(&argv);
 47 |      MacTS_main(argc,argv);
 48 |      return 0;
 49 | }
 50 | int MacTS_main(int argc, char **argv) {
 51 | #else
 52 | 
 53 | int main(int argc, char **argv) {
 54 | #endif
 55 | 
 56 |   scheme *sc;
 57 |   FILE *fin;
 58 |   char *file_name=InitFile;
 59 |   int retcode;
 60 |   int isfile=1;
 61 | 
 62 |   if(argc==1) {
 63 |     printf("%s\n", banner);
 64 |   }
 65 | 
 66 |   if(argc==2 && strcmp(argv[1],"-?")==0) {
 67 |     printf("Usage: tinyscheme -?\n");
 68 |     printf("or:    tinyscheme [<file1> <file2> ...]\n");
 69 |     printf("followed by\n");
 70 |     printf("          -1 <file> [<arg1> <arg2> ...]\n");
 71 |     printf("          -c <Scheme commands> [<arg1> <arg2> ...]\n");
 72 |     printf("assuming that the executable is named tinyscheme.\n");
 73 |     printf("Use - as filename for stdin.\n");
 74 |     return 1;
 75 |   }
 76 | 
 77 |   // printf("initializing scheme environment\n");
 78 |   if(!scheme_init(&sc)) {
 79 |     fprintf(stderr,"Could not initialize!\n");
 80 |     return 2;
 81 |   }
 82 | 
 83 |   scheme_set_input_port_file(sc, stdin);
 84 |   scheme_set_output_port_file(sc, stdout);
 85 | 
 86 | #if USE_DL
 87 |   // printf("defining extension loader\n");
 88 |   scheme_define(sc,scheme_global_env(sc),scheme_symbol(sc,"load-extension"),scheme_foreign_func(sc, scm_load_ext));
 89 | #endif
 90 | 
 91 |   argv++;
 92 |   if(access(file_name,0)!=0) {
 93 |     char *p=getenv("TINYSCHEMEINIT");
 94 |     if(p!=0) {
 95 |       file_name=p;
 96 |     }
 97 |   }
 98 |   do {
 99 |     if(strcmp(file_name,"-")==0) {
100 |       fin=stdin;
101 |     } else if(strcmp(file_name,"-1")==0 || strcmp(file_name,"-c")==0) {
102 |       pointer args=scheme_nil(sc);
103 |       isfile=file_name[1]=='1';
104 |       file_name=*argv++;
105 |       if(strcmp(file_name,"-")==0) {
106 |         fin=stdin;
107 |       } else if(isfile) {
108 |         fin=fopen(file_name,"r");
109 |       }
110 |       for(;*argv;argv++) {
111 |         pointer value=scheme_string(sc,*argv);
112 |         args=cons(sc,value,args);
113 |       }
114 |       args=scheme_reverse_in_place(sc,scheme_nil(sc),args);
115 |       scheme_define(sc,scheme_global_env(sc),scheme_symbol(sc,"*args*"),args);
116 | 
117 |     } else {
118 |       fin=fopen(file_name,"r");
119 |     }
120 |     if(isfile && fin==0) {
121 |       fprintf(stderr,"Could not open file %s\n",file_name);
122 |     } else {
123 |       if(isfile) {
124 |         scheme_load_named_file(sc,fin,file_name);
125 |       } else {
126 |         scheme_load_string(sc,file_name);
127 |       }
128 |       if(!isfile || fin!=stdin) {
129 |         if(scheme_retcode(sc) != 0) {
130 |           fprintf(stderr,"Errors encountered reading %s\n",file_name);
131 |         }
132 |         if(isfile) {
133 |           fclose(fin);
134 |         }
135 |       }
136 |     }
137 |     file_name=*argv++;
138 |   } while(file_name!=0);
139 |   if(argc==1) {
140 |     scheme_load_named_file(sc,stdin,0);
141 |   }
142 |   retcode=scheme_retcode(sc);
143 |   scheme_deinit(sc);
144 | 
145 |   return retcode;
146 | }
147 | 


--------------------------------------------------------------------------------
/src/re/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | Copyright 1992, 1993, 1994 Henry Spencer.  All rights reserved.
 2 | This software is not subject to any license of the American Telephone
 3 | and Telegraph Company or of the Regents of the University of California.
 4 | 
 5 | Permission is granted to anyone to use this software for any purpose on
 6 | any computer system, and to alter it and redistribute it, subject
 7 | to the following restrictions:
 8 | 
 9 | 1. The author is not responsible for the consequences of use of this
10 |    software, no matter how awful, even if they arise from flaws in it.
11 | 
12 | 2. The origin of this software must not be misrepresented, either by
13 |    explicit claim or by omission.  Since few users ever read sources,
14 |    credits must appear in the documentation.
15 | 
16 | 3. Altered versions must be plainly marked as such, and must not be
17 |    misrepresented as being the original software.  Since few users
18 |    ever read sources, credits must appear in the documentation.
19 | 
20 | 4. This notice may not be removed or altered.
21 | 


--------------------------------------------------------------------------------
/src/re/Makefile.in:
--------------------------------------------------------------------------------
  1 | SHELL = /bin/sh
  2 | 
  3 | srcdir=@srcdir@
  4 | VPATH=@srcdir@
  5 | 
  6 | CC=@CC@
  7 | RANLIB=@RANLIB@
  8 | 
  9 | # You probably want to take -DREDEBUG out of CFLAGS, and put something like
 10 | # -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
 11 | # internal assertion checking and some debugging facilities).
 12 | # Put -Dconst= in for a pre-ANSI compiler.
 13 | # Do not take -DPOSIX_MISTAKE out.
 14 | # REGCFLAGS isn't important to you (it's for my use in some special contexts).
 15 | CFLAGS=-I$(srcdir) -I. -DPOSIX_MISTAKE @CFLAGS@
 16 | 
 17 | # If you have a pre-ANSI compiler, put -o into MKHFLAGS.  If you want
 18 | # the Berkeley __P macro, put -b in.
 19 | MKHFLAGS=
 20 | 
 21 | # Flags for linking but not compiling, if any.
 22 | LDFLAGS=@LDFLAGS@
 23 | 
 24 | # Extra libraries for linking, if any.
 25 | LIBS=
 26 | 
 27 | # Internal stuff, should not need changing.
 28 | OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
 29 | OBJS=$(OBJPRODN) split.o debug.o main.o
 30 | H=cclass.h cname.h regex2.h utils.h
 31 | REGSRC=regcomp.c regerror.c regexec.c regfree.c
 32 | ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
 33 | 
 34 | # Stuff that matters only if you're trying to lint the package.
 35 | LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
 36 | LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
 37 | JUNKLINT=possible pointer alignment|null effect
 38 | 
 39 | # arrangements to build forward-reference header files
 40 | .SUFFIXES:	.ih .h
 41 | .c.ih:
 42 | 	sh $(srcdir)/mkh $(MKHFLAGS) -p $< >$@
 43 | 
 44 | all lib: libregex.a
 45 | 
 46 | libregex.a: $(OBJPRODN)
 47 | 	rm -f libregex.a
 48 | 	ar cr libregex.a $(OBJPRODN)
 49 | 	$(RANLIB) libregex.a
 50 | 
 51 | default:	r
 52 | 
 53 | purge:
 54 | 	rm -f *.o
 55 | 
 56 | # stuff to build regex.h
 57 | REGEXH=regex.h
 58 | REGEXHSRC=regex2.h $(REGSRC)
 59 | $(REGEXH):	$(REGEXHSRC) mkh
 60 | 	sh $(srcdir)/./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.h
 61 | 	#cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
 62 | 	#rm -f regex.tmp
 63 | 
 64 | # dependencies
 65 | $(OBJPRODN) debug.o:	utils.h regex.h regex2.h
 66 | regcomp.o:	cclass.h cname.h regcomp.ih
 67 | regexec.o:	engine.c engine.ih
 68 | regerror.o:	regerror.ih
 69 | debug.o:	debug.ih
 70 | main.o:	main.ih
 71 | 
 72 | # tester
 73 | re:	$(OBJS)
 74 | 	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
 75 | 
 76 | # regression test
 77 | r:	re tests
 78 | 	./re <tests
 79 | 	./re -el <tests
 80 | 	./re -er <tests
 81 | 
 82 | # 57 variants, and other stuff, for development use -- not useful to you
 83 | ra:	./re tests
 84 | 	-./re <tests
 85 | 	-./re -el <tests
 86 | 	-./re -er <tests
 87 | 
 88 | rx:	./re tests
 89 | 	./re -x <tests
 90 | 	./re -x -el <tests
 91 | 	./re -x -er <tests
 92 | 
 93 | t:	./re tests
 94 | 	-time ./re <tests
 95 | 	-time ./re -cs <tests
 96 | 	-time ./re -el <tests
 97 | 	-time ./re -cs -el <tests
 98 | 
 99 | l:	$(LINTC)
100 | 	lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
101 | 
102 | fullprint:
103 | 	ti README WHATSNEW notes todo | list
104 | 	ti *.h | list
105 | 	list *.c
106 | 	list regex.3 regex.7
107 | 
108 | print:
109 | 	ti README WHATSNEW notes todo | list
110 | 	ti *.h | list
111 | 	list reg*.c engine.c
112 | 
113 | 
114 | mf.tmp:	Makefile
115 | 	sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
116 | 
117 | DTRH=cclass.h cname.h regex2.h utils.h
118 | PRE=COPYRIGHT README WHATSNEW
119 | POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
120 | FILES=$(PRE) Makefile $(POST)
121 | DTR=$(PRE) Makefile=mf.tmp $(POST)
122 | dtr:	$(FILES) mf.tmp
123 | 	makedtr $(DTR) >$@
124 | 	rm mf.tmp
125 | 
126 | cio:	$(FILES)
127 | 	cio $(FILES)
128 | 
129 | rdf:	$(FILES)
130 | 	rcsdiff -c $(FILES) 2>&1 | p
131 | 
132 | # various forms of cleanup
133 | tidy:
134 | 	rm -f junk* core core.* *.core dtr *.tmp lint
135 | 
136 | clean:	tidy
137 | 	rm -f *.o *.s re libregex.a
138 | 
139 | # don't do this one unless you know what you're doing
140 | spotless:	clean
141 | 	rm -f mkh regex.h
142 | 


--------------------------------------------------------------------------------
/src/re/README:
--------------------------------------------------------------------------------
 1 | alpha3.4 release.
 2 | Thu Mar 17 23:17:18 EST 1994
 3 | henry@zoo.toronto.edu
 4 | 
 5 | See WHATSNEW for change listing.
 6 | 
 7 | installation notes:
 8 | --------
 9 | Read the comments at the beginning of Makefile before running.
10 | 
11 | Utils.h contains some things that just might have to be modified on
12 | some systems, as well as a nested include (ugh) of <assert.h>.
13 | 
14 | The "fake" directory contains quick-and-dirty fakes for some header
15 | files and routines that old systems may not have.  Note also that
16 | -DUSEBCOPY will make utils.h substitute bcopy() for memmove().
17 | 
18 | After that, "make r" will build regcomp.o, regexec.o, regfree.o,
19 | and regerror.o (the actual routines), bundle them together into a test
20 | program, and run regression tests on them.  No output is good output.
21 | 
22 | "make lib" builds just the .o files for the actual routines (when
23 | you're happy with testing and have adjusted CFLAGS for production),
24 | and puts them together into libregex.a.  You can pick up either the
25 | library or *.o ("make lib" makes sure there are no other .o files left
26 | around to confuse things).
27 | 
28 | Main.c, debug.c, split.c are used for regression testing but are not part
29 | of the RE routines themselves.
30 | 
31 | Regex.h goes in /usr/include.  All other .h files are internal only.
32 | --------
33 | 


--------------------------------------------------------------------------------
/src/re/README.1st:
--------------------------------------------------------------------------------
 1 |              TinyScheme RE (Regular Expressions) extension
 2 |              ---------------------------------------------
 3 | Version 1.2, August 2002
 4 | 
 5 | The bulk of this directory is the regular expression library written
 6 | by Henry Spencer (see file README and COPYRIGHT).
 7 | 
 8 | Two files were added to produce the TinyScheme regular expression
 9 | library, re.so: re.c and re.makefile. The included re.makefile was contributed
10 | initially by Stephen Gildea and should be adaptable to all Unix systems.
11 | 
12 | The makefile produces a DLL named re.so. For now, it contains just
13 | a single foreign function (re-match <pattern> <string>). It returns
14 | true (string matches pattern) or false. If it is called with an
15 | extra parameter, which should be a vector, overwrites as many elements
16 | of the vector as needed with the strings that matched the corresponding
17 | parenthesized subexpressions inside <pattern>.
18 | 
19 | It is not fully tested, so use with caution.
20 | 
21 | Load the extension from inside TinyScheme using
22 | (load-extension "re/re")
23 | assuming that re.so is in the directory "re".
24 | 
25 | Load "re.scm" if you wish to use v.1.1 behavior.
26 |  
27 | dsouflis@acm.org
28 | 


--------------------------------------------------------------------------------
/src/re/WHATSNEW:
--------------------------------------------------------------------------------
 1 | New in 1.3: Include scheme-private.h in re.c to fix compilation errors
 2 | when used with TinyScheme 1.40 and later.
 3 | 
 4 | New in alpha3.4:  The complex bug alluded to below has been fixed (in a
 5 | slightly kludgey temporary way that may hurt efficiency a bit; this is
 6 | another "get it out the door for 4.4" release).  The tests at the end of
 7 | the tests file have accordingly been uncommented.  The primary sign of
 8 | the bug was that something like a?b matching ab matched b rather than ab.
 9 | (The bug was essentially specific to this exact situation, else it would
10 | have shown up earlier.)
11 | 
12 | New in alpha3.3:  The definition of word boundaries has been altered
13 | slightly, to more closely match the usual programming notion that "_"
14 | is an alphabetic.  Stuff used for pre-ANSI systems is now in a subdir,
15 | and the makefile no longer alludes to it in mysterious ways.  The
16 | makefile has generally been cleaned up some.  Fixes have been made
17 | (again!) so that the regression test will run without -DREDEBUG, at
18 | the cost of weaker checking.  A workaround for a bug in some folks'
19 | <assert.h> has been added.  And some more things have been added to
20 | tests, including a couple right at the end which are commented out
21 | because the code currently flunks them (complex bug; fix coming).
22 | Plus the usual minor cleanup.
23 | 
24 | New in alpha3.2:  Assorted bits of cleanup and portability improvement
25 | (the development base is now a BSDI system using GCC instead of an ancient
26 | Sun system, and the newer compiler exposed some glitches).  Fix for a
27 | serious bug that affected REs using many [] (including REG_ICASE REs
28 | because of the way they are implemented), *sometimes*, depending on
29 | memory-allocation patterns.  The header-file prototypes no longer name
30 | the parameters, avoiding possible name conflicts.  The possibility that
31 | some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
32 | now handled gracefully.  "uchar" is no longer used as an internal type
33 | name (too many people have the same idea).  Still the same old lousy
34 | performance, alas.
35 | 
36 | New in alpha3.1:  Basically nothing, this release is just a bookkeeping
37 | convenience.  Stay tuned.
38 | 
39 | New in alpha3.0:  Performance is no better, alas, but some fixes have been
40 | made and some functionality has been added.  (This is basically the "get
41 | it out the door in time for 4.4" release.)  One bug fix:  regfree() didn't
42 | free the main internal structure (how embarrassing).  It is now possible
43 | to put NULs in either the RE or the target string, using (resp.) a new
44 | REG_PEND flag and the old REG_STARTEND flag.  The REG_NOSPEC flag to
45 | regcomp() makes all characters ordinary, so you can match a literal
46 | string easily (this will become more useful when performance improves!).
47 | There are now primitives to match beginnings and ends of words, although
48 | the syntax is disgusting and so is the implementation.  The REG_ATOI
49 | debugging interface has changed a bit.  And there has been considerable
50 | internal cleanup of various kinds.
51 | 
52 | New in alpha2.3:  Split change list out of README, and moved flags notes
53 | into Makefile.  Macro-ized the name of regex(7) in regex(3), since it has
54 | to change for 4.4BSD.  Cleanup work in engine.c, and some new regression
55 | tests to catch tricky cases thereof.
56 | 
57 | New in alpha2.2:  Out-of-date manpages updated.  Regerror() acquires two
58 | small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
59 | in my own test program and might be useful to others for similar purposes.
60 | The regression test will now compile (and run) without REDEBUG.  The
61 | BRE \$ bug is fixed.  Most uses of "uchar" are gone; it's all chars now.
62 | Char/uchar parameters are now written int/unsigned, to avoid possible
63 | portability problems with unpromoted parameters.  Some unsigned casts have
64 | been introduced to minimize portability problems with shifting into sign
65 | bits.
66 | 
67 | New in alpha2.1:  Lots of little stuff, cleanup and fixes.  The one big
68 | thing is that regex.h is now generated, using mkh, rather than being
69 | supplied in the distribution; due to circularities in dependencies,
70 | you have to build regex.h explicitly by "make h".  The two known bugs
71 | have been fixed (and the regression test now checks for them), as has a
72 | problem with assertions not being suppressed in the absence of REDEBUG.
73 | No performance work yet.
74 | 
75 | New in alpha2:  Backslash-anything is an ordinary character, not an
76 | error (except, of course, for the handful of backslashed metacharacters
77 | in BREs), which should reduce script breakage.  The regression test
78 | checks *where* null strings are supposed to match, and has generally
79 | been tightened up somewhat.  Small bug fixes in parameter passing (not
80 | harmful, but technically errors) and some other areas.  Debugging
81 | invoked by defining REDEBUG rather than not defining NDEBUG.
82 | 
83 | New in alpha+3:  full prototyping for internal routines, using a little
84 | helper program, mkh, which extracts prototypes given in stylized comments.
85 | More minor cleanup.  Buglet fix:  it's CHAR_BIT, not CHAR_BITS.  Simple
86 | pre-screening of input when a literal string is known to be part of the
87 | RE; this does wonders for performance.
88 | 
89 | New in alpha+2:  minor bits of cleanup.  Notably, the number "32" for the
90 | word width isn't hardwired into regexec.c any more, the public header
91 | file prototypes the functions if __STDC__ is defined, and some small typos
92 | in the manpages have been fixed.
93 | 
94 | New in alpha+1:  improvements to the manual pages, and an important
95 | extension, the REG_STARTEND option to regexec().
96 | 


--------------------------------------------------------------------------------
/src/re/cclass.h:
--------------------------------------------------------------------------------
 1 | /* character-class table */
 2 | static struct cclass {
 3 | 	char *name;
 4 | 	char *chars;
 5 | 	char *multis;
 6 | } cclasses[] = {
 7 | 	{ "alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
 8 | 0123456789",				"" },
 9 | 	{ "alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
10 | 					"" },
11 | 	{ "blank",	" \t",		"" },
12 | 	{ "cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
13 | \25\26\27\30\31\32\33\34\35\36\37\177",	"" },
14 | 	{ "digit",	"0123456789",	"" },
15 | 	{ "graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
16 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
17 | 					"" },
18 | 	{ "lower",	"abcdefghijklmnopqrstuvwxyz",
19 | 					"" },
20 | 	{ "print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
21 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
22 | 					"" },
23 | 	{ "punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
24 | 					"" },
25 | 	{ "space",	"\t\n\v\f\r ",	"" },
26 | 	{ "upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
27 | 					"" },
28 | 	{ "xdigit",	"0123456789ABCDEFabcdef",
29 | 					"" },
30 | 	{ NULL,		0,		"" }
31 | };
32 | 


--------------------------------------------------------------------------------
/src/re/cname.h:
--------------------------------------------------------------------------------
  1 | /* character-name table */
  2 | static struct cname {
  3 | 	char *name;
  4 | 	char code;
  5 | } cnames[] = {
  6 | 	{ "NUL",	'\0' },
  7 | 	{ "SOH",	'\001' },
  8 | 	{ "STX",	'\002' },
  9 | 	{ "ETX",	'\003' },
 10 | 	{ "EOT",	'\004' },
 11 | 	{ "ENQ",	'\005' },
 12 | 	{ "ACK",	'\006' },
 13 | 	{ "BEL",	'\007' },
 14 | 	{ "alert",	'\007' },
 15 | 	{ "BS",		'\010' },
 16 | 	{ "backspace",	'\b' },
 17 | 	{ "HT",		'\011' },
 18 | 	{ "tab",		'\t' },
 19 | 	{ "LF",		'\012' },
 20 | 	{ "newline",	'\n' },
 21 | 	{ "VT",		'\013' },
 22 | 	{ "vertical-tab",	'\v' },
 23 | 	{ "FF",		'\014' },
 24 | 	{ "form-feed",	'\f' },
 25 | 	{ "CR",		'\015' },
 26 | 	{ "carriage-return",	'\r' },
 27 | 	{ "SO",	'\016' },
 28 | 	{ "SI",	'\017' },
 29 | 	{ "DLE",	'\020' },
 30 | 	{ "DC1",	'\021' },
 31 | 	{ "DC2",	'\022' },
 32 | 	{ "DC3",	'\023' },
 33 | 	{ "DC4",	'\024' },
 34 | 	{ "NAK",	'\025' },
 35 | 	{ "SYN",	'\026' },
 36 | 	{ "ETB",	'\027' },
 37 | 	{ "CAN",	'\030' },
 38 | 	{ "EM",	'\031' },
 39 | 	{ "SUB",	'\032' },
 40 | 	{ "ESC",	'\033' },
 41 | 	{ "IS4",	'\034' },
 42 | 	{ "FS",	'\034' },
 43 | 	{ "IS3",	'\035' },
 44 | 	{ "GS",	'\035' },
 45 | 	{ "IS2",	'\036' },
 46 | 	{ "RS",	'\036' },
 47 | 	{ "IS1",	'\037' },
 48 | 	{ "US",	'\037' },
 49 | 	{ "space",		' ' },
 50 | 	{ "exclamation-mark",	'!' },
 51 | 	{ "quotation-mark",	'"' },
 52 | 	{ "number-sign",		'#' },
 53 | 	{ "dollar-sign",		'$' },
 54 | 	{ "percent-sign",		'%' },
 55 | 	{ "ampersand",		'&' },
 56 | 	{ "apostrophe",		'\'' },
 57 | 	{ "left-parenthesis",	'(' },
 58 | 	{ "right-parenthesis",	')' },
 59 | 	{ "asterisk",	'*' },
 60 | 	{ "plus-sign",	'+' },
 61 | 	{ "comma",	',' },
 62 | 	{ "hyphen",	'-' },
 63 | 	{ "hyphen-minus",	'-' },
 64 | 	{ "period",	'.' },
 65 | 	{ "full-stop",	'.' },
 66 | 	{ "slash",	'/' },
 67 | 	{ "solidus",	'/' },
 68 | 	{ "zero",		'0' },
 69 | 	{ "one",		'1' },
 70 | 	{ "two",		'2' },
 71 | 	{ "three",	'3' },
 72 | 	{ "four",		'4' },
 73 | 	{ "five",		'5' },
 74 | 	{ "six",		'6' },
 75 | 	{ "seven",	'7' },
 76 | 	{ "eight",	'8' },
 77 | 	{ "nine",		'9' },
 78 | 	{ "colon",	':' },
 79 | 	{ "semicolon",	';' },
 80 | 	{ "less-than-sign",	'<' },
 81 | 	{ "equals-sign",		'=' },
 82 | 	{ "greater-than-sign",	'>' },
 83 | 	{ "question-mark",	'?' },
 84 | 	{ "commercial-at",	'@' },
 85 | 	{ "left-square-bracket",	'[' },
 86 | 	{ "backslash",		'\\' },
 87 | 	{ "reverse-solidus",	'\\' },
 88 | 	{ "right-square-bracket",	']' },
 89 | 	{ "circumflex",		'^' },
 90 | 	{ "circumflex-accent",	'^' },
 91 | 	{ "underscore",		'_' },
 92 | 	{ "low-line",		'_' },
 93 | 	{ "grave-accent",		'`' },
 94 | 	{ "left-brace",		'{' },
 95 | 	{ "left-curly-bracket",	'{' },
 96 | 	{ "vertical-line",	'|' },
 97 | 	{ "right-brace",		'}' },
 98 | 	{ "right-curly-bracket",	'}' },
 99 | 	{ "tilde",		'~' },
100 | 	{ "DEL",	'\177' },
101 | 	{ NULL,	0 }
102 | };
103 | 


--------------------------------------------------------------------------------
/src/re/debug.c:
--------------------------------------------------------------------------------
  1 | #include <regex.h>
  2 | 
  3 | #include "utils.h"
  4 | #include "regex2.h"
  5 | #include "debug.ih"
  6 | 
  7 | /*
  8 |  - regprint - print a regexp for debugging
  9 |  == void regprint(regex_t *r, FILE *d);
 10 |  */
 11 | void
 12 | regprint(r, d)
 13 | regex_t *r;
 14 | FILE *d;
 15 | {
 16 |      register struct re_guts *g = r->re_g;
 17 |      register int i;
 18 |      register int c;
 19 |      register int last;
 20 |      int nincat[NC];
 21 | 
 22 |      fprintf(d, "%ld states, %d categories", (long)g->nstates,
 23 |                                    g->ncategories);
 24 |      fprintf(d, ", first %ld last %ld", (long)g->firststate,
 25 |                               (long)g->laststate);
 26 |      if (g->iflags&USEBOL)
 27 |           fprintf(d, ", USEBOL");
 28 |      if (g->iflags&USEEOL)
 29 |           fprintf(d, ", USEEOL");
 30 |      if (g->iflags&BAD)
 31 |           fprintf(d, ", BAD");
 32 |      if (g->nsub > 0)
 33 |           fprintf(d, ", nsub=%ld", (long)g->nsub);
 34 |      if (g->must != NULL)
 35 |           fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
 36 |                                         g->must);
 37 |      if (g->backrefs)
 38 |           fprintf(d, ", backrefs");
 39 |      if (g->nplus > 0)
 40 |           fprintf(d, ", nplus %ld", (long)g->nplus);
 41 |      fprintf(d, "\n");
 42 |      s_print(g, d);
 43 |      for (i = 0; i < g->ncategories; i++) {
 44 |           nincat[i] = 0;
 45 |           for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 46 |                if (g->categories[c] == i)
 47 |                     nincat[i]++;
 48 |      }
 49 |      fprintf(d, "cc0#%d", nincat[0]);
 50 |      for (i = 1; i < g->ncategories; i++)
 51 |           if (nincat[i] == 1) {
 52 |                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 53 |                     if (g->categories[c] == i)
 54 |                          break;
 55 |                fprintf(d, ", %d=%s", i, regchar(c));
 56 |           }
 57 |      fprintf(d, "\n");
 58 |      for (i = 1; i < g->ncategories; i++)
 59 |           if (nincat[i] != 1) {
 60 |                fprintf(d, "cc%d\t", i);
 61 |                last = -1;
 62 |                for (c = CHAR_MIN; c <= CHAR_MAX+1; c++)     /* +1 does flush */
 63 |                     if (c <= CHAR_MAX && g->categories[c] == i) {
 64 |                          if (last < 0) {
 65 |                               fprintf(d, "%s", regchar(c));
 66 |                               last = c;
 67 |                          }
 68 |                     } else {
 69 |                          if (last >= 0) {
 70 |                               if (last != c-1)
 71 |                                    fprintf(d, "-%s",
 72 |                                         regchar(c-1));
 73 |                               last = -1;
 74 |                          }
 75 |                     }
 76 |                fprintf(d, "\n");
 77 |           }
 78 | }
 79 | 
 80 | /*
 81 |  - s_print - print the strip for debugging
 82 |  == static void s_print(register struct re_guts *g, FILE *d);
 83 |  */
 84 | static void
 85 | s_print(g, d)
 86 | register struct re_guts *g;
 87 | FILE *d;
 88 | {
 89 |      register sop *s;
 90 |      register cset *cs;
 91 |      register int i;
 92 |      register int done = 0;
 93 |      register sop opnd;
 94 |      register int col = 0;
 95 |      register int last;
 96 |      register sopno offset = 2;
 97 | #    define    GAP()     {    if (offset % 5 == 0) { \
 98 |                          if (col > 40) { \
 99 |                               fprintf(d, "\n\t"); \
100 |                               col = 0; \
101 |                          } else { \
102 |                               fprintf(d, " "); \
103 |                               col++; \
104 |                          } \
105 |                     } else \
106 |                          col++; \
107 |                     offset++; \
108 |                }
109 | 
110 |      if (OP(g->strip[0]) != OEND)
111 |           fprintf(d, "missing initial OEND!\n");
112 |      for (s = &g->strip[1]; !done; s++) {
113 |           opnd = OPND(*s);
114 |           switch (OP(*s)) {
115 |           case OEND:
116 |                fprintf(d, "\n");
117 |                done = 1;
118 |                break;
119 |           case OCHAR:
120 |                if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
121 |                     fprintf(d, "\\%c", (char)opnd);
122 |                else
123 |                     fprintf(d, "%s", regchar((char)opnd));
124 |                break;
125 |           case OBOL:
126 |                fprintf(d, "^");
127 |                break;
128 |           case OEOL:
129 |                fprintf(d, "$");
130 |                break;
131 |           case OBOW:
132 |                fprintf(d, "\\{");
133 |                break;
134 |           case OEOW:
135 |                fprintf(d, "\\}");
136 |                break;
137 |           case OANY:
138 |                fprintf(d, ".");
139 |                break;
140 |           case OANYOF:
141 |                fprintf(d, "[(%ld)", (long)opnd);
142 |                cs = &g->sets[opnd];
143 |                last = -1;
144 |                for (i = 0; i < g->csetsize+1; i++)     /* +1 flushes */
145 |                     if (CHIN(cs, i) && i < g->csetsize) {
146 |                          if (last < 0) {
147 |                               fprintf(d, "%s", regchar(i));
148 |                               last = i;
149 |                          }
150 |                     } else {
151 |                          if (last >= 0) {
152 |                               if (last != i-1)
153 |                                    fprintf(d, "-%s",
154 |                                         regchar(i-1));
155 |                               last = -1;
156 |                          }
157 |                     }
158 |                fprintf(d, "]");
159 |                break;
160 |           case OBACK_:
161 |                fprintf(d, "(\\<%ld>", (long)opnd);
162 |                break;
163 |           case O_BACK:
164 |                fprintf(d, "<%ld>\\)", (long)opnd);
165 |                break;
166 |           case OPLUS_:
167 |                fprintf(d, "(+");
168 |                if (OP(*(s+opnd)) != O_PLUS)
169 |                     fprintf(d, "<%ld>", (long)opnd);
170 |                break;
171 |           case O_PLUS:
172 |                if (OP(*(s-opnd)) != OPLUS_)
173 |                     fprintf(d, "<%ld>", (long)opnd);
174 |                fprintf(d, "+)");
175 |                break;
176 |           case OQUEST_:
177 |                fprintf(d, "(?");
178 |                if (OP(*(s+opnd)) != O_QUEST)
179 |                     fprintf(d, "<%ld>", (long)opnd);
180 |                break;
181 |           case O_QUEST:
182 |                if (OP(*(s-opnd)) != OQUEST_)
183 |                     fprintf(d, "<%ld>", (long)opnd);
184 |                fprintf(d, "?)");
185 |                break;
186 |           case OLPAREN:
187 |                fprintf(d, "((<%ld>", (long)opnd);
188 |                break;
189 |           case ORPAREN:
190 |                fprintf(d, "<%ld>))", (long)opnd);
191 |                break;
192 |           case OCH_:
193 |                fprintf(d, "<");
194 |                if (OP(*(s+opnd)) != OOR2)
195 |                     fprintf(d, "<%ld>", (long)opnd);
196 |                break;
197 |           case OOR1:
198 |                if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
199 |                     fprintf(d, "<%ld>", (long)opnd);
200 |                fprintf(d, "|");
201 |                break;
202 |           case OOR2:
203 |                fprintf(d, "|");
204 |                if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
205 |                     fprintf(d, "<%ld>", (long)opnd);
206 |                break;
207 |           case O_CH:
208 |                if (OP(*(s-opnd)) != OOR1)
209 |                     fprintf(d, "<%ld>", (long)opnd);
210 |                fprintf(d, ">");
211 |                break;
212 |           default:
213 |                fprintf(d, "!%ld(%ld)!", OP(*s), opnd);
214 |                break;
215 |           }
216 |           if (!done)
217 |                GAP();
218 |      }
219 | }
220 | 
221 | /*
222 |  - regchar - make a character printable
223 |  == static char *regchar(int ch);
224 |  */
225 | static char *            /* -> representation */
226 | regchar(ch)
227 | int ch;
228 | {
229 |      static char buf[10];
230 | 
231 |      if (isprint(ch) || ch == ' ')
232 |           sprintf(buf, "%c", ch);
233 |      else
234 |           sprintf(buf, "\\%o", ch);
235 |      return(buf);
236 | }
237 | 


--------------------------------------------------------------------------------
/src/re/debug.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === debug.c === */
 7 | void regprint(regex_t *r, FILE *d);
 8 | static void s_print(register struct re_guts *g, FILE *d);
 9 | static char *regchar(int ch);
10 | 
11 | #ifdef __cplusplus
12 | }
13 | #endif
14 | /* ========= end header generated by ./mkh ========= */
15 | 


--------------------------------------------------------------------------------
/src/re/engine.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === engine.c === */
 7 | static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
 8 | static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
 9 | static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
10 | static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
11 | static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
12 | static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
13 | #define	BOL	(OUT+1)
14 | #define	EOL	(BOL+1)
15 | #define	BOLEOL	(BOL+2)
16 | #define	NOTHING	(BOL+3)
17 | #define	BOW	(BOL+4)
18 | #define	EOW	(BOL+5)
19 | #define	CODEMAX	(BOL+5)		/* highest code used */
20 | #define	NONCHAR(c)	((c) > CHAR_MAX)
21 | #define	NNONCHAR	(CODEMAX-CHAR_MAX)
22 | #ifdef REDEBUG
23 | static void print(struct match *m, char *caption, states st, int ch, FILE *d);
24 | #endif
25 | #ifdef REDEBUG
26 | static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
27 | #endif
28 | #ifdef REDEBUG
29 | static char *pchar(int ch);
30 | #endif
31 | 
32 | #ifdef __cplusplus
33 | }
34 | #endif
35 | /* ========= end header generated by ./mkh ========= */
36 | 


--------------------------------------------------------------------------------
/src/re/main.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <sys/types.h>
  4 | #include <regex.h>
  5 | #include <assert.h>
  6 | #include <stdlib.h>
  7 | 
  8 | #include "main.ih"
  9 | 
 10 | char *progname;
 11 | int debug = 0;
 12 | int line = 0;
 13 | int status = 0;
 14 | 
 15 | int copts = REG_EXTENDED;
 16 | int eopts = 0;
 17 | regoff_t startoff = 0;
 18 | regoff_t endoff = 0;
 19 | 
 20 | 
 21 | extern int split();
 22 | extern void regprint();
 23 | 
 24 | /*
 25 |  - main - do the simple case, hand off to regress() for regression
 26 |  */
 27 | int main(argc, argv)
 28 | int argc;
 29 | char *argv[];
 30 | {
 31 | 	regex_t re;
 32 | #	define	NS	10
 33 | 	regmatch_t subs[NS];
 34 | 	char erbuf[100];
 35 | 	int err;
 36 | 	size_t len;
 37 | 	int c;
 38 | 	int errflg = 0;
 39 | 	register int i;
 40 | 	extern int optind;
 41 | 	extern char *optarg;
 42 | 
 43 | 	progname = argv[0];
 44 | 
 45 | 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
 46 | 		switch (c) {
 47 | 		case 'c':	/* compile options */
 48 | 			copts = options('c', optarg);
 49 | 			break;
 50 | 		case 'e':	/* execute options */
 51 | 			eopts = options('e', optarg);
 52 | 			break;
 53 | 		case 'S':	/* start offset */
 54 | 			startoff = (regoff_t)atoi(optarg);
 55 | 			break;
 56 | 		case 'E':	/* end offset */
 57 | 			endoff = (regoff_t)atoi(optarg);
 58 | 			break;
 59 | 		case 'x':	/* Debugging. */
 60 | 			debug++;
 61 | 			break;
 62 | 		case '?':
 63 | 		default:
 64 | 			errflg++;
 65 | 			break;
 66 | 		}
 67 | 	if (errflg) {
 68 | 		fprintf(stderr, "usage: %s ", progname);
 69 | 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
 70 | 		exit(2);
 71 | 	}
 72 | 
 73 | 	if (optind >= argc) {
 74 | 		regress(stdin);
 75 | 		exit(status);
 76 | 	}
 77 | 
 78 | 	err = regcomp(&re, argv[optind++], copts);
 79 | 	if (err) {
 80 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
 81 | 		fprintf(stderr, "error %s, %d/%d `%s'\n",
 82 | 			eprint(err), len, sizeof(erbuf), erbuf);
 83 | 		exit(status);
 84 | 	}
 85 | 	regprint(&re, stdout);	
 86 | 
 87 | 	if (optind >= argc) {
 88 | 		regfree(&re);
 89 | 		exit(status);
 90 | 	}
 91 | 
 92 | 	if (eopts&REG_STARTEND) {
 93 | 		subs[0].rm_so = startoff;
 94 | 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
 95 | 	}
 96 | 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
 97 | 	if (err) {
 98 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
 99 | 		fprintf(stderr, "error %s, %d/%d `%s'\n",
100 | 			eprint(err), len, sizeof(erbuf), erbuf);
101 | 		exit(status);
102 | 	}
103 | 	if (!(copts&REG_NOSUB)) {
104 | 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
105 | 		if (subs[0].rm_so != -1) {
106 | 			if (len != 0)
107 | 				printf("match `%.*s'\n", (int)len,
108 | 					argv[optind] + subs[0].rm_so);
109 | 			else
110 | 				printf("match `'@%.1s\n",
111 | 					argv[optind] + subs[0].rm_so);
112 | 		}
113 | 		for (i = 1; i < NS; i++)
114 | 			if (subs[i].rm_so != -1)
115 | 				printf("(%d) `%.*s'\n", i,
116 | 					(int)(subs[i].rm_eo - subs[i].rm_so),
117 | 					argv[optind] + subs[i].rm_so);
118 | 	}
119 | 	exit(status);
120 | }
121 | 
122 | /*
123 |  - regress - main loop of regression test
124 |  == void regress(FILE *in);
125 |  */
126 | void
127 | regress(in)
128 | FILE *in;
129 | {
130 | 	char inbuf[1000];
131 | #	define	MAXF	10
132 | 	char *f[MAXF];
133 | 	int nf;
134 | 	int i;
135 | 	char erbuf[100];
136 | 	size_t ne;
137 | 	char *badpat = "invalid regular expression";
138 | #	define	SHORT	10
139 | 	char *bpname = "REG_BADPAT";
140 | 	regex_t re;
141 | 
142 | 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
143 | 		line++;
144 | 		if (inbuf[0] == '#' || inbuf[0] == '\n')
145 | 			continue;			/* NOTE CONTINUE */
146 | 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
147 | 		if (debug)
148 | 			fprintf(stdout, "%d:\n", line);
149 | 		nf = split(inbuf, f, MAXF, "\t\t");
150 | 		if (nf < 3) {
151 | 			fprintf(stderr, "bad input, line %d\n", line);
152 | 			exit(1);
153 | 		}
154 | 		for (i = 0; i < nf; i++)
155 | 			if (strcmp(f[i], "\"\"") == 0)
156 | 				f[i] = "";
157 | 		if (nf <= 3)
158 | 			f[3] = NULL;
159 | 		if (nf <= 4)
160 | 			f[4] = NULL;
161 | 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
162 | 		if (opt('&', f[1]))	/* try with either type of RE */
163 | 			try(f[0], f[1], f[2], f[3], f[4],
164 | 					options('c', f[1]) &~ REG_EXTENDED);
165 | 	}
166 | 
167 | 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
168 | 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
169 | 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
170 | 							erbuf, badpat);
171 | 		status = 1;
172 | 	}
173 | 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
174 | 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
175 | 						ne != strlen(badpat)+1) {
176 | 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
177 | 						erbuf, SHORT-1, badpat);
178 | 		status = 1;
179 | 	}
180 | 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
181 | 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
182 | 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
183 | 						erbuf, bpname);
184 | 		status = 1;
185 | 	}
186 | 	re.re_endp = bpname;
187 | 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
188 | 	if (atoi(erbuf) != (int)REG_BADPAT) {
189 | 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
190 | 						erbuf, (long)REG_BADPAT);
191 | 		status = 1;
192 | 	} else if (ne != strlen(erbuf)+1) {
193 | 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
194 | 						erbuf, (long)REG_BADPAT);
195 | 		status = 1;
196 | 	}
197 | }
198 | 
199 | /*
200 |  - try - try it, and report on problems
201 |  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
202 |  */
203 | void
204 | try(f0, f1, f2, f3, f4, opts)
205 | char *f0;
206 | char *f1;
207 | char *f2;
208 | char *f3;
209 | char *f4;
210 | int opts;			/* may not match f1 */
211 | {
212 | 	regex_t re;
213 | #	define	NSUBS	10
214 | 	regmatch_t subs[NSUBS];
215 | #	define	NSHOULD	15
216 | 	char *should[NSHOULD];
217 | 	int nshould;
218 | 	char erbuf[100];
219 | 	int err;
220 | 	int len;
221 | 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
222 | 	register int i;
223 | 	char *grump;
224 | 	char f0copy[1000];
225 | 	char f2copy[1000];
226 | 
227 | 	strcpy(f0copy, f0);
228 | 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
229 | 	fixstr(f0copy);
230 | 	err = regcomp(&re, f0copy, opts);
231 | 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
232 | 		/* unexpected error or wrong error */
233 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
234 | 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
235 | 					line, type, eprint(err), len,
236 | 					sizeof(erbuf), erbuf);
237 | 		status = 1;
238 | 	} else if (err == 0 && opt('C', f1)) {
239 | 		/* unexpected success */
240 | 		fprintf(stderr, "%d: %s should have given REG_%s\n",
241 | 						line, type, f2);
242 | 		status = 1;
243 | 		err = 1;	/* so we won't try regexec */
244 | 	}
245 | 
246 | 	if (err != 0) {
247 | 		regfree(&re);
248 | 		return;
249 | 	}
250 | 
251 | 	strcpy(f2copy, f2);
252 | 	fixstr(f2copy);
253 | 
254 | 	if (options('e', f1)&REG_STARTEND) {
255 | 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
256 | 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
257 | 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
258 | 		subs[0].rm_eo = strchr(f2, ')') - f2;
259 | 	}
260 | 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
261 | 
262 | 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
263 | 		/* unexpected error or wrong error */
264 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
265 | 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
266 | 					line, type, eprint(err), len,
267 | 					sizeof(erbuf), erbuf);
268 | 		status = 1;
269 | 	} else if (err != 0) {
270 | 		/* nothing more to check */
271 | 	} else if (f3 == NULL) {
272 | 		/* unexpected success */
273 | 		fprintf(stderr, "%d: %s exec should have failed\n",
274 | 						line, type);
275 | 		status = 1;
276 | 		err = 1;		/* just on principle */
277 | 	} else if (opts&REG_NOSUB) {
278 | 		/* nothing more to check */
279 | 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
280 | 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
281 | 		status = 1;
282 | 		err = 1;
283 | 	}
284 | 
285 | 	if (err != 0 || f4 == NULL) {
286 | 		regfree(&re);
287 | 		return;
288 | 	}
289 | 
290 | 	for (i = 1; i < NSHOULD; i++)
291 | 		should[i] = NULL;
292 | 	nshould = split(f4, should+1, NSHOULD-1, ",");
293 | 	if (nshould == 0) {
294 | 		nshould = 1;
295 | 		should[1] = "";
296 | 	}
297 | 	for (i = 1; i < NSUBS; i++) {
298 | 		grump = check(f2, subs[i], should[i]);
299 | 		if (grump != NULL) {
300 | 			fprintf(stderr, "%d: %s $%d %s\n", line,
301 | 							type, i, grump);
302 | 			status = 1;
303 | 			err = 1;
304 | 		}
305 | 	}
306 | 
307 | 	regfree(&re);
308 | }
309 | 
310 | /*
311 |  - options - pick options out of a regression-test string
312 |  == int options(int type, char *s);
313 |  */
314 | int
315 | options(type, s)
316 | int type;			/* 'c' compile, 'e' exec */
317 | char *s;
318 | {
319 | 	register char *p;
320 | 	register int o = (type == 'c') ? copts : eopts;
321 | 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
322 | 
323 | 	for (p = s; *p != '\0'; p++)
324 | 		if (strchr(legal, *p) != NULL)
325 | 			switch (*p) {
326 | 			case 'b':
327 | 				o &= ~REG_EXTENDED;
328 | 				break;
329 | 			case 'i':
330 | 				o |= REG_ICASE;
331 | 				break;
332 | 			case 's':
333 | 				o |= REG_NOSUB;
334 | 				break;
335 | 			case 'n':
336 | 				o |= REG_NEWLINE;
337 | 				break;
338 | 			case 'm':
339 | 				o &= ~REG_EXTENDED;
340 | 				o |= REG_NOSPEC;
341 | 				break;
342 | 			case 'p':
343 | 				o |= REG_PEND;
344 | 				break;
345 | 			case '^':
346 | 				o |= REG_NOTBOL;
347 | 				break;
348 | 			case '$':
349 | 				o |= REG_NOTEOL;
350 | 				break;
351 | 			case '#':
352 | 				o |= REG_STARTEND;
353 | 				break;
354 | 			case 't':	/* trace */
355 | 				o |= REG_TRACE;
356 | 				break;
357 | 			case 'l':	/* force long representation */
358 | 				o |= REG_LARGE;
359 | 				break;
360 | 			case 'r':	/* force backref use */
361 | 				o |= REG_BACKR;
362 | 				break;
363 | 			}
364 | 	return(o);
365 | }
366 | 
367 | /*
368 |  - opt - is a particular option in a regression string?
369 |  == int opt(int c, char *s);
370 |  */
371 | int				/* predicate */
372 | opt(c, s)
373 | int c;
374 | char *s;
375 | {
376 | 	return(strchr(s, c) != NULL);
377 | }
378 | 
379 | /*
380 |  - fixstr - transform magic characters in strings
381 |  == void fixstr(register char *p);
382 |  */
383 | void
384 | fixstr(p)
385 | register char *p;
386 | {
387 | 	if (p == NULL)
388 | 		return;
389 | 
390 | 	for (; *p != '\0'; p++)
391 | 		if (*p == 'N')
392 | 			*p = '\n';
393 | 		else if (*p == 'T')
394 | 			*p = '\t';
395 | 		else if (*p == 'S')
396 | 			*p = ' ';
397 | 		else if (*p == 'Z')
398 | 			*p = '\0';
399 | }
400 | 
401 | /*
402 |  - check - check a substring match
403 |  == char *check(char *str, regmatch_t sub, char *should);
404 |  */
405 | char *				/* NULL or complaint */
406 | check(str, sub, should)
407 | char *str;
408 | regmatch_t sub;
409 | char *should;
410 | {
411 | 	register int len;
412 | 	register int shlen;
413 | 	register char *p;
414 | 	static char grump[500];
415 | 	register char *at = NULL;
416 | 
417 | 	if (should != NULL && strcmp(should, "-") == 0)
418 | 		should = NULL;
419 | 	if (should != NULL && should[0] == '@') {
420 | 		at = should + 1;
421 | 		should = "";
422 | 	}
423 | 
424 | 	/* check rm_so and rm_eo for consistency */
425 | 	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
426 | 				(sub.rm_so != -1 && sub.rm_eo == -1) ||
427 | 				(sub.rm_so != -1 && sub.rm_so < 0) ||
428 | 				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
429 | 		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
430 | 							(long)sub.rm_eo);
431 | 		return(grump);
432 | 	}
433 | 
434 | 	/* check for no match */
435 | 	if (sub.rm_so == -1 && should == NULL)
436 | 		return(NULL);
437 | 	if (sub.rm_so == -1)
438 | 		return("did not match");
439 | 
440 | 	/* check for in range */
441 | 	if (sub.rm_eo > strlen(str)) {
442 | 		sprintf(grump, "start %ld end %ld, past end of string",
443 | 					(long)sub.rm_so, (long)sub.rm_eo);
444 | 		return(grump);
445 | 	}
446 | 
447 | 	len = (int)(sub.rm_eo - sub.rm_so);
448 | 	shlen = (int)strlen(should);
449 | 	p = str + sub.rm_so;
450 | 
451 | 	/* check for not supposed to match */
452 | 	if (should == NULL) {
453 | 		sprintf(grump, "matched `%.*s'", len, p);
454 | 		return(grump);
455 | 	}
456 | 
457 | 	/* check for wrong match */
458 | 	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
459 | 		sprintf(grump, "matched `%.*s' instead", len, p);
460 | 		return(grump);
461 | 	}
462 | 	if (shlen > 0)
463 | 		return(NULL);
464 | 
465 | 	/* check null match in right place */
466 | 	if (at == NULL)
467 | 		return(NULL);
468 | 	shlen = strlen(at);
469 | 	if (shlen == 0)
470 | 		shlen = 1;	/* force check for end-of-string */
471 | 	if (strncmp(p, at, shlen) != 0) {
472 | 		sprintf(grump, "matched null at `%.20s'", p);
473 | 		return(grump);
474 | 	}
475 | 	return(NULL);
476 | }
477 | 
478 | /*
479 |  - eprint - convert error number to name
480 |  == static char *eprint(int err);
481 |  */
482 | static char *
483 | eprint(err)
484 | int err;
485 | {
486 | 	static char epbuf[100];
487 | 	size_t len;
488 | 
489 | 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
490 | 	assert(len <= sizeof(epbuf));
491 | 	return(epbuf);
492 | }
493 | 
494 | /*
495 |  - efind - convert error name to number
496 |  == static int efind(char *name);
497 |  */
498 | static int
499 | efind(name)
500 | char *name;
501 | {
502 | 	static char efbuf[100];
503 | 	regex_t re;
504 | 
505 | 	sprintf(efbuf, "REG_%s", name);
506 | 	assert(strlen(efbuf) < sizeof(efbuf));
507 | 	re.re_endp = efbuf;
508 | 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
509 | 	return(atoi(efbuf));
510 | }
511 | 


--------------------------------------------------------------------------------
/src/re/main.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === main.c === */
 7 | void regress(FILE *in);
 8 | void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
 9 | int options(int type, char *s);
10 | int opt(int c, char *s);
11 | void fixstr(register char *p);
12 | char *check(char *str, regmatch_t sub, char *should);
13 | static char *eprint(int err);
14 | static int efind(char *name);
15 | 
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 | /* ========= end header generated by ./mkh ========= */
20 | 


--------------------------------------------------------------------------------
/src/re/mkh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # mkh - pull headers out of C source
 3 | PATH=/bin:/usr/bin ; export PATH
 4 | 
 5 | # egrep pattern to pick out marked lines
 6 | egrep='^ =([ 	]|$)'
 7 | 
 8 | # Sed program to process marked lines into lines for the header file.
 9 | # The markers have already been removed.  Two things are done here:  removal
10 | # of backslashed newlines, and some fudging of comments.  The first is done
11 | # because -o needs to have prototypes on one line to strip them down.
12 | # Getting comments into the output is tricky; we turn C++-style // comments
13 | # into /* */ comments, after altering any existing */'s to avoid trouble.
14 | peel='	/\\$/N
15 | 	/\\\n[ 	]*/s///g
16 | 	/\/\//s;\*/;* /;g
17 | 	/\/\//s;//\(.*\);/*\1 */;'
18 | 
19 | for a
20 | do
21 | 	case "$a" in
22 | 	-o)	# old (pre-function-prototype) compiler
23 | 		# add code to comment out argument lists
24 | 		peel="$peel
25 | 			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
26 | 		shift
27 | 		;;
28 | 	-b)	# funny Berkeley __P macro
29 | 		peel="$peel
30 | 			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
31 | 		shift
32 | 		;;
33 | 	-s)	# compiler doesn't like `static foo();'
34 | 		# add code to get rid of the `static'
35 | 		peel="$peel
36 | 			"'/^static[ 	][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
37 | 		shift
38 | 		;;
39 | 	-p)	# private declarations
40 | 		egrep='^ ==([ 	]|$)'
41 | 		shift
42 | 		;;
43 | 	-i)	# wrap in #ifndef, argument is name
44 | 		ifndef="$2"
45 | 		shift ; shift
46 | 		;;
47 | 	*)	break
48 | 		;;
49 | 	esac
50 | done
51 | 
52 | if test " $ifndef" != " "
53 | then
54 | 	echo "#ifndef $ifndef"
55 | 	echo "#define	$ifndef	/* never again */"
56 | fi
57 | echo "/* ========= begin header generated by $0 ========= */"
58 | echo '#ifdef __cplusplus'
59 | echo 'extern "C" {'
60 | echo '#endif'
61 | for f
62 | do
63 | 	echo
64 | 	echo "/* === $f === */"
65 | 	egrep "$egrep" $f | sed 's/^ ==*[ 	]//;s/^ ==*$//' | sed "$peel"
66 | 	echo
67 | done
68 | echo '#ifdef __cplusplus'
69 | echo '}'
70 | echo '#endif'
71 | echo "/* ========= end header generated by $0 ========= */"
72 | if test " $ifndef" != " "
73 | then
74 | 	echo "#endif"
75 | fi
76 | exit 0
77 | 


--------------------------------------------------------------------------------
/src/re/re.c:
--------------------------------------------------------------------------------
 1 | /* re.c */
 2 | /* Henry Spencer's implementation of Regular Expressions,
 3 |    used for TinyScheme */
 4 | /* Refurbished by Stephen Gildea */
 5 | /* Updated by armornick */
 6 | 
 7 | /* this defintion is to export the init function */
 8 | #define SCHEME_MODULE
 9 | #include "regex.h"
10 | #include <scheme.h>
11 | 
12 | pointer foreign_re_match(scheme *sc, pointer args) {
13 |   pointer retval=scheme_false(sc);
14 |   int retcode;
15 |   regex_t rt;
16 |   pointer first_arg, second_arg;
17 |   pointer third_arg=scheme_nil(sc);
18 |   char *string;
19 |   char *pattern;
20 |   int num=0;
21 | 
22 |   if(!((args != scheme_nil(sc)) && scheme_is_string((first_arg = scheme_pair_car(args)))
23 |        && (args=scheme_pair_cdr(args))
24 |        && scheme_is_pair(args) && scheme_is_string((second_arg = scheme_pair_car(args))))) {
25 |     return scheme_false(sc);
26 |   }
27 |   pattern = scheme_string_value(first_arg);
28 |   string = scheme_string_value(second_arg);
29 | 
30 |   args=scheme_pair_cdr(args);
31 |   if(args!=scheme_nil(sc)) {
32 |     if(!(scheme_is_pair(args) && scheme_is_vector((third_arg = scheme_pair_car(args))))) {
33 |       return scheme_false(sc);
34 |     } else {
35 |       num = scheme_ivalue(third_arg);
36 |     }
37 |   }
38 | 
39 | 
40 |   if(regcomp(&rt,pattern,REG_EXTENDED)!=0) {
41 |     return scheme_false(sc);
42 |   }
43 | 
44 |   if(num==0) {
45 |     retcode=regexec(&rt,string,0,0,0);
46 |   } else {
47 |     regmatch_t *pmatch=malloc((num+1)*sizeof(regmatch_t));
48 |     if(pmatch!=0) {
49 |       retcode=regexec(&rt,string,num+1,pmatch,0);
50 |       if(retcode==0) {
51 |     int i;
52 |     for(i=0; i<num; i++) {
53 |       scheme_set_vector_elem(third_arg, i,
54 |               cons(sc, scheme_integer(sc, pmatch[i].rm_so),
55 |                           scheme_integer(sc, pmatch[i].rm_eo)));
56 | 
57 |     }
58 |       }
59 |       free(pmatch);
60 |     } else {
61 |       scheme_memory_error(sc);
62 |       retcode=-1;
63 |     }
64 |   }
65 | 
66 |   if(retcode==0) {
67 |     retval=scheme_true(sc);
68 |   }
69 | 
70 |   regfree(&rt);
71 | 
72 |   return(retval);
73 | }
74 | 
75 | static char* utilities=";; return the substring of STRING matched in MATCH-VECTOR, \n"
76 | ";; the Nth subexpression match (default 0).\n"
77 | "(define (re-match-nth string match-vector . n)\n"
78 | "  (let ((n (if (pair? n) (car n) 0)))\n"
79 | "    (substring string (car (vector-ref match-vector n))\n"
80 | "                    (cdr (vector-ref match-vector n)))))\n"
81 | "(define (re-before-nth string match-vector . n)\n"
82 | "  (let ((n (if (pair? n) (car n) 0)))\n"
83 | "    (substring string 0 (car (vector-ref match-vector n)))))\n"
84 | "(define (re-after-nth string match-vector . n)\n"
85 | "  (let ((n (if (pair? n) (car n) 0)))\n"
86 | "    (substring string (cdr (vector-ref match-vector n))\n"
87 | "             (string-length string))))\n";
88 | 
89 | SCHEME_EXPORT void init_re(scheme *sc) {
90 |   scheme_define(sc, scheme_global_env(sc), scheme_symbol(sc, "re-match"), scheme_foreign_func(sc, foreign_re_match));
91 |   // printf("RE: adding utility functions\n");
92 |   // scheme_load_string(sc, utilities);
93 | }
94 | 


--------------------------------------------------------------------------------
/src/re/re.makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for TinyScheme's extension library "re" (regular expressions) 
 2 | # Time-stamp: <2002-06-24 14:13:42 gildea> 
 3 |  
 4 | SCHEME_H_DIR=../tinyscheme-1.30 
 5 |  
 6 | # Windows/2000 
 7 | # CC = cl -nologo 
 8 | # DEBUG = -W3 -Z7 -MD 
 9 | # SYS_LIBS= 
10 | # Osuf=obj 
11 | # SOsuf=dll 
12 | # LD = link -nologo 
13 | # LDFLAGS = -debug -map -dll -incremental:no 
14 | # OUT = -out:$@ 
15 |  
16 | # Unix, generally 
17 | CC = gcc -fpic 
18 | DEBUG=-g -Wall -Wno-char-subscripts -O 
19 | Osuf=o 
20 | SOsuf=so 
21 | OUT = -o $@ 
22 |  
23 | # Linux 
24 | LD = gcc 
25 | LDFLAGS = -shared 
26 | SYS_LIBS= 
27 |  
28 | # Solaris 
29 | # LD = /usr/ccs/bin/ld 
30 | ## -Bsymbolic nec. because we have fns w same name as in libc. 
31 | # LDFLAGS = -G -Bsymbolic -z text 
32 | # SYS_LIBS= -lc 
33 |  
34 | SRCS = re.c debug.c regcomp.c regerror.c regexec.c regfree.c split.c 
35 | OBJS = $(SRCS:.c=.$(Osuf)) 
36 |  
37 | all: re.$(SOsuf) 
38 |  
39 | %.$(Osuf): %.c 
40 | 	$(CC) -c $(DEBUG) -DUSE_DL -I. -I$(SCHEME_H_DIR) $+ 
41 |  
42 | re.$(SOsuf): $(OBJS) 
43 | 	$(LD) $(LDFLAGS) $(OUT) $+ $(SYS_LIBS) 
44 |  
45 | clean: 
46 | 	-rm -f $(OBJS) re.$(SOsuf) 
47 | 


--------------------------------------------------------------------------------
/src/re/re.scm:
--------------------------------------------------------------------------------
 1 | ;; return the substring of STRING matched in MATCH-VECTOR, 
 2 | ;; the Nth subexpression match (default 0). 
 3 | (define (re-match-nth string match-vector . n) 
 4 |   (let ((n (if (pair? n) (car n) 0))) 
 5 |     (substring string (car (vector-ref match-vector n)) 
 6 |                     (cdr (vector-ref match-vector n))))) 
 7 | 
 8 | (define (re-before-nth string match-vector . n) 
 9 |   (let ((n (if (pair? n) (car n) 0))) 
10 |     (substring string 0 (car (vector-ref match-vector n))))) 
11 | 
12 | (define (re-after-nth string match-vector . n) 
13 |   (let ((n (if (pair? n) (car n) 0))) 
14 |     (substring string (cdr (vector-ref match-vector n)) 
15 |              (string-length string)))) 


--------------------------------------------------------------------------------
/src/re/regcomp.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === regcomp.c === */
 7 | static void p_ere(register struct parse *p, int stop);
 8 | static void p_ere_exp(register struct parse *p);
 9 | static void p_str(register struct parse *p);
10 | static void p_bre(register struct parse *p, register int end1, register int end2);
11 | static int p_simp_re(register struct parse *p, int starordinary);
12 | static int p_count(register struct parse *p);
13 | static void p_bracket(register struct parse *p);
14 | static void p_b_term(register struct parse *p, register cset *cs);
15 | static void p_b_cclass(register struct parse *p, register cset *cs);
16 | static void p_b_eclass(register struct parse *p, register cset *cs);
17 | static char p_b_symbol(register struct parse *p);
18 | static char p_b_coll_elem(register struct parse *p, int endc);
19 | static char othercase(int ch);
20 | static void bothcases(register struct parse *p, int ch);
21 | static void ordinary(register struct parse *p, register int ch);
22 | static void nonnewline(register struct parse *p);
23 | static void repeat(register struct parse *p, sopno start, int from, int to);
24 | static int seterr(register struct parse *p, int e);
25 | static cset *allocset(register struct parse *p);
26 | static void freeset(register struct parse *p, register cset *cs);
27 | static int freezeset(register struct parse *p, register cset *cs);
28 | static int firstch(register struct parse *p, register cset *cs);
29 | static int nch(register struct parse *p, register cset *cs);
30 | static void mcadd(register struct parse *p, register cset *cs, register char *cp);
31 | static void mcinvert(register struct parse *p, register cset *cs);
32 | static void mccase(register struct parse *p, register cset *cs);
33 | static int isinsets(register struct re_guts *g, int c);
34 | static int samesets(register struct re_guts *g, int c1, int c2);
35 | static void categorize(struct parse *p, register struct re_guts *g);
36 | static sopno dupl(register struct parse *p, sopno start, sopno finish);
37 | static void doemit(register struct parse *p, sop op, size_t opnd);
38 | static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
39 | static void dofwd(register struct parse *p, sopno pos, sop value);
40 | static void enlarge(register struct parse *p, sopno size);
41 | static void stripsnug(register struct parse *p, register struct re_guts *g);
42 | static void findmust(register struct parse *p, register struct re_guts *g);
43 | static sopno pluscount(register struct parse *p, register struct re_guts *g);
44 | 
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 | /* ========= end header generated by ./mkh ========= */
49 | 


--------------------------------------------------------------------------------
/src/re/regerror.c:
--------------------------------------------------------------------------------
  1 | #include <regex.h>
  2 | 
  3 | #include "utils.h"
  4 | #include "regerror.ih"
  5 | 
  6 | /*
  7 |  = #define     REG_NOMATCH     1
  8 |  = #define     REG_BADPAT      2
  9 |  = #define     REG_ECOLLATE    3
 10 |  = #define     REG_ECTYPE      4
 11 |  = #define     REG_EESCAPE     5
 12 |  = #define     REG_ESUBREG     6
 13 |  = #define     REG_EBRACK      7
 14 |  = #define     REG_EPAREN      8
 15 |  = #define     REG_EBRACE      9
 16 |  = #define     REG_BADBR 10
 17 |  = #define     REG_ERANGE     11
 18 |  = #define     REG_ESPACE     12
 19 |  = #define     REG_BADRPT     13
 20 |  = #define     REG_EMPTY 14
 21 |  = #define     REG_ASSERT     15
 22 |  = #define     REG_INVARG     16
 23 |  = #define     REG_ATOI  255  // convert name to number (!)
 24 |  = #define     REG_ITOA  0400 // convert number to name (!)
 25 |  */
 26 | static struct rerr {
 27 |      int code;
 28 |      char *name;
 29 |      char *explain;
 30 | } rerrs[] = {
 31 |      { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
 32 |      { REG_BADPAT,  "REG_BADPAT",  "invalid regular expression" },
 33 |      { REG_ECOLLATE,     "REG_ECOLLATE",     "invalid collating element" },
 34 |      { REG_ECTYPE,  "REG_ECTYPE",  "invalid character class" },
 35 |      { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
 36 |      { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
 37 |      { REG_EBRACK,  "REG_EBRACK",  "brackets ([ ]) not balanced" },
 38 |      { REG_EPAREN,  "REG_EPAREN",  "parentheses not balanced" },
 39 |      { REG_EBRACE,  "REG_EBRACE",  "braces not balanced" },
 40 |      { REG_BADBR,   "REG_BADBR",   "invalid repetition count(s)" },
 41 |      { REG_ERANGE,  "REG_ERANGE",  "invalid character range" },
 42 |      { REG_ESPACE,  "REG_ESPACE",  "out of memory" },
 43 |      { REG_BADRPT,  "REG_BADRPT",  "repetition-operator operand invalid" },
 44 |      { REG_EMPTY,   "REG_EMPTY",   "empty (sub)expression" },
 45 |      { REG_ASSERT,  "REG_ASSERT",  "\"can't happen\" -- you found a bug" },
 46 |      { REG_INVARG,  "REG_INVARG",  "invalid argument to regex routine" },
 47 |      { 0,      "",       "*** unknown regexp error code ***" }
 48 | };
 49 | 
 50 | /*
 51 |  - regerror - the interface to error numbers
 52 |  = API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t);
 53 |  */
 54 | /* ARGSUSED */
 55 | API_EXPORT(size_t)
 56 | regerror(errcode, preg, errbuf, errbuf_size)
 57 | int errcode;
 58 | const regex_t *preg;
 59 | char *errbuf;
 60 | size_t errbuf_size;
 61 | {
 62 |      register struct rerr *r;
 63 |      register size_t len;
 64 |      register int target = errcode &~ REG_ITOA;
 65 |      register char *s;
 66 |      char convbuf[50];
 67 | 
 68 |      if (errcode == REG_ATOI)
 69 |           s = regatoi(preg, convbuf);
 70 |      else {
 71 |           for (r = rerrs; r->code != 0; r++)
 72 |                if (r->code == target)
 73 |                     break;
 74 |      
 75 |           if (errcode&REG_ITOA) {
 76 |                if (r->code != 0)
 77 |                     (void) strcpy(convbuf, r->name);
 78 |                else
 79 |                     sprintf(convbuf, "REG_0x%x", target);
 80 |                assert(strlen(convbuf) < sizeof(convbuf));
 81 |                s = convbuf;
 82 |           } else
 83 |                s = r->explain;
 84 |      }
 85 | 
 86 |      len = strlen(s) + 1;
 87 |      if (errbuf_size > 0) {
 88 |           if (errbuf_size > len)
 89 |                (void) strcpy(errbuf, s);
 90 |           else {
 91 |                (void) strncpy(errbuf, s, errbuf_size-1);
 92 |                errbuf[errbuf_size-1] = '\0';
 93 |           }
 94 |      }
 95 | 
 96 |      return(len);
 97 | }
 98 | 
 99 | /*
100 |  - regatoi - internal routine to implement REG_ATOI
101 |  == static char *regatoi(const regex_t *preg, char *localbuf);
102 |  */
103 | static char *
104 | regatoi(preg, localbuf)
105 | const regex_t *preg;
106 | char *localbuf;
107 | {
108 |      register struct rerr *r;
109 | 
110 |      for (r = rerrs; r->code != 0; r++)
111 |           if (strcmp(r->name, preg->re_endp) == 0)
112 |                break;
113 |      if (r->code == 0)
114 |           return("0");
115 | 
116 |      sprintf(localbuf, "%d", r->code);
117 |      return(localbuf);
118 | }
119 | 


--------------------------------------------------------------------------------
/src/re/regerror.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === regerror.c === */
 7 | static char *regatoi(const regex_t *preg, char *localbuf);
 8 | 
 9 | #ifdef __cplusplus
10 | }
11 | #endif
12 | /* ========= end header generated by ./mkh ========= */
13 | 


--------------------------------------------------------------------------------
/src/re/regex.001:
--------------------------------------------------------------------------------
  1 | # Microsoft Developer Studio Project File - Name="regex" - Package Owner=<4>
  2 | # Microsoft Developer Studio Generated Build File, Format Version 5.00
  3 | # ** DO NOT EDIT **
  4 | 
  5 | # TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
  6 | 
  7 | CFG=regex - Win32 Debug
  8 | !MESSAGE This is not a valid makefile. To build this project using NMAKE,
  9 | !MESSAGE use the Export Makefile command and run
 10 | !MESSAGE 
 11 | !MESSAGE NMAKE /f "regex.mak".
 12 | !MESSAGE 
 13 | !MESSAGE You can specify a configuration when running NMAKE
 14 | !MESSAGE by defining the macro CFG on the command line. For example:
 15 | !MESSAGE 
 16 | !MESSAGE NMAKE /f "regex.mak" CFG="regex - Win32 Debug"
 17 | !MESSAGE 
 18 | !MESSAGE Possible choices for configuration are:
 19 | !MESSAGE 
 20 | !MESSAGE "regex - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library")
 21 | !MESSAGE "regex - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library")
 22 | !MESSAGE 
 23 | 
 24 | # Begin Project
 25 | # PROP Scc_ProjName ""
 26 | # PROP Scc_LocalPath ""
 27 | CPP=cl.exe
 28 | MTL=midl.exe
 29 | RSC=rc.exe
 30 | 
 31 | !IF  "$(CFG)" == "regex - Win32 Release"
 32 | 
 33 | # PROP BASE Use_MFC 0
 34 | # PROP BASE Use_Debug_Libraries 0
 35 | # PROP BASE Output_Dir "Release"
 36 | # PROP BASE Intermediate_Dir "Release"
 37 | # PROP BASE Target_Dir ""
 38 | # PROP Use_MFC 0
 39 | # PROP Use_Debug_Libraries 0
 40 | # PROP Output_Dir "Release"
 41 | # PROP Intermediate_Dir "Release"
 42 | # PROP Ignore_Export_Lib 0
 43 | # PROP Target_Dir ""
 44 | # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c
 45 | # ADD CPP /nologo /MT /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c
 46 | # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32
 47 | # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32
 48 | # ADD BASE RSC /l 0x409 /d "NDEBUG"
 49 | # ADD RSC /l 0x409 /d "NDEBUG"
 50 | BSC32=bscmake.exe
 51 | # ADD BASE BSC32 /nologo
 52 | # ADD BSC32 /nologo
 53 | LINK32=link.exe
 54 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386
 55 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386
 56 | 
 57 | !ELSEIF  "$(CFG)" == "regex - Win32 Debug"
 58 | 
 59 | # PROP BASE Use_MFC 0
 60 | # PROP BASE Use_Debug_Libraries 1
 61 | # PROP BASE Output_Dir "Debug"
 62 | # PROP BASE Intermediate_Dir "Debug"
 63 | # PROP BASE Target_Dir ""
 64 | # PROP Use_MFC 0
 65 | # PROP Use_Debug_Libraries 1
 66 | # PROP Output_Dir "Debug"
 67 | # PROP Intermediate_Dir "Debug"
 68 | # PROP Ignore_Export_Lib 0
 69 | # PROP Target_Dir ""
 70 | # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c
 71 | # ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c
 72 | # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32
 73 | # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32
 74 | # ADD BASE RSC /l 0x409 /d "_DEBUG"
 75 | # ADD RSC /l 0x409 /d "_DEBUG"
 76 | BSC32=bscmake.exe
 77 | # ADD BASE BSC32 /nologo
 78 | # ADD BSC32 /nologo
 79 | LINK32=link.exe
 80 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept
 81 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept
 82 | 
 83 | !ENDIF 
 84 | 
 85 | # Begin Target
 86 | 
 87 | # Name "regex - Win32 Release"
 88 | # Name "regex - Win32 Debug"
 89 | # Begin Source File
 90 | 
 91 | SOURCE=.\regcomp.c
 92 | # End Source File
 93 | # Begin Source File
 94 | 
 95 | SOURCE=.\regerror.c
 96 | # End Source File
 97 | # Begin Source File
 98 | 
 99 | SOURCE=.\regexec.c
100 | # End Source File
101 | # Begin Source File
102 | 
103 | SOURCE=.\regfree.c
104 | # End Source File
105 | # End Target
106 | # End Project
107 | 


--------------------------------------------------------------------------------
/src/re/regex.7:
--------------------------------------------------------------------------------
  1 | .TH REGEX 7 "7 Feb 1994"
  2 | .BY "Henry Spencer"
  3 | .SH NAME
  4 | regex \- POSIX 1003.2 regular expressions
  5 | .SH DESCRIPTION
  6 | Regular expressions (``RE''s),
  7 | as defined in POSIX 1003.2, come in two forms:
  8 | modern REs (roughly those of
  9 | .IR egrep ;
 10 | 1003.2 calls these ``extended'' REs)
 11 | and obsolete REs (roughly those of
 12 | .IR ed ;
 13 | 1003.2 ``basic'' REs).
 14 | Obsolete REs mostly exist for backward compatibility in some old programs;
 15 | they will be discussed at the end.
 16 | 1003.2 leaves some aspects of RE syntax and semantics open;
 17 | `\(dg' marks decisions on these aspects that
 18 | may not be fully portable to other 1003.2 implementations.
 19 | .PP
 20 | A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
 21 | separated by `|'.
 22 | It matches anything that matches one of the branches.
 23 | .PP
 24 | A branch is one\(dg or more \fIpieces\fR, concatenated.
 25 | It matches a match for the first, followed by a match for the second, etc.
 26 | .PP
 27 | A piece is an \fIatom\fR possibly followed
 28 | by a single\(dg `*', `+', `?', or \fIbound\fR.
 29 | An atom followed by `*' matches a sequence of 0 or more matches of the atom.
 30 | An atom followed by `+' matches a sequence of 1 or more matches of the atom.
 31 | An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
 32 | .PP
 33 | A \fIbound\fR is `{' followed by an unsigned decimal integer,
 34 | possibly followed by `,'
 35 | possibly followed by another unsigned decimal integer,
 36 | always followed by `}'.
 37 | The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
 38 | and if there are two of them, the first may not exceed the second.
 39 | An atom followed by a bound containing one integer \fIi\fR
 40 | and no comma matches
 41 | a sequence of exactly \fIi\fR matches of the atom.
 42 | An atom followed by a bound
 43 | containing one integer \fIi\fR and a comma matches
 44 | a sequence of \fIi\fR or more matches of the atom.
 45 | An atom followed by a bound
 46 | containing two integers \fIi\fR and \fIj\fR matches
 47 | a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
 48 | .PP
 49 | An atom is a regular expression enclosed in `()' (matching a match for the
 50 | regular expression),
 51 | an empty set of `()' (matching the null string)\(dg,
 52 | a \fIbracket expression\fR (see below), `.'
 53 | (matching any single character), `^' (matching the null string at the
 54 | beginning of a line), `$' (matching the null string at the
 55 | end of a line), a `\e' followed by one of the characters
 56 | `^.[$()|*+?{\e'
 57 | (matching that character taken as an ordinary character),
 58 | a `\e' followed by any other character\(dg
 59 | (matching that character taken as an ordinary character,
 60 | as if the `\e' had not been present\(dg),
 61 | or a single character with no other significance (matching that character).
 62 | A `{' followed by a character other than a digit is an ordinary
 63 | character, not the beginning of a bound\(dg.
 64 | It is illegal to end an RE with `\e'.
 65 | .PP
 66 | A \fIbracket expression\fR is a list of characters enclosed in `[]'.
 67 | It normally matches any single character from the list (but see below).
 68 | If the list begins with `^',
 69 | it matches any single character
 70 | (but see below) \fInot\fR from the rest of the list.
 71 | If two characters in the list are separated by `\-', this is shorthand
 72 | for the full \fIrange\fR of characters between those two (inclusive) in the
 73 | collating sequence,
 74 | e.g. `[0-9]' in ASCII matches any decimal digit.
 75 | It is illegal\(dg for two ranges to share an
 76 | endpoint, e.g. `a-c-e'.
 77 | Ranges are very collating-sequence-dependent,
 78 | and portable programs should avoid relying on them.
 79 | .PP
 80 | To include a literal `]' in the list, make it the first character
 81 | (following a possible `^').
 82 | To include a literal `\-', make it the first or last character,
 83 | or the second endpoint of a range.
 84 | To use a literal `\-' as the first endpoint of a range,
 85 | enclose it in `[.' and `.]' to make it a collating element (see below).
 86 | With the exception of these and some combinations using `[' (see next
 87 | paragraphs), all other special characters, including `\e', lose their
 88 | special significance within a bracket expression.
 89 | .PP
 90 | Within a bracket expression, a collating element (a character,
 91 | a multi-character sequence that collates as if it were a single character,
 92 | or a collating-sequence name for either)
 93 | enclosed in `[.' and `.]' stands for the
 94 | sequence of characters of that collating element.
 95 | The sequence is a single element of the bracket expression's list.
 96 | A bracket expression containing a multi-character collating element 
 97 | can thus match more than one character,
 98 | e.g. if the collating sequence includes a `ch' collating element,
 99 | then the RE `[[.ch.]]*c' matches the first five characters
100 | of `chchcc'.
101 | .PP
102 | Within a bracket expression, a collating element enclosed in `[=' and
103 | `=]' is an equivalence class, standing for the sequences of characters
104 | of all collating elements equivalent to that one, including itself.
105 | (If there are no other equivalent collating elements,
106 | the treatment is as if the enclosing delimiters were `[.' and `.]'.)
107 | For example, if o and \o'o^' are the members of an equivalence class,
108 | then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
109 | An equivalence class may not\(dg be an endpoint
110 | of a range.
111 | .PP
112 | Within a bracket expression, the name of a \fIcharacter class\fR enclosed
113 | in `[:' and `:]' stands for the list of all characters belonging to that
114 | class.
115 | Standard character class names are:
116 | .PP
117 | .RS
118 | .nf
119 | .ta 3c 6c 9c
120 | alnum	digit	punct
121 | alpha	graph	space
122 | blank	lower	upper
123 | cntrl	print	xdigit
124 | .fi
125 | .RE
126 | .PP
127 | These stand for the character classes defined in
128 | .IR ctype (3).
129 | A locale may provide others.
130 | A character class may not be used as an endpoint of a range.
131 | .PP
132 | There are two special cases\(dg of bracket expressions:
133 | the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
134 | the beginning and end of a word respectively.
135 | A word is defined as a sequence of
136 | word characters
137 | which is neither preceded nor followed by
138 | word characters.
139 | A word character is an
140 | .I alnum
141 | character (as defined by
142 | .IR ctype (3))
143 | or an underscore.
144 | This is an extension,
145 | compatible with but not specified by POSIX 1003.2,
146 | and should be used with
147 | caution in software intended to be portable to other systems.
148 | .PP
149 | In the event that an RE could match more than one substring of a given
150 | string,
151 | the RE matches the one starting earliest in the string.
152 | If the RE could match more than one substring starting at that point,
153 | it matches the longest.
154 | Subexpressions also match the longest possible substrings, subject to
155 | the constraint that the whole match be as long as possible,
156 | with subexpressions starting earlier in the RE taking priority over
157 | ones starting later.
158 | Note that higher-level subexpressions thus take priority over
159 | their lower-level component subexpressions.
160 | .PP
161 | Match lengths are measured in characters, not collating elements.
162 | A null string is considered longer than no match at all.
163 | For example,
164 | `bb*' matches the three middle characters of `abbbc',
165 | `(wee|week)(knights|nights)' matches all ten characters of `weeknights',
166 | when `(.*).*' is matched against `abc' the parenthesized subexpression
167 | matches all three characters, and
168 | when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
169 | subexpression match the null string.
170 | .PP
171 | If case-independent matching is specified,
172 | the effect is much as if all case distinctions had vanished from the
173 | alphabet.
174 | When an alphabetic that exists in multiple cases appears as an
175 | ordinary character outside a bracket expression, it is effectively
176 | transformed into a bracket expression containing both cases,
177 | e.g. `x' becomes `[xX]'.
178 | When it appears inside a bracket expression, all case counterparts
179 | of it are added to the bracket expression, so that (e.g.) `[x]'
180 | becomes `[xX]' and `[^x]' becomes `[^xX]'.
181 | .PP
182 | No particular limit is imposed on the length of REs\(dg.
183 | Programs intended to be portable should not employ REs longer
184 | than 256 bytes,
185 | as an implementation can refuse to accept such REs and remain
186 | POSIX-compliant.
187 | .PP
188 | Obsolete (``basic'') regular expressions differ in several respects.
189 | `|', `+', and `?' are ordinary characters and there is no equivalent
190 | for their functionality.
191 | The delimiters for bounds are `\e{' and `\e}',
192 | with `{' and `}' by themselves ordinary characters.
193 | The parentheses for nested subexpressions are `\e(' and `\e)',
194 | with `(' and `)' by themselves ordinary characters.
195 | `^' is an ordinary character except at the beginning of the
196 | RE or\(dg the beginning of a parenthesized subexpression,
197 | `$' is an ordinary character except at the end of the
198 | RE or\(dg the end of a parenthesized subexpression,
199 | and `*' is an ordinary character if it appears at the beginning of the
200 | RE or the beginning of a parenthesized subexpression
201 | (after a possible leading `^').
202 | Finally, there is one new type of atom, a \fIback reference\fR:
203 | `\e' followed by a non-zero decimal digit \fId\fR
204 | matches the same sequence of characters
205 | matched by the \fId\fRth parenthesized subexpression
206 | (numbering subexpressions by the positions of their opening parentheses,
207 | left to right),
208 | so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
209 | .SH SEE ALSO
210 | regex(3)
211 | .PP
212 | POSIX 1003.2, section 2.8 (Regular Expression Notation).
213 | .SH BUGS
214 | Having two kinds of REs is a botch.
215 | .PP
216 | The current 1003.2 spec says that `)' is an ordinary character in
217 | the absence of an unmatched `(';
218 | this was an unintentional result of a wording error,
219 | and change is likely.
220 | Avoid relying on it.
221 | .PP
222 | Back references are a dreadful botch,
223 | posing major problems for efficient implementations.
224 | They are also somewhat vaguely defined
225 | (does
226 | `a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
227 | Avoid using them.
228 | .PP
229 | 1003.2's specification of case-independent matching is vague.
230 | The ``one case implies all cases'' definition given above
231 | is current consensus among implementors as to the right interpretation.
232 | .PP
233 | The syntax for word boundaries is incredibly ugly.
234 | 


--------------------------------------------------------------------------------
/src/re/regex.h:
--------------------------------------------------------------------------------
 1 | #ifndef _REGEX_H_
 2 | #define   _REGEX_H_ /* never again */
 3 | 
 4 | /* Added by dsouflis - include files should be self contained */
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | #include <ctype.h>
 8 | #include <limits.h>
 9 | #include <stdlib.h>
10 | #include <sys/types.h>
11 | 
12 | /* ========= begin header generated by ././mkh ========= */
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 | 
17 | /* === regex2.h === */
18 | #ifdef WIN32
19 | #define API_EXPORT(type)    __declspec(dllexport) type __stdcall
20 | #else
21 | #define API_EXPORT(type)    type
22 | #endif
23 | 
24 | typedef off_t regoff_t;
25 | typedef struct {
26 |      int re_magic;
27 |      size_t re_nsub;          /* number of parenthesized subexpressions */
28 |      const char *re_endp;     /* end pointer for REG_PEND */
29 |      struct re_guts *re_g;    /* none of your business :-) */
30 | } regex_t;
31 | typedef struct {
32 |      regoff_t rm_so;          /* start of match */
33 |      regoff_t rm_eo;          /* end of match */
34 | } regmatch_t;
35 | 
36 | 
37 | /* === regcomp.c === */
38 | API_EXPORT(int) regcomp(regex_t *, const char *, int);
39 | #define   REG_BASIC 0000
40 | #define   REG_EXTENDED   0001
41 | #define   REG_ICASE 0002
42 | #define   REG_NOSUB 0004
43 | #define   REG_NEWLINE    0010
44 | #define   REG_NOSPEC     0020
45 | #define   REG_PEND  0040
46 | #define   REG_DUMP  0200
47 | 
48 | 
49 | /* === regerror.c === */
50 | #define   REG_NOMATCH     1
51 | #define   REG_BADPAT      2
52 | #define   REG_ECOLLATE    3
53 | #define   REG_ECTYPE      4
54 | #define   REG_EESCAPE     5
55 | #define   REG_ESUBREG     6
56 | #define   REG_EBRACK      7
57 | #define   REG_EPAREN      8
58 | #define   REG_EBRACE      9
59 | #define   REG_BADBR 10
60 | #define   REG_ERANGE     11
61 | #define   REG_ESPACE     12
62 | #define   REG_BADRPT     13
63 | #define   REG_EMPTY 14
64 | #define   REG_ASSERT     15
65 | #define   REG_INVARG     16
66 | #define   REG_ATOI  255  /* convert name to number (!) */
67 | #define   REG_ITOA  0400 /* convert number to name (!) */
68 | API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t);
69 | 
70 | 
71 | /* === regexec.c === */
72 | API_EXPORT(int) regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
73 | #define   REG_NOTBOL     00001
74 | #define   REG_NOTEOL     00002
75 | #define   REG_STARTEND   00004
76 | #define   REG_TRACE 00400     /* tracing of execution */
77 | #define   REG_LARGE 01000     /* force large representation */
78 | #define   REG_BACKR 02000     /* force use of backref code */
79 | 
80 | 
81 | /* === regfree.c === */
82 | API_EXPORT(void) regfree(regex_t *);
83 | 
84 | #ifdef __cplusplus
85 | }
86 | #endif
87 | /* ========= end header generated by ././mkh ========= */
88 | #endif
89 | 


--------------------------------------------------------------------------------
/src/re/regex2.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * First, the stuff that ends up in the outside-world include file
  3 |  = #ifdef WIN32
  4 |  = #define API_EXPORT(type)    __declspec(dllexport) type __stdcall
  5 |  = #else
  6 |  = #define API_EXPORT(type)    type
  7 |  = #endif
  8 |  =
  9 |  = typedef off_t regoff_t;
 10 |  = typedef struct {
 11 |  = 	int re_magic;
 12 |  = 	size_t re_nsub;		// number of parenthesized subexpressions
 13 |  = 	const char *re_endp;	// end pointer for REG_PEND
 14 |  = 	struct re_guts *re_g;	// none of your business :-)
 15 |  = } regex_t;
 16 |  = typedef struct {
 17 |  = 	regoff_t rm_so;		// start of match
 18 |  = 	regoff_t rm_eo;		// end of match
 19 |  = } regmatch_t;
 20 |  */
 21 | /*
 22 |  * internals of regex_t
 23 |  */
 24 | #define	MAGIC1	((('r'^0200)<<8) | 'e')
 25 | 
 26 | /*
 27 |  * The internal representation is a *strip*, a sequence of
 28 |  * operators ending with an endmarker.  (Some terminology etc. is a
 29 |  * historical relic of earlier versions which used multiple strips.)
 30 |  * Certain oddities in the representation are there to permit running
 31 |  * the machinery backwards; in particular, any deviation from sequential
 32 |  * flow must be marked at both its source and its destination.  Some
 33 |  * fine points:
 34 |  *
 35 |  * - OPLUS_ and O_PLUS are *inside* the loop they create.
 36 |  * - OQUEST_ and O_QUEST are *outside* the bypass they create.
 37 |  * - OCH_ and O_CH are *outside* the multi-way branch they create, while
 38 |  *   OOR1 and OOR2 are respectively the end and the beginning of one of
 39 |  *   the branches.  Note that there is an implicit OOR2 following OCH_
 40 |  *   and an implicit OOR1 preceding O_CH.
 41 |  *
 42 |  * In state representations, an operator's bit is on to signify a state
 43 |  * immediately *preceding* "execution" of that operator.
 44 |  */
 45 | typedef unsigned long sop;	/* strip operator */
 46 | typedef long sopno;
 47 | #define	OPRMASK	0xf8000000
 48 | #define	OPDMASK	0x07ffffff
 49 | #define	OPSHIFT	((unsigned)27)
 50 | #define	OP(n)	((n)&OPRMASK)
 51 | #define	OPND(n)	((n)&OPDMASK)
 52 | #define	SOP(op, opnd)	((op)|(opnd))
 53 | /* operators			   meaning	operand			*/
 54 | /*						(back, fwd are offsets)	*/
 55 | #define	OEND	(1<<OPSHIFT)	/* endmarker	-			*/
 56 | #define	OCHAR	(2<<OPSHIFT)	/* character	unsigned char		*/
 57 | #define	OBOL	(3<<OPSHIFT)	/* left anchor	-			*/
 58 | #define	OEOL	(4<<OPSHIFT)	/* right anchor	-			*/
 59 | #define	OANY	(5<<OPSHIFT)	/* .		-			*/
 60 | #define	OANYOF	(6<<OPSHIFT)	/* [...]	set number		*/
 61 | #define	OBACK_	(7<<OPSHIFT)	/* begin \d	paren number		*/
 62 | #define	O_BACK	(8<<OPSHIFT)	/* end \d	paren number		*/
 63 | #define	OPLUS_	(9<<OPSHIFT)	/* + prefix	fwd to suffix		*/
 64 | #define	O_PLUS	(10<<OPSHIFT)	/* + suffix	back to prefix		*/
 65 | #define	OQUEST_	(11<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
 66 | #define	O_QUEST	(12<<OPSHIFT)	/* ? suffix	back to prefix		*/
 67 | #define	OLPAREN	(13<<OPSHIFT)	/* (		fwd to )		*/
 68 | #define	ORPAREN	(14<<OPSHIFT)	/* )		back to (		*/
 69 | #define	OCH_	(15<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
 70 | #define	OOR1	(16u<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
 71 | #define	OOR2	(17u<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
 72 | #define	O_CH	(18u<<OPSHIFT)	/* end choice	back to OOR1		*/
 73 | #define	OBOW	(19u<<OPSHIFT)	/* begin word	-			*/
 74 | #define	OEOW	(20u<<OPSHIFT)	/* end word	-			*/
 75 | 
 76 | /*
 77 |  * Structure for [] character-set representation.  Character sets are
 78 |  * done as bit vectors, grouped 8 to a byte vector for compactness.
 79 |  * The individual set therefore has both a pointer to the byte vector
 80 |  * and a mask to pick out the relevant bit of each byte.  A hash code
 81 |  * simplifies testing whether two sets could be identical.
 82 |  *
 83 |  * This will get trickier for multicharacter collating elements.  As
 84 |  * preliminary hooks for dealing with such things, we also carry along
 85 |  * a string of multi-character elements, and decide the size of the
 86 |  * vectors at run time.
 87 |  */
 88 | typedef struct {
 89 | 	uch *ptr;		/* -> uch [csetsize] */
 90 | 	uch mask;		/* bit within array */
 91 | 	uch hash;		/* hash code */
 92 | 	size_t smultis;
 93 | 	char *multis;		/* -> char[smulti]  ab\0cd\0ef\0\0 */
 94 | } cset;
 95 | /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
 96 | #define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
 97 | #define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
 98 | #define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask)
 99 | #define	MCadd(p, cs, cp)	mcadd(p, cs, cp)	/* regcomp() internal fns */
100 | 
101 | /* stuff for character categories */
102 | typedef unsigned char cat_t;
103 | 
104 | /*
105 |  * main compiled-expression structure
106 |  */
107 | struct re_guts {
108 | 	int magic;
109 | #		define	MAGIC2	((('R'^0200)<<8)|'E')
110 | 	sop *strip;		/* malloced area for strip */
111 | 	int csetsize;		/* number of bits in a cset vector */
112 | 	int ncsets;		/* number of csets in use */
113 | 	cset *sets;		/* -> cset [ncsets] */
114 | 	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT] */
115 | 	int cflags;		/* copy of regcomp() cflags argument */
116 | 	sopno nstates;		/* = number of sops */
117 | 	sopno firststate;	/* the initial OEND (normally 0) */
118 | 	sopno laststate;	/* the final OEND */
119 | 	int iflags;		/* internal flags */
120 | #		define	USEBOL	01	/* used ^ */
121 | #		define	USEEOL	02	/* used $ */
122 | #		define	BAD	04	/* something wrong */
123 | 	int nbol;		/* number of ^ used */
124 | 	int neol;		/* number of $ used */
125 | 	int ncategories;	/* how many character categories */
126 | 	cat_t *categories;	/* ->catspace[-CHAR_MIN] */
127 | 	char *must;		/* match must contain this string */
128 | 	int mlen;		/* length of must */
129 | 	size_t nsub;		/* copy of re_nsub */
130 | 	int backrefs;		/* does it use back references? */
131 | 	sopno nplus;		/* how deep does it nest +s? */
132 | 	/* catspace must be last */
133 | 	cat_t catspace[1];	/* actually [NC] */
134 | };
135 | 
136 | /* misc utilities */
137 | #define	OUT	(CHAR_MAX+1)	/* a non-character value */
138 | #define	ISWORD(c)	(isalnum(c) || (c) == '_')
139 | 


--------------------------------------------------------------------------------
/src/re/regexec.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * the outer shell of regexec()
  3 |  *
  4 |  * This file includes engine.c *twice*, after muchos fiddling with the
  5 |  * macros that code uses.  This lets the same code operate on two different
  6 |  * representations for state sets.
  7 |  */
  8 | #include <regex.h>
  9 | 
 10 | #include "utils.h"
 11 | #include "regex2.h"
 12 | 
 13 | #ifndef NDEBUG
 14 | static int nope = 0;          /* for use in asserts; shuts lint up */
 15 | #endif
 16 | 
 17 | /* macros for manipulating states, small version */
 18 | #define   states    long
 19 | #define   states1   states         /* for later use in regexec() decision */
 20 | #define   CLEAR(v)  ((v) = 0)
 21 | #define   SET0(v, n)     ((v) &= ~(1 << (n)))
 22 | #define   SET1(v, n)     ((v) |= 1 << (n))
 23 | #define   ISSET(v, n)    ((v) & (1 << (n)))
 24 | #define   ASSIGN(d, s)   ((d) = (s))
 25 | #define   EQ(a, b)  ((a) == (b))
 26 | #define   STATEVARS int dummy /* dummy version */
 27 | #define   STATESETUP(m, n)    /* nothing */
 28 | #define   STATETEARDOWN(m)    /* nothing */
 29 | #define   SETUP(v)  ((v) = 0)
 30 | #define   onestate  int
 31 | #define   INIT(o, n)     ((o) = (unsigned)1 << (n))
 32 | #define   INC(o)    ((o) <<= 1)
 33 | #define   ISSTATEIN(v, o)     ((v) & (o))
 34 | /* some abbreviations; note that some of these know variable names! */
 35 | /* do "if I'm here, I can also be there" etc without branches */
 36 | #define   FWD(dst, src, n)    ((dst) |= ((unsigned)(src)&(here)) << (n))
 37 | #define   BACK(dst, src, n)   ((dst) |= ((unsigned)(src)&(here)) >> (n))
 38 | #define   ISSETBACK(v, n)     ((v) & ((unsigned)here >> (n)))
 39 | /* function names */
 40 | #define SNAMES           /* engine.c looks after details */
 41 | 
 42 | #include "engine.c"
 43 | 
 44 | /* now undo things */
 45 | #undef    states
 46 | #undef    CLEAR
 47 | #undef    SET0
 48 | #undef    SET1
 49 | #undef    ISSET
 50 | #undef    ASSIGN
 51 | #undef    EQ
 52 | #undef    STATEVARS
 53 | #undef    STATESETUP
 54 | #undef    STATETEARDOWN
 55 | #undef    SETUP
 56 | #undef    onestate
 57 | #undef    INIT
 58 | #undef    INC
 59 | #undef    ISSTATEIN
 60 | #undef    FWD
 61 | #undef    BACK
 62 | #undef    ISSETBACK
 63 | #undef    SNAMES
 64 | 
 65 | /* macros for manipulating states, large version */
 66 | #define   states    char *
 67 | #define   CLEAR(v)  memset(v, 0, m->g->nstates)
 68 | #define   SET0(v, n)     ((v)[n] = 0)
 69 | #define   SET1(v, n)     ((v)[n] = 1)
 70 | #define   ISSET(v, n)    ((v)[n])
 71 | #define   ASSIGN(d, s)   memcpy(d, s, m->g->nstates)
 72 | #define   EQ(a, b)  (memcmp(a, b, m->g->nstates) == 0)
 73 | #define   STATEVARS int vn; char *space
 74 | #define   STATESETUP(m, nv)   { (m)->space = malloc((nv)*(m)->g->nstates); \
 75 |                     if ((m)->space == NULL) return(REG_ESPACE); \
 76 |                     (m)->vn = 0; }
 77 | #define   STATETEARDOWN(m)    { free((m)->space); }
 78 | #define   SETUP(v)  ((v) = &m->space[m->vn++ * m->g->nstates])
 79 | #define   onestate  int
 80 | #define   INIT(o, n)     ((o) = (n))
 81 | #define   INC(o)    ((o)++)
 82 | #define   ISSTATEIN(v, o)     ((v)[o])
 83 | /* some abbreviations; note that some of these know variable names! */
 84 | /* do "if I'm here, I can also be there" etc without branches */
 85 | #define   FWD(dst, src, n)    ((dst)[here+(n)] |= (src)[here])
 86 | #define   BACK(dst, src, n)   ((dst)[here-(n)] |= (src)[here])
 87 | #define   ISSETBACK(v, n)     ((v)[here - (n)])
 88 | /* function names */
 89 | #define   LNAMES              /* flag */
 90 | 
 91 | #include "engine.c"
 92 | 
 93 | /*
 94 |  - regexec - interface for matching
 95 |  = API_EXPORT(int) regexec(const regex_t *, const char *, size_t, \
 96 |  =                       regmatch_t [], int);
 97 |  = #define     REG_NOTBOL     00001
 98 |  = #define     REG_NOTEOL     00002
 99 |  = #define     REG_STARTEND   00004
100 |  = #define     REG_TRACE 00400     // tracing of execution
101 |  = #define     REG_LARGE 01000     // force large representation
102 |  = #define     REG_BACKR 02000     // force use of backref code
103 |  *
104 |  * We put this here so we can exploit knowledge of the state representation
105 |  * when choosing which matcher to call.  Also, by this point the matchers
106 |  * have been prototyped.
107 |  */
108 | API_EXPORT(int)                    /* 0 success, REG_NOMATCH failure */
109 | regexec(preg, string, nmatch, pmatch, eflags)
110 | const regex_t *preg;
111 | const char *string;
112 | size_t nmatch;
113 | regmatch_t pmatch[];
114 | int eflags;
115 | {
116 |      register struct re_guts *g = preg->re_g;
117 | #ifdef REDEBUG
118 | #    define    GOODFLAGS(f)   (f)
119 | #else
120 | #    define    GOODFLAGS(f)   ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
121 | #endif
122 | 
123 |      if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
124 |           return(REG_BADPAT);
125 |      assert(!(g->iflags&BAD));
126 |      if (g->iflags&BAD)       /* backstop for no-debug case */
127 |           return(REG_BADPAT);
128 |      eflags = GOODFLAGS(eflags);
129 | 
130 |      if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
131 |           return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
132 |      else
133 |           return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
134 | }
135 | 


--------------------------------------------------------------------------------
/src/re/regfree.c:
--------------------------------------------------------------------------------
 1 | #include <regex.h>
 2 | 
 3 | #include "utils.h"
 4 | #include "regex2.h"
 5 | 
 6 | /*
 7 |  - regfree - free everything
 8 |  = API_EXPORT(void) regfree(regex_t *);
 9 |  */
10 | API_EXPORT(void)
11 | regfree(preg)
12 | regex_t *preg;
13 | {
14 |      register struct re_guts *g;
15 | 
16 |      if (preg->re_magic != MAGIC1) /* oops */
17 |           return;             /* nice to complain, but hard */
18 | 
19 |      g = preg->re_g;
20 |      if (g == NULL || g->magic != MAGIC2)    /* oops again */
21 |           return;
22 |      preg->re_magic = 0;      /* mark it invalid */
23 |      g->magic = 0;            /* mark it invalid */
24 | 
25 |      if (g->strip != NULL)
26 |           free((char *)g->strip);
27 |      if (g->sets != NULL)
28 |           free((char *)g->sets);
29 |      if (g->setbits != NULL)
30 |           free((char *)g->setbits);
31 |      if (g->must != NULL)
32 |           free(g->must);
33 |      free((char *)g);
34 | }
35 | 


--------------------------------------------------------------------------------
/src/re/split.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | 
  4 | /*
  5 |  - split - divide a string into fields, like awk split()
  6 |  = int split(char *string, char *fields[], int nfields, char *sep);
  7 |  */
  8 | int				/* number of fields, including overflow */
  9 | split(string, fields, nfields, sep)
 10 | char *string;
 11 | char *fields[];			/* list is not NULL-terminated */
 12 | int nfields;			/* number of entries available in fields[] */
 13 | char *sep;			/* "" white, "c" single char, "ab" [ab]+ */
 14 | {
 15 | 	register char *p = string;
 16 | 	register char c;			/* latest character */
 17 | 	register char sepc = sep[0];
 18 | 	register char sepc2;
 19 | 	register int fn;
 20 | 	register char **fp = fields;
 21 | 	register char *sepp;
 22 | 	register int trimtrail;
 23 | 
 24 | 	/* white space */
 25 | 	if (sepc == '\0') {
 26 | 		while ((c = *p++) == ' ' || c == '\t')
 27 | 			continue;
 28 | 		p--;
 29 | 		trimtrail = 1;
 30 | 		sep = " \t";	/* note, code below knows this is 2 long */
 31 | 		sepc = ' ';
 32 | 	} else
 33 | 		trimtrail = 0;
 34 | 	sepc2 = sep[1];		/* now we can safely pick this up */
 35 | 
 36 | 	/* catch empties */
 37 | 	if (*p == '\0')
 38 | 		return(0);
 39 | 
 40 | 	/* single separator */
 41 | 	if (sepc2 == '\0') {
 42 | 		fn = nfields;
 43 | 		for (;;) {
 44 | 			*fp++ = p;
 45 | 			fn--;
 46 | 			if (fn == 0)
 47 | 				break;
 48 | 			while ((c = *p++) != sepc)
 49 | 				if (c == '\0')
 50 | 					return(nfields - fn);
 51 | 			*(p-1) = '\0';
 52 | 		}
 53 | 		/* we have overflowed the fields vector -- just count them */
 54 | 		fn = nfields;
 55 | 		for (;;) {
 56 | 			while ((c = *p++) != sepc)
 57 | 				if (c == '\0')
 58 | 					return(fn);
 59 | 			fn++;
 60 | 		}
 61 | 		/* not reached */
 62 | 	}
 63 | 
 64 | 	/* two separators */
 65 | 	if (sep[2] == '\0') {
 66 | 		fn = nfields;
 67 | 		for (;;) {
 68 | 			*fp++ = p;
 69 | 			fn--;
 70 | 			while ((c = *p++) != sepc && c != sepc2)
 71 | 				if (c == '\0') {
 72 | 					if (trimtrail && **(fp-1) == '\0')
 73 | 						fn++;
 74 | 					return(nfields - fn);
 75 | 				}
 76 | 			if (fn == 0)
 77 | 				break;
 78 | 			*(p-1) = '\0';
 79 | 			while ((c = *p++) == sepc || c == sepc2)
 80 | 				continue;
 81 | 			p--;
 82 | 		}
 83 | 		/* we have overflowed the fields vector -- just count them */
 84 | 		fn = nfields;
 85 | 		while (c != '\0') {
 86 | 			while ((c = *p++) == sepc || c == sepc2)
 87 | 				continue;
 88 | 			p--;
 89 | 			fn++;
 90 | 			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
 91 | 				continue;
 92 | 		}
 93 | 		/* might have to trim trailing white space */
 94 | 		if (trimtrail) {
 95 | 			p--;
 96 | 			while ((c = *--p) == sepc || c == sepc2)
 97 | 				continue;
 98 | 			p++;
 99 | 			if (*p != '\0') {
100 | 				if (fn == nfields+1)
101 | 					*p = '\0';
102 | 				fn--;
103 | 			}
104 | 		}
105 | 		return(fn);
106 | 	}
107 | 
108 | 	/* n separators */
109 | 	fn = 0;
110 | 	for (;;) {
111 | 		if (fn < nfields)
112 | 			*fp++ = p;
113 | 		fn++;
114 | 		for (;;) {
115 | 			c = *p++;
116 | 			if (c == '\0')
117 | 				return(fn);
118 | 			sepp = sep;
119 | 			while ((sepc = *sepp++) != '\0' && sepc != c)
120 | 				continue;
121 | 			if (sepc != '\0')	/* it was a separator */
122 | 				break;
123 | 		}
124 | 		if (fn < nfields)
125 | 			*(p-1) = '\0';
126 | 		for (;;) {
127 | 			c = *p++;
128 | 			sepp = sep;
129 | 			while ((sepc = *sepp++) != '\0' && sepc != c)
130 | 				continue;
131 | 			if (sepc == '\0')	/* it wasn't a separator */
132 | 				break;
133 | 		}
134 | 		p--;
135 | 	}
136 | 
137 | 	/* not reached */
138 | }
139 | 
140 | #ifdef TEST_SPLIT
141 | 
142 | 
143 | /*
144 |  * test program
145 |  * pgm		runs regression
146 |  * pgm sep	splits stdin lines by sep
147 |  * pgm str sep	splits str by sep
148 |  * pgm str sep n	splits str by sep n times
149 |  */
150 | int
151 | main(argc, argv)
152 | int argc;
153 | char *argv[];
154 | {
155 | 	char buf[512];
156 | 	register int n;
157 | #	define	MNF	10
158 | 	char *fields[MNF];
159 | 
160 | 	if (argc > 4)
161 | 		for (n = atoi(argv[3]); n > 0; n--) {
162 | 			(void) strcpy(buf, argv[1]);
163 | 		}
164 | 	else if (argc > 3)
165 | 		for (n = atoi(argv[3]); n > 0; n--) {
166 | 			(void) strcpy(buf, argv[1]);
167 | 			(void) split(buf, fields, MNF, argv[2]);
168 | 		}
169 | 	else if (argc > 2)
170 | 		dosplit(argv[1], argv[2]);
171 | 	else if (argc > 1)
172 | 		while (fgets(buf, sizeof(buf), stdin) != NULL) {
173 | 			buf[strlen(buf)-1] = '\0';	/* stomp newline */
174 | 			dosplit(buf, argv[1]);
175 | 		}
176 | 	else
177 | 		regress();
178 | 
179 | 	exit(0);
180 | }
181 | 
182 | dosplit(string, seps)
183 | char *string;
184 | char *seps;
185 | {
186 | #	define	NF	5
187 | 	char *fields[NF];
188 | 	register int nf;
189 | 
190 | 	nf = split(string, fields, NF, seps);
191 | 	print(nf, NF, fields);
192 | }
193 | 
194 | print(nf, nfp, fields)
195 | int nf;
196 | int nfp;
197 | char *fields[];
198 | {
199 | 	register int fn;
200 | 	register int bound;
201 | 
202 | 	bound = (nf > nfp) ? nfp : nf;
203 | 	printf("%d:\t", nf);
204 | 	for (fn = 0; fn < bound; fn++)
205 | 		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
206 | }
207 | 
208 | #define	RNF	5		/* some table entries know this */
209 | struct {
210 | 	char *str;
211 | 	char *seps;
212 | 	int nf;
213 | 	char *fi[RNF];
214 | } tests[] = {
215 | 	"",		" ",	0,	{ "" },
216 | 	" ",		" ",	2,	{ "", "" },
217 | 	"x",		" ",	1,	{ "x" },
218 | 	"xy",		" ",	1,	{ "xy" },
219 | 	"x y",		" ",	2,	{ "x", "y" },
220 | 	"abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" },
221 | 	"  a bcd",	" ",	4,	{ "", "", "a", "bcd" },
222 | 	"a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" },
223 | 	" a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " },
224 | 
225 | 	"",		" _",	0,	{ "" },
226 | 	" ",		" _",	2,	{ "", "" },
227 | 	"x",		" _",	1,	{ "x" },
228 | 	"x y",		" _",	2,	{ "x", "y" },
229 | 	"ab _ cd",	" _",	2,	{ "ab", "cd" },
230 | 	" a_b  c ",	" _",	5,	{ "", "a", "b", "c", "" },
231 | 	"a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" },
232 | 	" a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " },
233 | 
234 | 	"",		" _~",	0,	{ "" },
235 | 	" ",		" _~",	2,	{ "", "" },
236 | 	"x",		" _~",	1,	{ "x" },
237 | 	"x y",		" _~",	2,	{ "x", "y" },
238 | 	"ab _~ cd",	" _~",	2,	{ "ab", "cd" },
239 | 	" a_b  c~",	" _~",	5,	{ "", "a", "b", "c", "" },
240 | 	"a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" },
241 | 	"~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " },
242 | 
243 | 	"",		" _~-",	0,	{ "" },
244 | 	" ",		" _~-",	2,	{ "", "" },
245 | 	"x",		" _~-",	1,	{ "x" },
246 | 	"x y",		" _~-",	2,	{ "x", "y" },
247 | 	"ab _~- cd",	" _~-",	2,	{ "ab", "cd" },
248 | 	" a_b  c~",	" _~-",	5,	{ "", "a", "b", "c", "" },
249 | 	"a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" },
250 | 	"~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " },
251 | 
252 | 	"",		"  ",	0,	{ "" },
253 | 	" ",		"  ",	2,	{ "", "" },
254 | 	"x",		"  ",	1,	{ "x" },
255 | 	"xy",		"  ",	1,	{ "xy" },
256 | 	"x y",		"  ",	2,	{ "x", "y" },
257 | 	"abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" },
258 | 	"  a bcd",	"  ",	3,	{ "", "a", "bcd" },
259 | 	"a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" },
260 | 	" a b c d ",	"  ",	6,	{ "", "a", "b", "c", "d " },
261 | 
262 | 	"",		"",	0,	{ "" },
263 | 	" ",		"",	0,	{ "" },
264 | 	"x",		"",	1,	{ "x" },
265 | 	"xy",		"",	1,	{ "xy" },
266 | 	"x y",		"",	2,	{ "x", "y" },
267 | 	"abc def  g ",	"",	3,	{ "abc", "def", "g" },
268 | 	"\t a bcd",	"",	2,	{ "a", "bcd" },
269 | 	"  a \tb\t c ",	"",	3,	{ "a", "b", "c" },
270 | 	"a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" },
271 | 	"a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" },
272 | 	" a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " },
273 | 
274 | 	NULL,		NULL,	0,	{ NULL },
275 | };
276 | 
277 | regress()
278 | {
279 | 	char buf[512];
280 | 	register int n;
281 | 	char *fields[RNF+1];
282 | 	register int nf;
283 | 	register int i;
284 | 	register int printit;
285 | 	register char *f;
286 | 
287 | 	for (n = 0; tests[n].str != NULL; n++) {
288 | 		(void) strcpy(buf, tests[n].str);
289 | 		fields[RNF] = NULL;
290 | 		nf = split(buf, fields, RNF, tests[n].seps);
291 | 		printit = 0;
292 | 		if (nf != tests[n].nf) {
293 | 			printf("split `%s' by `%s' gave %d fields, not %d\n",
294 | 				tests[n].str, tests[n].seps, nf, tests[n].nf);
295 | 			printit = 1;
296 | 		} else if (fields[RNF] != NULL) {
297 | 			printf("split() went beyond array end\n");
298 | 			printit = 1;
299 | 		} else {
300 | 			for (i = 0; i < nf && i < RNF; i++) {
301 | 				f = fields[i];
302 | 				if (f == NULL)
303 | 					f = "(NULL)";
304 | 				if (strcmp(f, tests[n].fi[i]) != 0) {
305 | 					printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
306 | 						tests[n].str, tests[n].seps,
307 | 						i, fields[i], tests[n].fi[i]);
308 | 					printit = 1;
309 | 				}
310 | 			}
311 | 		}
312 | 		if (printit)
313 | 			print(nf, RNF, fields);
314 | 	}
315 | }
316 | #endif
317 | 


--------------------------------------------------------------------------------
/src/re/tests:
--------------------------------------------------------------------------------
  1 | # regular expression test set
  2 | # Lines are at least three fields, separated by one or more tabs.  "" stands
  3 | # for an empty field.  First field is an RE.  Second field is flags.  If
  4 | # C flag given, regcomp() is expected to fail, and the third field is the
  5 | # error name (minus the leading REG_).
  6 | #
  7 | # Otherwise it is expected to succeed, and the third field is the string to
  8 | # try matching it against.  If there is no fourth field, the match is
  9 | # expected to fail.  If there is a fourth field, it is the substring that
 10 | # the RE is expected to match.  If there is a fifth field, it is a comma-
 11 | # separated list of what the subexpressions should match, with - indicating
 12 | # no match for that one.  In both the fourth and fifth fields, a (sub)field
 13 | # starting with @ indicates that the (sub)expression is expected to match
 14 | # a null string followed by the stuff after the @; this provides a way to
 15 | # test where null strings match.  The character `N' in REs and strings
 16 | # is newline, `S' is space, `T' is tab, `Z' is NUL.
 17 | #
 18 | # The full list of flags:
 19 | #	-	placeholder, does nothing
 20 | #	b	RE is a BRE, not an ERE
 21 | #	&	try it as both an ERE and a BRE
 22 | #	C	regcomp() error expected, third field is error name
 23 | #	i	REG_ICASE
 24 | #	m	("mundane") REG_NOSPEC
 25 | #	s	REG_NOSUB (not really testable)
 26 | #	n	REG_NEWLINE
 27 | #	^	REG_NOTBOL
 28 | #	$	REG_NOTEOL
 29 | #	#	REG_STARTEND (see below)
 30 | #	p	REG_PEND
 31 | #
 32 | # For REG_STARTEND, the start/end offsets are those of the substring
 33 | # enclosed in ().
 34 | 
 35 | # basics
 36 | a		&	a	a
 37 | abc		&	abc	abc
 38 | abc|de		-	abc	abc
 39 | a|b|c		-	abc	a
 40 | 
 41 | # parentheses and perversions thereof
 42 | a(b)c		-	abc	abc
 43 | a\(b\)c		b	abc	abc
 44 | a(		C	EPAREN
 45 | a(		b	a(	a(
 46 | a\(		-	a(	a(
 47 | a\(		bC	EPAREN
 48 | a\(b		bC	EPAREN
 49 | a(b		C	EPAREN
 50 | a(b		b	a(b	a(b
 51 | # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
 52 | a)		-	a)	a)
 53 | )		-	)	)
 54 | # end gagging (in a just world, those *should* give EPAREN)
 55 | a)		b	a)	a)
 56 | a\)		bC	EPAREN
 57 | \)		bC	EPAREN
 58 | a()b		-	ab	ab
 59 | a\(\)b		b	ab	ab
 60 | 
 61 | # anchoring and REG_NEWLINE
 62 | ^abc$		&	abc	abc
 63 | a^b		-	a^b
 64 | a^b		b	a^b	a^b
 65 | a$b		-	a$b
 66 | a$b		b	a$b	a$b
 67 | ^		&	abc	@abc
 68 | $		&	abc	@
 69 | ^$		&	""	@
 70 | $^		-	""	@
 71 | \($\)\(^\)	b	""	@
 72 | # stop retching, those are legitimate (although disgusting)
 73 | ^^		-	""	@
 74 | $$		-	""	@
 75 | b$		&	abNc
 76 | b$		&n	abNc	b
 77 | ^b$		&	aNbNc
 78 | ^b$		&n	aNbNc	b
 79 | ^$		&n	aNNb	@Nb
 80 | ^$		n	abc
 81 | ^$		n	abcN	@
 82 | $^		n	aNNb	@Nb
 83 | \($\)\(^\)	bn	aNNb	@Nb
 84 | ^^		n^	aNNb	@Nb
 85 | $$		n	aNNb	@NN
 86 | ^a		^	a
 87 | a$		$	a
 88 | ^a		^n	aNb
 89 | ^b		^n	aNb	b
 90 | a$		$n	bNa
 91 | b$		$n	bNa	b
 92 | a*(^b$)c*	-	b	b
 93 | a*\(^b$\)c*	b	b	b
 94 | 
 95 | # certain syntax errors and non-errors
 96 | |		C	EMPTY
 97 | |		b	|	|
 98 | *		C	BADRPT
 99 | *		b	*	*
100 | +		C	BADRPT
101 | ?		C	BADRPT
102 | ""		&C	EMPTY
103 | ()		-	abc	@abc
104 | \(\)		b	abc	@abc
105 | a||b		C	EMPTY
106 | |ab		C	EMPTY
107 | ab|		C	EMPTY
108 | (|a)b		C	EMPTY
109 | (a|)b		C	EMPTY
110 | (*a)		C	BADRPT
111 | (+a)		C	BADRPT
112 | (?a)		C	BADRPT
113 | ({1}a)		C	BADRPT
114 | \(\{1\}a\)	bC	BADRPT
115 | (a|*b)		C	BADRPT
116 | (a|+b)		C	BADRPT
117 | (a|?b)		C	BADRPT
118 | (a|{1}b)	C	BADRPT
119 | ^*		C	BADRPT
120 | ^*		b	*	*
121 | ^+		C	BADRPT
122 | ^?		C	BADRPT
123 | ^{1}		C	BADRPT
124 | ^\{1\}		bC	BADRPT
125 | 
126 | # metacharacters, backslashes
127 | a.c		&	abc	abc
128 | a[bc]d		&	abd	abd
129 | a\*c		&	a*c	a*c
130 | a\\b		&	a\b	a\b
131 | a\\\*b		&	a\*b	a\*b
132 | a\bc		&	abc	abc
133 | a\		&C	EESCAPE
134 | a\\bc		&	a\bc	a\bc
135 | \{		bC	BADRPT
136 | a\[b		&	a[b	a[b
137 | a[b		&C	EBRACK
138 | # trailing $ is a peculiar special case for the BRE code
139 | a$		&	a	a
140 | a$		&	a$
141 | a\$		&	a
142 | a\$		&	a$	a$
143 | a\\$		&	a
144 | a\\$		&	a$
145 | a\\$		&	a\$
146 | a\\$		&	a\	a\
147 | 
148 | # back references, ugh
149 | a\(b\)\2c	bC	ESUBREG
150 | a\(b\1\)c	bC	ESUBREG
151 | a\(b*\)c\1d	b	abbcbbd	abbcbbd	bb
152 | a\(b*\)c\1d	b	abbcbd
153 | a\(b*\)c\1d	b	abbcbbbd
154 | ^\(.\)\1	b	abc
155 | a\([bc]\)\1d	b	abcdabbd	abbd	b
156 | a\(\([bc]\)\2\)*d	b	abbccd	abbccd
157 | a\(\([bc]\)\2\)*d	b	abbcbd
158 | # actually, this next one probably ought to fail, but the spec is unclear
159 | a\(\(b\)*\2\)*d		b	abbbd	abbbd
160 | # here is a case that no NFA implementation does right
161 | \(ab*\)[ab]*\1	b	ababaaa	ababaaa	a
162 | # check out normal matching in the presence of back refs
163 | \(a\)\1bcd	b	aabcd	aabcd
164 | \(a\)\1bc*d	b	aabcd	aabcd
165 | \(a\)\1bc*d	b	aabd	aabd
166 | \(a\)\1bc*d	b	aabcccd	aabcccd
167 | \(a\)\1bc*[ce]d	b	aabcccd	aabcccd
168 | ^\(a\)\1b\(c\)*cd$	b	aabcccd	aabcccd
169 | 
170 | # ordinary repetitions
171 | ab*c		&	abc	abc
172 | ab+c		-	abc	abc
173 | ab?c		-	abc	abc
174 | a\(*\)b		b	a*b	a*b
175 | a\(**\)b	b	ab	ab
176 | a\(***\)b	bC	BADRPT
177 | *a		b	*a	*a
178 | **a		b	a	a
179 | ***a		bC	BADRPT
180 | 
181 | # the dreaded bounded repetitions
182 | {		&	{	{
183 | {abc		&	{abc	{abc
184 | {1		C	BADRPT
185 | {1}		C	BADRPT
186 | a{b		&	a{b	a{b
187 | a{1}b		-	ab	ab
188 | a\{1\}b		b	ab	ab
189 | a{1,}b		-	ab	ab
190 | a\{1,\}b	b	ab	ab
191 | a{1,2}b		-	aab	aab
192 | a\{1,2\}b	b	aab	aab
193 | a{1		C	EBRACE
194 | a\{1		bC	EBRACE
195 | a{1a		C	EBRACE
196 | a\{1a		bC	EBRACE
197 | a{1a}		C	BADBR
198 | a\{1a\}		bC	BADBR
199 | a{,2}		-	a{,2}	a{,2}
200 | a\{,2\}		bC	BADBR
201 | a{,}		-	a{,}	a{,}
202 | a\{,\}		bC	BADBR
203 | a{1,x}		C	BADBR
204 | a\{1,x\}	bC	BADBR
205 | a{1,x		C	EBRACE
206 | a\{1,x		bC	EBRACE
207 | a{300}		C	BADBR
208 | a\{300\}	bC	BADBR
209 | a{1,0}		C	BADBR
210 | a\{1,0\}	bC	BADBR
211 | ab{0,0}c	-	abcac	ac
212 | ab\{0,0\}c	b	abcac	ac
213 | ab{0,1}c	-	abcac	abc
214 | ab\{0,1\}c	b	abcac	abc
215 | ab{0,3}c	-	abbcac	abbc
216 | ab\{0,3\}c	b	abbcac	abbc
217 | ab{1,1}c	-	acabc	abc
218 | ab\{1,1\}c	b	acabc	abc
219 | ab{1,3}c	-	acabc	abc
220 | ab\{1,3\}c	b	acabc	abc
221 | ab{2,2}c	-	abcabbc	abbc
222 | ab\{2,2\}c	b	abcabbc	abbc
223 | ab{2,4}c	-	abcabbc	abbc
224 | ab\{2,4\}c	b	abcabbc	abbc
225 | ((a{1,10}){1,10}){1,10}	-	a	a	a,a
226 | 
227 | # multiple repetitions
228 | a**		&C	BADRPT
229 | a++		C	BADRPT
230 | a??		C	BADRPT
231 | a*+		C	BADRPT
232 | a*?		C	BADRPT
233 | a+*		C	BADRPT
234 | a+?		C	BADRPT
235 | a?*		C	BADRPT
236 | a?+		C	BADRPT
237 | a{1}{1}		C	BADRPT
238 | a*{1}		C	BADRPT
239 | a+{1}		C	BADRPT
240 | a?{1}		C	BADRPT
241 | a{1}*		C	BADRPT
242 | a{1}+		C	BADRPT
243 | a{1}?		C	BADRPT
244 | a*{b}		-	a{b}	a{b}
245 | a\{1\}\{1\}	bC	BADRPT
246 | a*\{1\}		bC	BADRPT
247 | a\{1\}*		bC	BADRPT
248 | 
249 | # brackets, and numerous perversions thereof
250 | a[b]c		&	abc	abc
251 | a[ab]c		&	abc	abc
252 | a[^ab]c		&	adc	adc
253 | a[]b]c		&	a]c	a]c
254 | a[[b]c		&	a[c	a[c
255 | a[-b]c		&	a-c	a-c
256 | a[^]b]c		&	adc	adc
257 | a[^-b]c		&	adc	adc
258 | a[b-]c		&	a-c	a-c
259 | a[b		&C	EBRACK
260 | a[]		&C	EBRACK
261 | a[1-3]c		&	a2c	a2c
262 | a[3-1]c		&C	ERANGE
263 | a[1-3-5]c	&C	ERANGE
264 | a[[.-.]--]c	&	a-c	a-c
265 | a[1-		&C	ERANGE
266 | a[[.		&C	EBRACK
267 | a[[.x		&C	EBRACK
268 | a[[.x.		&C	EBRACK
269 | a[[.x.]		&C	EBRACK
270 | a[[.x.]]	&	ax	ax
271 | a[[.x,.]]	&C	ECOLLATE
272 | a[[.one.]]b	&	a1b	a1b
273 | a[[.notdef.]]b	&C	ECOLLATE
274 | a[[.].]]b	&	a]b	a]b
275 | a[[:alpha:]]c	&	abc	abc
276 | a[[:notdef:]]c	&C	ECTYPE
277 | a[[:		&C	EBRACK
278 | a[[:alpha	&C	EBRACK
279 | a[[:alpha:]	&C	EBRACK
280 | a[[:alpha,:]	&C	ECTYPE
281 | a[[:]:]]b	&C	ECTYPE
282 | a[[:-:]]b	&C	ECTYPE
283 | a[[:alph:]]	&C	ECTYPE
284 | a[[:alphabet:]]	&C	ECTYPE
285 | [[:alnum:]]+	-	-%@a0X-	a0X
286 | [[:alpha:]]+	-	-%@aX0-	aX
287 | [[:blank:]]+	-	aSSTb	SST
288 | [[:cntrl:]]+	-	aNTb	NT
289 | [[:digit:]]+	-	a019b	019
290 | [[:graph:]]+	-	Sa%bS	a%b
291 | [[:lower:]]+	-	AabC	ab
292 | [[:print:]]+	-	NaSbN	aSb
293 | [[:punct:]]+	-	S%-&T	%-&
294 | [[:space:]]+	-	aSNTb	SNT
295 | [[:upper:]]+	-	aBCd	BC
296 | [[:xdigit:]]+	-	p0f3Cq	0f3C
297 | a[[=b=]]c	&	abc	abc
298 | a[[=		&C	EBRACK
299 | a[[=b		&C	EBRACK
300 | a[[=b=		&C	EBRACK
301 | a[[=b=]		&C	EBRACK
302 | a[[=b,=]]	&C	ECOLLATE
303 | a[[=one=]]b	&	a1b	a1b
304 | 
305 | # complexities
306 | a(((b)))c	-	abc	abc
307 | a(b|(c))d	-	abd	abd
308 | a(b*|c)d	-	abbd	abbd
309 | # just gotta have one DFA-buster, of course
310 | a[ab]{20}	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
311 | # and an inline expansion in case somebody gets tricky
312 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
313 | # and in case somebody just slips in an NFA...
314 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)	-	aaaaabaaaabaaaabaaaabweeknights	aaaaabaaaabaaaabaaaabweeknights
315 | # fish for anomalies as the number of states passes 32
316 | 12345678901234567890123456789	-	a12345678901234567890123456789b	12345678901234567890123456789
317 | 123456789012345678901234567890	-	a123456789012345678901234567890b	123456789012345678901234567890
318 | 1234567890123456789012345678901	-	a1234567890123456789012345678901b	1234567890123456789012345678901
319 | 12345678901234567890123456789012	-	a12345678901234567890123456789012b	12345678901234567890123456789012
320 | 123456789012345678901234567890123	-	a123456789012345678901234567890123b	123456789012345678901234567890123
321 | # and one really big one, beyond any plausible word width
322 | 1234567890123456789012345678901234567890123456789012345678901234567890	-	a1234567890123456789012345678901234567890123456789012345678901234567890b	1234567890123456789012345678901234567890123456789012345678901234567890
323 | # fish for problems as brackets go past 8
324 | [ab][cd][ef][gh][ij][kl][mn]	-	xacegikmoq	acegikm
325 | [ab][cd][ef][gh][ij][kl][mn][op]	-	xacegikmoq	acegikmo
326 | [ab][cd][ef][gh][ij][kl][mn][op][qr]	-	xacegikmoqy	acegikmoq
327 | [ab][cd][ef][gh][ij][kl][mn][op][q]	-	xacegikmoqy	acegikmoq
328 | 
329 | # subtleties of matching
330 | abc		&	xabcy	abc
331 | a\(b\)?c\1d	b	acd
332 | aBc		i	Abc	Abc
333 | a[Bc]*d		i	abBCcd	abBCcd
334 | 0[[:upper:]]1	&i	0a1	0a1
335 | 0[[:lower:]]1	&i	0A1	0A1
336 | a[^b]c		&i	abc
337 | a[^b]c		&i	aBc
338 | a[^b]c		&i	adc	adc
339 | [a]b[c]		-	abc	abc
340 | [a]b[a]		-	aba	aba
341 | [abc]b[abc]	-	abc	abc
342 | [abc]b[abd]	-	abd	abd
343 | a(b?c)+d	-	accd	accd
344 | (wee|week)(knights|night)	-	weeknights	weeknights
345 | (we|wee|week|frob)(knights|night|day)	-	weeknights	weeknights
346 | a[bc]d		-	xyzaaabcaababdacd	abd
347 | a[ab]c		-	aaabc	abc
348 | abc		s	abc	abc
349 | a*		&	b	@b
350 | 
351 | # Let's have some fun -- try to match a C comment.
352 | # first the obvious, which looks okay at first glance...
353 | /\*.*\*/	-	/*x*/	/*x*/
354 | # but...
355 | /\*.*\*/	-	/*x*/y/*z*/	/*x*/y/*z*/
356 | # okay, we must not match */ inside; try to do that...
357 | /\*([^*]|\*[^/])*\*/	-	/*x*/	/*x*/
358 | /\*([^*]|\*[^/])*\*/	-	/*x*/y/*z*/	/*x*/
359 | # but...
360 | /\*([^*]|\*[^/])*\*/	-	/*x**/y/*z*/	/*x**/y/*z*/
361 | # and a still fancier version, which does it right (I think)...
362 | /\*([^*]|\*+[^*/])*\*+/	-	/*x*/	/*x*/
363 | /\*([^*]|\*+[^*/])*\*+/	-	/*x*/y/*z*/	/*x*/
364 | /\*([^*]|\*+[^*/])*\*+/	-	/*x**/y/*z*/	/*x**/
365 | /\*([^*]|\*+[^*/])*\*+/	-	/*x****/y/*z*/	/*x****/
366 | /\*([^*]|\*+[^*/])*\*+/	-	/*x**x*/y/*z*/	/*x**x*/
367 | /\*([^*]|\*+[^*/])*\*+/	-	/*x***x/y/*z*/	/*x***x/y/*z*/
368 | 
369 | # subexpressions
370 | a(b)(c)d	-	abcd	abcd	b,c
371 | a(((b)))c	-	abc	abc	b,b,b
372 | a(b|(c))d	-	abd	abd	b,-
373 | a(b*|c|e)d	-	abbd	abbd	bb
374 | a(b*|c|e)d	-	acd	acd	c
375 | a(b*|c|e)d	-	ad	ad	@d
376 | a(b?)c		-	abc	abc	b
377 | a(b?)c		-	ac	ac	@c
378 | a(b+)c		-	abc	abc	b
379 | a(b+)c		-	abbbc	abbbc	bbb
380 | a(b*)c		-	ac	ac	@c
381 | (a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
382 | # the regression tester only asks for 9 subexpressions
383 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
384 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l	-	abcdefghijkl	abcdefghijkl	b,c,d,e,f,g,h,i,j,k
385 | a([bc]?)c	-	abc	abc	b
386 | a([bc]?)c	-	ac	ac	@c
387 | a([bc]+)c	-	abc	abc	b
388 | a([bc]+)c	-	abcc	abcc	bc
389 | a([bc]+)bc	-	abcbc	abcbc	bc
390 | a(bb+|b)b	-	abb	abb	b
391 | a(bbb+|bb+|b)b	-	abb	abb	b
392 | a(bbb+|bb+|b)b	-	abbb	abbb	bb
393 | a(bbb+|bb+|b)bb	-	abbb	abbb	b
394 | (.*).*		-	abcdef	abcdef	abcdef
395 | (a*)*		-	bc	@b	@b
396 | 
397 | # do we get the right subexpression when it is used more than once?
398 | a(b|c)*d	-	ad	ad	-
399 | a(b|c)*d	-	abcd	abcd	c
400 | a(b|c)+d	-	abd	abd	b
401 | a(b|c)+d	-	abcd	abcd	c
402 | a(b|c?)+d	-	ad	ad	@d
403 | a(b|c?)+d	-	abcd	abcd	@d
404 | a(b|c){0,0}d	-	ad	ad	-
405 | a(b|c){0,1}d	-	ad	ad	-
406 | a(b|c){0,1}d	-	abd	abd	b
407 | a(b|c){0,2}d	-	ad	ad	-
408 | a(b|c){0,2}d	-	abcd	abcd	c
409 | a(b|c){0,}d	-	ad	ad	-
410 | a(b|c){0,}d	-	abcd	abcd	c
411 | a(b|c){1,1}d	-	abd	abd	b
412 | a(b|c){1,1}d	-	acd	acd	c
413 | a(b|c){1,2}d	-	abd	abd	b
414 | a(b|c){1,2}d	-	abcd	abcd	c
415 | a(b|c){1,}d	-	abd	abd	b
416 | a(b|c){1,}d	-	abcd	abcd	c
417 | a(b|c){2,2}d	-	acbd	acbd	b
418 | a(b|c){2,2}d	-	abcd	abcd	c
419 | a(b|c){2,4}d	-	abcd	abcd	c
420 | a(b|c){2,4}d	-	abcbd	abcbd	b
421 | a(b|c){2,4}d	-	abcbcd	abcbcd	c
422 | a(b|c){2,}d	-	abcd	abcd	c
423 | a(b|c){2,}d	-	abcbd	abcbd	b
424 | a(b+|((c)*))+d	-	abd	abd	@d,@d,-
425 | a(b+|((c)*))+d	-	abcd	abcd	@d,@d,-
426 | 
427 | # check out the STARTEND option
428 | [abc]		&#	a(b)c	b
429 | [abc]		&#	a(d)c
430 | [abc]		&#	a(bc)d	b
431 | [abc]		&#	a(dc)d	c
432 | .		&#	a()c
433 | b.*c		&#	b(bc)c	bc
434 | b.*		&#	b(bc)c	bc
435 | .*c		&#	b(bc)c	bc
436 | 
437 | # plain strings, with the NOSPEC flag
438 | abc		m	abc	abc
439 | abc		m	xabcy	abc
440 | abc		m	xyz
441 | a*b		m	aba*b	a*b
442 | a*b		m	ab
443 | ""		mC	EMPTY
444 | 
445 | # cases involving NULs
446 | aZb		&	a	a
447 | aZb		&p	a
448 | aZb		&p#	(aZb)	aZb
449 | aZ*b		&p#	(ab)	ab
450 | a.b		&#	(aZb)	aZb
451 | a.*		&#	(aZb)c	aZb
452 | 
453 | # word boundaries (ick)
454 | [[:<:]]a	&	a	a
455 | [[:<:]]a	&	ba
456 | [[:<:]]a	&	-a	a
457 | a[[:>:]]	&	a	a
458 | a[[:>:]]	&	ab
459 | a[[:>:]]	&	a-	a
460 | [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc	abc
461 | [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc-q	abc
462 | [[:<:]]a.c[[:>:]]	&	axc-dayc-dazce-abc	axc
463 | [[:<:]]b.c[[:>:]]	&	a_bxc-byc_d-bzc-q	bzc
464 | [[:<:]].x..[[:>:]]	&	y_xa_-_xb_y-_xc_-axdc	_xc_
465 | [[:<:]]a_b[[:>:]]	&	x_a_b
466 | 
467 | # past problems, and suspected problems
468 | (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])	-	A1	A1
469 | abcdefghijklmnop	i	abcdefghijklmnop	abcdefghijklmnop
470 | abcdefghijklmnopqrstuv	i	abcdefghijklmnopqrstuv	abcdefghijklmnopqrstuv
471 | (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])	-	CC11	CC11
472 | CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a	-	CC11	CC11
473 | Char \([a-z0-9_]*\)\[.*	b	Char xyz[k	Char xyz[k	xyz
474 | a?b	-	ab	ab
475 | -\{0,1\}[0-9]*$	b	-5	-5
476 | 


--------------------------------------------------------------------------------
/src/re/utils.h:
--------------------------------------------------------------------------------
 1 | /* utility definitions */
 2 | #ifndef _POSIX2_RE_DUP_MAX
 3 | #define _POSIX2_RE_DUP_MAX   255
 4 | #endif
 5 | 
 6 | #define	DUPMAX		_POSIX2_RE_DUP_MAX	/* xxx is this right? */
 7 | #define	INFINITY	(DUPMAX + 1)
 8 | #define	NC		(CHAR_MAX - CHAR_MIN + 1)
 9 | typedef unsigned char uch;
10 | 
11 | /* switch off assertions (if not already off) if no REDEBUG */
12 | #ifndef REDEBUG
13 | #ifndef NDEBUG
14 | #define	NDEBUG	/* no assertions please */
15 | #endif
16 | #endif
17 | #include <assert.h>
18 | 
19 | /* for old systems with bcopy() but no memmove() */
20 | #ifdef USEBCOPY
21 | #define	memmove(d, s, c)	bcopy(s, d, c)
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/scheme-private.h:
--------------------------------------------------------------------------------
  1 | /* T I N Y S C H E M E    1 . 50
  2 |  *   Continued by armornick (March 2016)
  3 |  *   Original work by Dimitrios Souflis (dsouflis@acm.org)
  4 |  *   
  5 |  *   Based on MiniScheme (original credits follow)
  6 |  * (MINISCM)               coded by Atsushi Moriwaki (11/5/1989)
  7 |  * (MINISCM)           E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
  8 |  * (MINISCM) This version has been modified by R.C. Secrist.
  9 |  * (MINISCM)
 10 |  * (MINISCM) Mini-Scheme is now maintained by Akira KIDA.
 11 |  * (MINISCM)
 12 |  * (MINISCM) This is a revised and modified version by Akira KIDA.
 13 |  * (MINISCM)    current version is 0.85k4 (15 May 1994)
 14 |  * (MINISCM)
 15 |  *
 16 |  */
 17 | 
 18 | /* scheme-private.h */
 19 | 
 20 | #ifndef _SCHEME_PRIVATE_H
 21 | #define _SCHEME_PRIVATE_H
 22 | 
 23 | #include "scheme.h"
 24 | /*------------------ Ugly internals -----------------------------------*/
 25 | /*------------------ Of interest only to FFI users --------------------*/
 26 | 
 27 | #ifdef __cplusplus
 28 | extern "C" {
 29 | #endif
 30 | 
 31 | enum scheme_port_kind {
 32 |   port_free=0,
 33 |   port_file=1,
 34 |   port_string=2,
 35 |   port_srfi6=4,
 36 |   port_input=16,
 37 |   port_output=32,
 38 |   port_saw_EOF=64
 39 | };
 40 | 
 41 | typedef struct port {
 42 |   unsigned char kind;
 43 |   union {
 44 |     struct {
 45 |       FILE *file;
 46 |       int closeit;
 47 | #if SHOW_ERROR_LINE
 48 |       int curr_line;
 49 |       char *filename;
 50 | #endif
 51 |     } stdio;
 52 |     struct {
 53 |       char *start;
 54 |       char *past_the_end;
 55 |       char *curr;
 56 |     } string;
 57 |   } rep;
 58 | } port;
 59 | 
 60 | /* cell structure */
 61 | struct cell {
 62 |   unsigned int _flag;
 63 |   union {
 64 |     struct {
 65 |       char   *_svalue;
 66 |       int   _length;
 67 |     } _string;
 68 |     num _number;
 69 |     port *_port;
 70 |     foreign_func _ff;
 71 |     struct {
 72 |       struct cell *_car;
 73 |       struct cell *_cdr;
 74 |     } _cons;
 75 |   } _object;
 76 | };
 77 | 
 78 | struct scheme {
 79 |   /* arrays for segments */
 80 |   func_alloc malloc;
 81 |   func_dealloc free;
 82 | 
 83 |   /* return code */
 84 |   int retcode;
 85 |   int tracing;
 86 | 
 87 | 
 88 |   #define CELL_SEGSIZE    5000  /* # of cells in one segment */
 89 |   #define CELL_NSEGMENT   10    /* # of segments for cells */
 90 |   char *alloc_seg[CELL_NSEGMENT];
 91 |   pointer cell_seg[CELL_NSEGMENT];
 92 |   int     last_cell_seg;
 93 | 
 94 |   /* We use 4 registers. */
 95 |   pointer args;            /* register for arguments of function */
 96 |   pointer envir;           /* stack register for current environment */
 97 |   pointer code;            /* register for current code */
 98 |   pointer dump;            /* stack register for next evaluation */
 99 | 
100 |   int interactive_repl;    /* are we in an interactive REPL? */
101 | 
102 |   struct cell _sink;
103 |   pointer sink;            /* when mem. alloc. fails */
104 |   struct cell _NIL;
105 |   pointer NIL;             /* special cell representing empty cell */
106 |   struct cell _HASHT;
107 |   pointer T;               /* special cell representing #t */
108 |   struct cell _HASHF;
109 |   pointer F;               /* special cell representing #f */
110 |   struct cell _EOF_OBJ;
111 |   pointer EOF_OBJ;         /* special cell representing end-of-file object */
112 |   pointer oblist;          /* pointer to symbol table */
113 |   pointer global_env;      /* pointer to global environment */
114 |   pointer c_nest;          /* stack for nested calls from C */
115 | 
116 |   /* global pointers to special symbols */
117 |   pointer LAMBDA;               /* pointer to syntax lambda */
118 |   pointer QUOTE;           /* pointer to syntax quote */
119 | 
120 |   pointer QQUOTE;               /* pointer to symbol quasiquote */
121 |   pointer UNQUOTE;         /* pointer to symbol unquote */
122 |   pointer UNQUOTESP;       /* pointer to symbol unquote-splicing */
123 |   pointer FEED_TO;         /* => */
124 |   pointer COLON_HOOK;      /* *colon-hook* */
125 |   pointer ERROR_HOOK;      /* *error-hook* */
126 |   pointer SHARP_HOOK;  /* *sharp-hook* */
127 |   pointer COMPILE_HOOK;  /* *compile-hook* */
128 | 
129 |   pointer free_cell;       /* pointer to top of free cells */
130 |   long    fcells;          /* # of free cells */
131 | 
132 |   pointer inport;
133 |   pointer outport;
134 |   pointer save_inport;
135 |   pointer loadport;
136 | 
137 |   #define MAXFIL 64
138 |   port load_stack[MAXFIL];     /* Stack of open files for port -1 (LOADing) */
139 |   int nesting_stack[MAXFIL];
140 |   int file_i;
141 |   int nesting;
142 | 
143 |   char    gc_verbose;      /* if gc_verbose is not zero, print gc status */
144 |   char    no_memory;       /* Whether mem. alloc. has failed */
145 | 
146 |   #define LINESIZE 1024
147 |   char    linebuff[LINESIZE];
148 |   #define STRBUFFSIZE 256
149 |   char    strbuff[STRBUFFSIZE];
150 | 
151 |   FILE *tmpfp;
152 |   int tok;
153 |   int print_flag;
154 |   pointer value;
155 |   int op;
156 | 
157 |   void *ext_data;     /* For the benefit of foreign functions */
158 |   long gensym_cnt;
159 | 
160 |   struct scheme_interface *vptr;
161 |   void *dump_base;    /* pointer to base of allocated dump stack */
162 |   int dump_size;      /* number of frames allocated for dump stack */
163 | };
164 | 
165 | /* operator code */
166 | enum scheme_opcodes {
167 | #define _OP_DEF(A,B,C,D,E,OP) OP,
168 | #include "opdefines.h"
169 |   OP_MAXDEFINED
170 | };
171 | 
172 | 
173 | pointer _cons(scheme *sc, pointer a, pointer b, int immutable);
174 | pointer mk_integer(scheme *sc, long num);
175 | pointer mk_real(scheme *sc, double num);
176 | pointer mk_symbol(scheme *sc, const char *name);
177 | pointer gensym(scheme *sc);
178 | pointer mk_string(scheme *sc, const char *str);
179 | pointer mk_counted_string(scheme *sc, const char *str, int len);
180 | pointer mk_empty_string(scheme *sc, int len, char fill);
181 | pointer mk_character(scheme *sc, int c);
182 | pointer mk_foreign_func(scheme *sc, foreign_func f);
183 | void putstr(scheme *sc, const char *s);
184 | int list_length(scheme *sc, pointer a);
185 | int eqv(pointer a, pointer b);
186 | 
187 | 
188 | int is_string(pointer p);
189 | char *string_value(pointer p);
190 | int is_number(pointer p);
191 | num nvalue(pointer p);
192 | long ivalue(pointer p);
193 | double rvalue(pointer p);
194 | int is_integer(pointer p);
195 | int is_real(pointer p);
196 | int is_character(pointer p);
197 | long charvalue(pointer p);
198 | int is_vector(pointer p);
199 | 
200 | int is_port(pointer p);
201 | 
202 | int is_pair(pointer p);
203 | pointer pair_car(pointer p);
204 | pointer pair_cdr(pointer p);
205 | pointer set_car(pointer p, pointer q);
206 | pointer set_cdr(pointer p, pointer q);
207 | 
208 | int is_symbol(pointer p);
209 | char *symname(pointer p);
210 | int hasprop(pointer p);
211 | 
212 | int is_syntax(pointer p);
213 | int is_proc(pointer p);
214 | int is_foreign(pointer p);
215 | char *syntaxname(pointer p);
216 | int is_closure(pointer p);
217 | #ifdef USE_MACRO
218 | int is_macro(pointer p);
219 | #endif
220 | pointer closure_code(pointer p);
221 | pointer closure_env(pointer p);
222 | 
223 | int is_continuation(pointer p);
224 | int is_promise(pointer p);
225 | int is_environment(pointer p);
226 | int is_immutable(pointer p);
227 | void setimmutable(pointer p);
228 | 
229 | pointer reverse(scheme *sc, pointer a);
230 | pointer reverse_in_place(scheme *sc, pointer term, pointer list);
231 | void fill_vector(pointer vec, pointer obj);
232 | pointer vector_elem(pointer vec, int ielem);
233 | pointer set_vector_elem(pointer vec, int ielem, pointer a);
234 | 
235 | #ifdef __cplusplus
236 | }
237 | #endif
238 | 
239 | #endif
240 | 
241 | 


--------------------------------------------------------------------------------
/src/scheme.h:
--------------------------------------------------------------------------------
  1 | /* T I N Y S C H E M E    1 . 50
  2 |  *   Continued by armornick (March 2016)
  3 |  *   Original work by Dimitrios Souflis (dsouflis@acm.org)
  4 |  *   
  5 |  *   Based on MiniScheme (original credits follow)
  6 |  * (MINISCM)               coded by Atsushi Moriwaki (11/5/1989)
  7 |  * (MINISCM)           E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
  8 |  * (MINISCM) This version has been modified by R.C. Secrist.
  9 |  * (MINISCM)
 10 |  * (MINISCM) Mini-Scheme is now maintained by Akira KIDA.
 11 |  * (MINISCM)
 12 |  * (MINISCM) This is a revised and modified version by Akira KIDA.
 13 |  * (MINISCM)    current version is 0.85k4 (15 May 1994)
 14 |  * (MINISCM)
 15 |  *
 16 |  */
 17 | 
 18 | #ifndef _SCHEME_H
 19 | #define _SCHEME_H
 20 | 
 21 | #include "config.h"
 22 | 
 23 | #include <stdio.h>
 24 | 
 25 | #ifdef __cplusplus
 26 | extern "C" {
 27 | #endif
 28 | 
 29 | typedef struct scheme scheme;
 30 | typedef struct cell *pointer;
 31 | 
 32 | typedef void * (*func_alloc)(size_t);
 33 | typedef void (*func_dealloc)(void *);
 34 | 
 35 | /* num, for generic arithmetic */
 36 | typedef struct num {
 37 |      char is_fixnum;
 38 |      union {
 39 |           long ivalue;
 40 |           double rvalue;
 41 |      } value;
 42 | } num;
 43 | 
 44 | SCHEME_EXPORT scheme *scheme_init_new();
 45 | SCHEME_EXPORT scheme *scheme_init_new_custom_alloc(func_alloc malloc, func_dealloc free);
 46 | SCHEME_EXPORT int scheme_init(scheme **sc);
 47 | SCHEME_EXPORT int scheme_init_custom_alloc(scheme **sc, func_alloc, func_dealloc);
 48 | SCHEME_EXPORT void scheme_deinit(scheme *sc);
 49 | SCHEME_EXPORT void scheme_set_input_port_file(scheme *sc, FILE *fin);
 50 | SCHEME_EXPORT void scheme_set_input_port_string(scheme *sc, char *start, char *past_the_end);
 51 | SCHEME_EXPORT void scheme_set_output_port_file(scheme *sc, FILE *fin);
 52 | SCHEME_EXPORT void scheme_set_output_port_string(scheme *sc, char *start, char *past_the_end);
 53 | SCHEME_EXPORT void scheme_load_file(scheme *sc, FILE *fin);
 54 | SCHEME_EXPORT void scheme_load_named_file(scheme *sc, FILE *fin, const char *filename);
 55 | SCHEME_EXPORT void scheme_load_string(scheme *sc, const char *cmd);
 56 | SCHEME_EXPORT pointer scheme_apply0(scheme *sc, const char *procname);
 57 | SCHEME_EXPORT pointer scheme_call(scheme *sc, pointer func, pointer args);
 58 | SCHEME_EXPORT pointer scheme_eval(scheme *sc, pointer obj);
 59 | SCHEME_EXPORT void scheme_set_external_data(scheme *sc, void *p);
 60 | SCHEME_EXPORT void scheme_define(scheme *sc, pointer env, pointer symbol, pointer value);
 61 | 
 62 | typedef pointer (*foreign_func)(scheme *, pointer);
 63 | 
 64 | SCHEME_EXPORT pointer scheme_cons(scheme *sc, pointer a, pointer b, int immutable);
 65 | SCHEME_EXPORT pointer scheme_integer(scheme *sc, long num);
 66 | SCHEME_EXPORT pointer scheme_real(scheme *sc, double num);
 67 | SCHEME_EXPORT pointer scheme_symbol(scheme *sc, const char *name);
 68 | SCHEME_EXPORT pointer scheme_gensym(scheme *sc);
 69 | SCHEME_EXPORT pointer scheme_string(scheme *sc, const char *str);
 70 | SCHEME_EXPORT pointer scheme_counted_string(scheme *sc, const char *str, int len);
 71 | SCHEME_EXPORT pointer scheme_empty_string(scheme *sc, int len, char fill);
 72 | SCHEME_EXPORT pointer scheme_character(scheme *sc, int c);
 73 | SCHEME_EXPORT pointer scheme_foreign_func(scheme *sc, foreign_func f);
 74 | SCHEME_EXPORT void scheme_putstr(scheme *sc, const char *s);
 75 | SCHEME_EXPORT int scheme_list_length(scheme *sc, pointer a);
 76 | SCHEME_EXPORT int scheme_eqv(pointer a, pointer b);
 77 | 
 78 | SCHEME_EXPORT int scheme_is_string(pointer p);
 79 | SCHEME_EXPORT char* scheme_string_value(pointer p);
 80 | SCHEME_EXPORT int scheme_is_number(pointer p);
 81 | SCHEME_EXPORT num scheme_nvalue(pointer p);
 82 | SCHEME_EXPORT long scheme_ivalue(pointer p);
 83 | SCHEME_EXPORT double scheme_rvalue(pointer p);
 84 | SCHEME_EXPORT int scheme_is_integer(pointer p);
 85 | SCHEME_EXPORT int scheme_is_real(pointer p);
 86 | SCHEME_EXPORT int scheme_is_character(pointer p);
 87 | SCHEME_EXPORT long scheme_charvalue(pointer p);
 88 | SCHEME_EXPORT int scheme_is_vector(pointer p);
 89 | 
 90 | SCHEME_EXPORT int scheme_is_port(pointer p);
 91 | 
 92 | SCHEME_EXPORT int scheme_is_pair(pointer p);
 93 | SCHEME_EXPORT pointer scheme_pair_car(pointer p);
 94 | SCHEME_EXPORT pointer scheme_pair_cdr(pointer p);
 95 | SCHEME_EXPORT pointer scheme_set_car(pointer p, pointer q);
 96 | SCHEME_EXPORT pointer scheme_set_cdr(pointer p, pointer q);
 97 | 
 98 | SCHEME_EXPORT int scheme_is_symbol(pointer p);
 99 | SCHEME_EXPORT char* scheme_symname(pointer p);
100 | 
101 | SCHEME_EXPORT int scheme_is_syntax(pointer p);
102 | SCHEME_EXPORT int scheme_is_proc(pointer p);
103 | SCHEME_EXPORT int scheme_is_foreign(pointer p);
104 | SCHEME_EXPORT char* scheme_syntaxname(pointer p);
105 | SCHEME_EXPORT int scheme_is_closure(pointer p);
106 | 
107 | #ifdef USE_MACRO
108 | SCHEME_EXPORT int scheme_is_macro(pointer p);
109 | #endif
110 | 
111 | SCHEME_EXPORT int scheme_is_continuation(pointer p);
112 | SCHEME_EXPORT int scheme_is_promise(pointer p);
113 | SCHEME_EXPORT int scheme_is_environment(pointer p);
114 | SCHEME_EXPORT int scheme_is_immutable(pointer p);
115 | SCHEME_EXPORT void scheme_setimmutable(pointer p);
116 | 
117 | 
118 | SCHEME_EXPORT int scheme_retcode(scheme *sc);
119 | SCHEME_EXPORT pointer scheme_global_env(scheme *sc);
120 | SCHEME_EXPORT pointer scheme_nil(scheme *sc);
121 | SCHEME_EXPORT pointer scheme_true(scheme *sc);
122 | SCHEME_EXPORT pointer scheme_false(scheme *sc);
123 | SCHEME_EXPORT void scheme_memory_error(scheme *sc);
124 | 
125 | SCHEME_EXPORT pointer scheme_reverse(scheme *sc, pointer a);
126 | SCHEME_EXPORT pointer scheme_reverse_in_place(scheme *sc, pointer term, pointer list);
127 | SCHEME_EXPORT void scheme_fill_vector(pointer vec, pointer obj);
128 | SCHEME_EXPORT pointer scheme_vector_elem(pointer vec, int ielem);
129 | SCHEME_EXPORT pointer scheme_set_vector_elem(pointer vec, int ielem, pointer a);
130 | 
131 | 
132 | 
133 | #define cons(sc,a,b) scheme_cons(sc,a,b,0)
134 | #define immutable_cons(sc,a,b) scheme_cons(sc,a,b,1)
135 | 
136 | 
137 | struct scheme_interface {
138 |   void (*scheme_define)(scheme *sc, pointer env, pointer symbol, pointer value);
139 |   pointer (*cons)(scheme *sc, pointer a, pointer b);
140 |   pointer (*immutable_cons)(scheme *sc, pointer a, pointer b);
141 |   pointer (*reserve_cells)(scheme *sc, int n);
142 |   pointer (*mk_integer)(scheme *sc, long num);
143 |   pointer (*mk_real)(scheme *sc, double num);
144 |   pointer (*mk_symbol)(scheme *sc, const char *name);
145 |   pointer (*gensym)(scheme *sc);
146 |   pointer (*mk_string)(scheme *sc, const char *str);
147 |   pointer (*mk_counted_string)(scheme *sc, const char *str, int len);
148 |   pointer (*mk_character)(scheme *sc, int c);
149 |   pointer (*mk_vector)(scheme *sc, int len);
150 |   pointer (*mk_foreign_func)(scheme *sc, foreign_func f);
151 |   void (*putstr)(scheme *sc, const char *s);
152 |   void (*putcharacter)(scheme *sc, int c);
153 | 
154 |   int (*is_string)(pointer p);
155 |   char *(*string_value)(pointer p);
156 |   int (*is_number)(pointer p);
157 |   num (*nvalue)(pointer p);
158 |   long (*ivalue)(pointer p);
159 |   double (*rvalue)(pointer p);
160 |   int (*is_integer)(pointer p);
161 |   int (*is_real)(pointer p);
162 |   int (*is_character)(pointer p);
163 |   long (*charvalue)(pointer p);
164 |   int (*is_list)(scheme *sc, pointer p);
165 |   int (*is_vector)(pointer p);
166 |   int (*list_length)(scheme *sc, pointer vec);
167 |   long (*vector_length)(pointer vec);
168 |   void (*fill_vector)(pointer vec, pointer elem);
169 |   pointer (*vector_elem)(pointer vec, int ielem);
170 |   pointer (*set_vector_elem)(pointer vec, int ielem, pointer newel);
171 |   int (*is_port)(pointer p);
172 | 
173 |   int (*is_pair)(pointer p);
174 |   pointer (*pair_car)(pointer p);
175 |   pointer (*pair_cdr)(pointer p);
176 |   pointer (*set_car)(pointer p, pointer q);
177 |   pointer (*set_cdr)(pointer p, pointer q);
178 | 
179 |   int (*is_symbol)(pointer p);
180 |   char *(*symname)(pointer p);
181 | 
182 |   int (*is_syntax)(pointer p);
183 |   int (*is_proc)(pointer p);
184 |   int (*is_foreign)(pointer p);
185 |   char *(*syntaxname)(pointer p);
186 |   int (*is_closure)(pointer p);
187 |   int (*is_macro)(pointer p);
188 |   pointer (*closure_code)(pointer p);
189 |   pointer (*closure_env)(pointer p);
190 | 
191 |   int (*is_continuation)(pointer p);
192 |   int (*is_promise)(pointer p);
193 |   int (*is_environment)(pointer p);
194 |   int (*is_immutable)(pointer p);
195 |   void (*setimmutable)(pointer p);
196 |   void (*load_file)(scheme *sc, FILE *fin);
197 |   void (*load_string)(scheme *sc, const char *input);
198 | };
199 | 
200 | typedef struct scheme_interface scheme_interface;
201 | 
202 | SCHEME_EXPORT void scheme_init_interface(scheme_interface *sci);
203 | 
204 | typedef struct scheme_registerable
205 | {
206 |   foreign_func  f;
207 |   const char *  name;
208 | } scheme_registerable;
209 | 
210 | void scheme_register_foreign_func_list(scheme * sc,
211 |                                        scheme_registerable * list,
212 |                                        int n);
213 | 
214 | #ifdef __cplusplus
215 | }
216 | #endif
217 | 
218 | #endif
219 | 


--------------------------------------------------------------------------------