├── .gitignore ├── CHANGES ├── CMakeLists.txt ├── LICENSE ├── README.md ├── docs ├── Manual.txt ├── hack.txt └── regex-module.txt └── src ├── BUILDING ├── CHANGES ├── COPYING ├── Manual.txt ├── MiniSCHEMETribute.txt ├── api.c ├── config.h ├── dynload.c ├── dynload.h ├── hack.txt ├── init.scm ├── main.c ├── opdefines.h ├── re ├── COPYRIGHT ├── Makefile.in ├── README ├── README.1st ├── WHATSNEW ├── cclass.h ├── cname.h ├── debug.c ├── debug.ih ├── engine.c ├── engine.ih ├── main.c ├── main.ih ├── mkh ├── re.c ├── re.makefile ├── re.scm ├── regcomp.c ├── regcomp.ih ├── regerror.c ├── regerror.ih ├── regex.001 ├── regex.3 ├── regex.7 ├── regex.h ├── regex2.h ├── regexec.c ├── regfree.c ├── split.c ├── tests └── utils.h ├── scheme-private.h ├── scheme.c └── scheme.h /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/windows,linux,cmake,c++,c 3 | 4 | ### Windows ### 5 | # Windows image file caches 6 | Thumbs.db 7 | ehthumbs.db 8 | 9 | # Folder config file 10 | Desktop.ini 11 | 12 | # Recycle Bin used on file shares 13 | $RECYCLE.BIN/ 14 | 15 | # Windows Installer files 16 | *.cab 17 | *.msi 18 | *.msm 19 | *.msp 20 | 21 | # Windows shortcuts 22 | *.lnk 23 | 24 | 25 | ### Linux ### 26 | *~ 27 | 28 | # temporary files which can be created if a process still has a handle open of a deleted file 29 | .fuse_hidden* 30 | 31 | # KDE directory preferences 32 | .directory 33 | 34 | # Linux trash folder which might appear on any partition or disk 35 | .Trash-* 36 | 37 | 38 | ### CMake ### 39 | CMakeCache.txt 40 | CMakeFiles 41 | CMakeScripts 42 | Makefile 43 | cmake_install.cmake 44 | install_manifest.txt 45 | 46 | 47 | ### C++ ### 48 | # Compiled Object files 49 | *.slo 50 | *.lo 51 | *.o 52 | *.obj 53 | 54 | # Precompiled Headers 55 | *.gch 56 | *.pch 57 | 58 | # Compiled Dynamic libraries 59 | *.so 60 | *.dylib 61 | *.dll 62 | 63 | # Fortran module files 64 | *.mod 65 | 66 | # Compiled Static libraries 67 | *.lai 68 | *.la 69 | *.a 70 | *.lib 71 | 72 | # Executables 73 | *.exe 74 | *.out 75 | *.app 76 | 77 | 78 | ### C ### 79 | # Object files 80 | *.o 81 | *.ko 82 | *.obj 83 | *.elf 84 | 85 | # Precompiled Headers 86 | *.gch 87 | *.pch 88 | 89 | # Libraries 90 | *.lib 91 | *.a 92 | *.la 93 | *.lo 94 | 95 | # Shared objects (inc. Windows DLLs) 96 | *.dll 97 | *.so 98 | *.so.* 99 | *.dylib 100 | 101 | # Executables 102 | *.exe 103 | *.out 104 | *.app 105 | *.i*86 106 | *.x86_64 107 | *.hex 108 | 109 | # Debug files 110 | *.dSYM/ 111 | 112 | # ------------------------------------------------------- 113 | # Project specific ignores 114 | 115 | # out-of-source build directories 116 | build-*/ 117 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Change Log 2 | ---------- 3 | 4 | Version 1.50 5 | - extended the public API and properly namespaced everything 6 | - separated the main tinyscheme shell into a separate file 7 | - made source compatible with Visual C++ (and consequently C89) 8 | - added regex module and updated it to use the new API 9 | 10 | Version 1.41 11 | Bugs fixed: 12 | #3020389 - Added makefile section for Mac OS X (SL) 13 | #3286135 - Fixed num_mod routine which caused errors in use of modulo 14 | #3290232 - Corrected version number shown on startup (GM) 15 | #3394882 - Added missing #if in opdefines.h around get and put (DC) 16 | #3395547 - Fix for the modulo procedure (DC) 17 | #3400290 - Optimized append to make it an O(n) operation (DC) 18 | #3493926 - Corrected flag used when building shared files on OSX (J) 19 | 20 | R5RS related changes: 21 | #2866196 - Parser does not handle delimiters correctly 22 | #3395548 - Add a decimal point to inexact numbers in atom2str (DC) 23 | #3399331 - Make min/max return inexact when any argument is inexact 24 | #3399332 - Compatability fix for expt. 25 | #3399335 - Optional radix for string->number and number->string (DC) 26 | #3400202 - Append with one argument should not return a list (DC) 27 | #3400284 - Compatability fix for integer? 28 | 29 | Other changes: 30 | - Added flags to makefile for MinGW/MSYS (TC) 31 | - Moved variable declarations to avoid warnings with some compilers 32 | - Don't print space after initial #( when printing vectors. 33 | - Minor optimization for is_nonneg(). 34 | - No need to round integers in OP_ROUND (#3400284) 35 | - Fixes to code that reports line number with error (RC) 36 | 37 | Contributors: 38 | Kevin Cozens, Gordon McNutt, Doug Currie, Sean Long, Tim Cas, Joey, 39 | Richard Copley, and CMarinier. 40 | 41 | Version 1.40 42 | Bugs fixed: 43 | #1964950 - Stop core dumps due to bad syntax in LET (and variants) 44 | #2826594 - allow reverse to work on empty list (Tony Garnock-Jones) 45 | Potential problem of arglist to foreign calls being wrongly GC'ed. 46 | Fixed bug that read could loop forever (tehom). 47 | 48 | API changes: 49 | Exposed is_list and list_length. 50 | Added scheme_register_foreign_func_list and declarations for it (tehom) 51 | Defined *compile-hook* (tehom) 52 | 53 | Other changes: 54 | Updated is_list and list_length to handle circular lists. 55 | Nested calling thru C has been made now safer (tehom) 56 | Peter Michaux cleaned up port_rep_from_file 57 | Added unwind-protect (tehom) 58 | Some cleanups to in/outport and Eval_Cycle by Peter Michaux 59 | Report error line number (Mostly by Sanel Zukan, back-compatibility by Tehom) 60 | 61 | Contributors: 62 | Kevin Cozens, Dimitrios Souflis, Tom Breton, Peter Michaux, Sanel Zukan, 63 | and Tony Garnock-Jones. 64 | 65 | Version 1.39 66 | Bugs fixed: 67 | Fix for the load bug 68 | Fixed parsing of octal coded characters. Fixes bug #1818018. 69 | Added tests for when mk_vector is out of memory. Can't rely on sc->sink. 70 | Fix for bug #1794369 71 | Finished feature-request 1599947: scheme_apply0 etc return values. 72 | Partly provided feature-request 1599947: Expose list_length, eqv, etc 73 | Provided feature-request 1599945, Scheme->C->Scheme calling. 74 | Fix for bug 1593861 (behavior of is_integer) 75 | Fix for bug 1589711 76 | Error checking of binding spec syntax in LET and LETREC. The bad syntax 77 | was causing a segmentation fault in Linux. Complete fixes for bug #1817986. 78 | Error checking of binding spec syntax in LET* 79 | Bad syntax was causing core dump in Linux. 80 | Fix for nasty gc bug 81 | 82 | R5RS changes: 83 | R5RS requires numbers to be of equal value AND of the same type (ie. both 84 | exact or inexact) in order to return #t from eqv?. R5RS compliance fix. 85 | String output ports now conform to SRFI-6 86 | 87 | Other changes: 88 | Drew Yao fixed buffer overflow problems in mk_sharp_const. 89 | put OP_T0LVL in charge of reacting to EOF 90 | file_push checks array bounds (patch from Ray Lehtiniemi) 91 | Changed to always use snprintf (Patch due to Ramiro bsd1628) 92 | Updated usage information using text from the Manual.txt file. 93 | 94 | Version 1.38 95 | Interim release until the rewrite, mostly incorporating modifications 96 | from Kevin Cozens. Small addition for Cygwin in the makefile, and 97 | modifications by Andrew Guenther for Apple platforms. 98 | 99 | Version 1.37 100 | Joe Buehler submitted reserve_cells. 101 | 102 | Version 1.36 103 | Joe Buehler fixed a patch in the allocator. 104 | Alexander Shendi moved the comment handling in the scanner, which 105 | fixed an obscure bug for which Mike E had provided a patch as well. 106 | Kevin Cozens has submitted some fixes and modifications which have 107 | not been incorporated yet in their entirety. 108 | 109 | Version 1.35 110 | Todd Showalter discovered that the number of free cells reported 111 | after GC was incorrect, which could also cause unnecessary allocations. 112 | 113 | Version 1.34 114 | Long missing version. Lots of bugfixes have accumulated in my email, so 115 | I had to start using them. In this version, Keenan Pepper has submitted 116 | a bugfix for the string comparison library procedure, Wouter Boeke 117 | modified some code that was casting to the wrong type and crashed on 118 | some machines, "SheppardCo" submitted a replacement "modulo" code and 119 | Scott Fenton submitted lots of corrections that shut up some compiler 120 | warnings. Brian Maher submitted instructions on how to build on OS-X. 121 | I have to dig deeper into my mailbox and find earlier emails, too. 122 | 123 | Version 1.33 124 | Charles Hayden fixed a nasty GC bug of the new stack frame, while in 125 | the process of porting TinyScheme to C++. He also submitted other 126 | changes, and other people also had comments or requests, but the GC 127 | bug was so important that this version is put through the door to 128 | correct it. 129 | 130 | Version 1.32 131 | Stephen Gildea put some quality time on TinyScheme again, and made 132 | a whole lot of changes to the interpreter that made it noticeably 133 | faster. 134 | 135 | Version 1.31 136 | Patches to the hastily-done version 1.30. Stephen Gildea fixed 137 | some things done wrongly, and Richard Russo fixed the makefile 138 | for building on Windows. Property lists (heritage from MiniScheme) 139 | are now optional and have dissappeared from the interface. They 140 | should be considered as deprecated. 141 | 142 | Version 1.30 143 | After many months, I followed Preston Bannister's advice of 144 | using macros and a single source text to keep the enums and the 145 | dispatch table in sync, and I used his contributed "opdefines.h". 146 | Timothy Downs contributed a helpful function, "scheme_call". 147 | Stephen Gildea contributed new versions of the makefile and 148 | practically all other sources. He created a built-in STRING-APPEND, 149 | and fixed a lot of other bugs. 150 | Ruhi Bloodworth reported fixes necessary for OS X and a small 151 | bug in dynload.c. 152 | 153 | Version 1.29 154 | The previous version contained a lot of corrections, but there 155 | were a lot more that still wait on a sheet of paper lost in a 156 | carton someplace after my house move... Manuel Heras-Gilsanz 157 | noticed this and resent his own contribution, which relies on 158 | another bugfix that v.1.28 was missing: a problem with string 159 | output, that this version fixes. I hope other people will take 160 | the time to resend their contributions, if they didn't make it 161 | to v.1.28. 162 | 163 | Version 1.28 164 | Many people have contacted me with bugfixes or remarks in 165 | the three months I was inactive. A lot of them spotted that 166 | scheme_deinit crashed while reporting gc results. They suggested 167 | that sc->outport be set to NIL in scheme_deinit, which I did. 168 | Dennis Taylor remarked that OP_VALUEPRINT reset sc->value instead 169 | of preserving it. He submitted a modification which I adopted 170 | partially. David Hovemeyer sent me many little changes, that you 171 | will find in version 1.28, and Partice Stoessel modified the 172 | float reader to conform to R5RS. 173 | 174 | Version 1.27 175 | Version 1.27 is the successor of 1.25. Bug fixes only, but I had to 176 | release them so that everybody can profit. 'Backchar' tried to write 177 | back to the string, which obviously didn't work for const strings. 178 | 'Substring' didn't check for crossed start and end indices. Defines 179 | changed to restore the ability to compile under MSVC. 180 | 181 | Version 1.26 182 | Version 1.26 was never released. I changed a lot of things, in fact 183 | too much, even the garbage collector, and hell broke loose. I'll 184 | try a more gradual approach next time. 185 | 186 | Version 1.25 187 | Types have been homogenized to be able to accommodate a different 188 | representation. Plus, promises are no longer closures. Unfortunately, 189 | I discovered that continuations and force/delay do not pass the SCM 190 | test (and never did)... However, on the bright side, what little 191 | modifications I did had a large impact on the footprint: 192 | USE_NO_FEATURES now produces an object file of 63960 bytes on Linux! 193 | 194 | Version 1.24 195 | SCM tests now pass again after change in atom2str. 196 | 197 | Version 1.23 198 | Finally I managed to mess it up with my version control. Version 199 | 1.22 actually lacked some of the things I have been fixing in the 200 | meantime. This should be considered as a complete replacement for 201 | 1.22. 202 | 203 | Version 1.22 204 | The new ports had a bug in LOAD. MK_CLOSURE is introduced. 205 | Shawn Wagner inquired about string->number and number->string. 206 | I added string->atom and atom->string and defined the number 207 | functions from them. Doing that, I fixed WRITE applied to symbols 208 | (it didn't quote them). Unfortunately, minimum build is now 209 | slightly larger than 64k... I postpone action because Jason's idea 210 | might solve it elegantly. 211 | 212 | Version 1.21 213 | Jason Felice submitted a radically different datatype representation 214 | which he had implemented. While discussing its pros and cons, it 215 | became apparent that the current implementation of ports suffered 216 | from a grave fault: ports were not garbage-collected. I changed the 217 | ports to be heap-allocated, which enabled the use of string ports 218 | for loading. Jason also fixed errors in the garbage collection of 219 | vectors. USE_VERBATIM is gone. "ssp_compiler.c" has a better solution 220 | on HTML generation. A bug involving backslash notation in strings 221 | has been fixed. '-c' flag now executes next argument as a stream of 222 | Scheme commands. Foreign functions are now also heap allocated, 223 | and scheme_define is used to define everything. 224 | 225 | Version 1.20 226 | Tracing has been added. The toplevel loop has been slightly 227 | rearranged. Backquote reading for vector templates has been 228 | sanitized. Symbol interning is now correct. Arithmetic functions 229 | have been corrected. APPLY, MAP, FOR-EACH, numeric comparison 230 | functions fixed. String reader/writer understands \xAA notation. 231 | 232 | Version 1.19 233 | Carriage Return now delimits identifiers. DOS-formatted Scheme files 234 | can be used by Unix. Random number generator added to library. 235 | Fixed some glitches of the new type-checking scheme. Fixed erroneous 236 | (append '() 'a) behavior. Will continue with r4rstest.scm to 237 | fix errors. 238 | 239 | Version 1.18 240 | The FFI has been extended. USE_VERBOSE_GC has gone. Anyone wanting 241 | the same functionality can put (gcverbose #t) in init.scm. 242 | print-width was removed, along with three corresponding op-codes. 243 | Extended character constants with ASCII names were added. 244 | mk_counted_string paves the way for full support of binary strings. 245 | As much as possible of the type-checking chores were delegated 246 | to the inner loop, thus reducing the code size to less than 4200 loc! 247 | 248 | Version 1.17 249 | Dynamically-loaded extensions are more fully integrated. 250 | TinyScheme is now distributed under the BSD open-source license. 251 | 252 | Version 1.16 253 | Dynamically-loaded extensions introduced (USE_DL). 254 | Santeri Paavolainen found a race condition: When a cons is executed, 255 | and each of the two arguments is a constructing function, GC could 256 | happen before all arguments are evaluated and cons() is called, and 257 | the evaluated arguments would all be reclaimed! 258 | Fortunately, such a case was rare in the code, although it is 259 | a pitfall in new code and code in foreign functions. Currently, only 260 | one such case remains, when COLON_HOOK is defined. 261 | 262 | Version 1.15 263 | David Gould also contributed some changes that speed up operation. 264 | Kirk Zurell fixed HASPROP. 265 | The Garbage Collection didn't collect all the garbage...fixed. 266 | 267 | Version 1.14 268 | Unfortunately, after Andre fixed the GC it became obvious that the 269 | algorithm was too slow... Fortunately, David Gould found a way to 270 | speed it up. 271 | 272 | Version 1.13 273 | Silly bug involving division by zero resolved by Roland Kaufman. 274 | Macintoch support from Shmulik Regev. 275 | Float parser bug fixed by Alexander Shendi. 276 | GC bug from Andru Luvisi. 277 | 278 | Version 1.12 279 | Cis* incorrectly called isalpha() instead of isascii() 280 | Added USE_CHAR_CLASSIFIERS, USE_STRING_PORTS. 281 | 282 | Version 1.11 283 | BSDI defines isnumber... changed all similar functions to is_* 284 | EXPT now has correct definition. Added FLOOR,CEILING,TRUNCATE 285 | and ROUND, courtesy of Bengt Kleberg. Preprocessor symbols now 286 | have values 1 or 0, and can be set as compiler defines (proposed 287 | by Andy Ganor *months* ago). 'prompt' and 'InitFile' can now be 288 | defined during compilation, too. 289 | 290 | Version 1.10 291 | Another bug when file ends with comment! 292 | Added DEFINE-MACRO in init.scm, courtesy of Andy Gaynor. 293 | 294 | Version 1.09 295 | Removed bug when READ met EOF. lcm. 296 | 297 | Version 1.08 298 | quotient,remainder and modulo. gcd. 299 | 300 | Version 1.07 301 | '=>' in cond now exists 302 | list? now checks for circularity 303 | some reader bugs removed 304 | Reader is more consistent wrt vectors 305 | Quote and Quasiquote work with vectors 306 | 307 | Version 1.06 308 | #! is now skipped 309 | generic-assoc bug removed 310 | strings are now managed differently, hack.txt is removed 311 | various delicate points fixed 312 | 313 | Version 1.05 314 | Support for scripts, *args*, "-1" option. 315 | Various R5RS procedures. 316 | *sharp-hook* 317 | Handles unmatched parentheses. 318 | New architecture for procedures. 319 | 320 | Version 1.04 321 | Added missing T_ATOM bits... 322 | Added vectors 323 | Free-list is sorted by address, since vectors need consecutive cells. 324 | (quit ) for use with scripts 325 | 326 | Version 1.03 (26 Aug 1998): 327 | Extended .h with useful functions for FFI 328 | Library: with-input-* etc. 329 | Finished R5RS I/O, added string ports. 330 | 331 | Version 1.02 (25 Aug 1998): 332 | First part of R5RS I/O. 333 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (tinyscheme) 3 | 4 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 5 | # set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 6 | # set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 7 | 8 | # remove 'lib' prefix for dlls 9 | if(WIN32 AND CMAKE_COMPILER_IS_GNUCXX) 10 | set(CMAKE_SHARED_LIBRARY_PREFIX "") 11 | endif() 12 | 13 | # ---------------------------------------------------------------------------- 14 | 15 | set (TINYSCHEME_DIR "src") 16 | set (RE_MODULE_DIR "src/re") 17 | 18 | # ---------------------------------------------------------------------------- 19 | 20 | set (TINYSCHEME_SRCS 21 | ${TINYSCHEME_DIR}/scheme.c 22 | ${TINYSCHEME_DIR}/api.c 23 | ${TINYSCHEME_DIR}/dynload.c 24 | ) 25 | 26 | set(TINYSCHEME_APP_SRCS 27 | ${TINYSCHEME_DIR}/main.c 28 | ) 29 | 30 | if (WIN32) 31 | 32 | set(TINYSCHEME_DEFS 33 | USE_STRLWR=0 USE_DL=1 USE_MATH=1 USE_ASCII_NAMES=0 34 | ) 35 | 36 | if (MSVC) 37 | set(TINYSCHEME_DEFS ${TINYSCHEME_DEFS} _CRT_SECURE_NO_WARNINGS _CRT_NONSTDC_NO_WARNINGS) 38 | endif(MSVC) 39 | 40 | else(WIN32) 41 | set(TINYSCHEME_DEFS 42 | SUN_DL=1 USE_DL=1 USE_MATH=1 USE_ASCII_NAMES=0 43 | ) 44 | endif(WIN32) 45 | 46 | # ---------------------------------------------------------------------------- 47 | 48 | add_library (tinyscheme SHARED ${TINYSCHEME_SRCS}) 49 | target_compile_definitions(tinyscheme PUBLIC ${TINYSCHEME_DEFS}) 50 | 51 | add_executable(tinyscheme-app ${TINYSCHEME_APP_SRCS}) 52 | target_link_libraries(tinyscheme-app tinyscheme) 53 | target_compile_definitions(tinyscheme-app PUBLIC ${TINYSCHEME_DEFS}) 54 | set_target_properties (tinyscheme-app PROPERTIES OUTPUT_NAME tinyscheme) 55 | 56 | # ---------------------------------------------------------------------------- 57 | 58 | set(RE_MODULE_SRCS 59 | ${RE_MODULE_DIR}/re.c 60 | ${RE_MODULE_DIR}/debug.c 61 | ${RE_MODULE_DIR}/regcomp.c 62 | ${RE_MODULE_DIR}/regerror.c 63 | ${RE_MODULE_DIR}/regexec.c 64 | ${RE_MODULE_DIR}/regfree.c 65 | ${RE_MODULE_DIR}/split.c 66 | ) 67 | 68 | add_library(tinyscheme-re SHARED ${RE_MODULE_SRCS}) 69 | target_link_libraries(tinyscheme-re tinyscheme) 70 | target_include_directories (tinyscheme-re PUBLIC ${TINYSCHEME_DIR} PUBLIC ${RE_MODULE_DIR}) 71 | target_compile_definitions(tinyscheme-re PUBLIC ${TINYSCHEME_DEFS}) 72 | set_target_properties (tinyscheme-re PROPERTIES OUTPUT_NAME re) 73 | 74 | # ---------------------------------------------------------------------------- 75 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LICENSE TERMS 2 | 3 | Copyright (c) 2000, Dimitrios Souflis 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | Neither the name of Dimitrios Souflis nor the names of the 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TinyScheme 2 | 3 | ## Notes by armornick 4 | 5 | This is a sort-of continuation of the TinyScheme project, which seems to have been abandoned. 6 | 7 | It started with just trying to get it to compile via CMake before deciding to separate the shell into its own file, and things escalated from there. 8 | 9 | **About support**: while everyone is free to report any issues, this is a only a minor hobby project for me. That means there's about a 50/50 chance that I'll do something about it. The TinyScheme source code seems to be fairly understandable, however, so an intermediate programmer should be able to make changes without too much difficulty. 10 | 11 | As a final note, I don't need any personal credits since I haven't actually written the original project source. Be sure to properly credit the original author, though. 12 | 13 | ## Description by original author 14 | 15 | TinyScheme is a lightweight Scheme interpreter that implements as large a subset of R5RS as was possible without getting very large and complicated. It is meant to be used as an embedded scripting interpreter for other programs. As such, it does not offer IDEs or extensive toolkits although it does sport a small top-level loop, included conditionally. A lot of functionality in TinyScheme is included conditionally, to allow developers freedom in balancing features and footprint. 16 | 17 | As an embedded interpreter, it allows multiple interpreter states to coexist in the same program, without any interference between them. Programmatically, foreign functions in C can be added and values can be defined in the Scheme environment. Being a quite small program, it is easy to comprehend, get to grips with, and use. TinyScheme was grown out of the MiniScheme distribution during the development of Ovrimos. 18 | 19 | The TinyScheme project has slowly acquired foothold in many open-source projects over the years, notably GIMP. It is evident that it is in need of constant attention, since any bug potentially affects a lot of installations. I am very grateful to the people that have helped with TinyScheme all this time, and I hope they will continue to do so in the future. 20 | 21 | There is an issue that is better put forth, rather than stashed under the carpet. The same qualities that have made TinyScheme suitable for many embedded or small-footprint jobs, have earned it an unwanted place in less desirable software. I hope it is clear that TinyScheme itself is domain-agnostic and I, in particular, am not a supporter of malware or adware. 22 | 23 | ## Licensing 24 | 25 | This software is open source, covered by a BSD-style license. See the LICENSE file in this directory for the full license terms. -------------------------------------------------------------------------------- /docs/hack.txt: -------------------------------------------------------------------------------- 1 | 2 | How to hack TinyScheme 3 | ---------------------- 4 | 5 | TinyScheme is easy to learn and modify. It is structured like a 6 | meta-interpreter, only it is written in C. All data are Scheme 7 | objects, which facilitates both understanding/modifying the 8 | code and reifying the interpreter workings. 9 | 10 | In place of a dry description, we will pace through the addition 11 | of a useful new datatype: garbage-collected memory blocks. 12 | The interface will be: 13 | 14 | (make-block []) makes a new block of the specified size 15 | optionally filling it with a specified byte 16 | (block? ) 17 | (block-length ) 18 | (block-ref ) retrieves byte at location 19 | (block-set! ) modifies byte at location 20 | 21 | In the sequel, lines that begin with '>' denote lines to add to the 22 | code. Lines that begin with '|' are just citations of existing code. 23 | Lines that begin with X denote lines to be removed from the code. 24 | 25 | First of all, we need to assign a typeid to our new type. Typeids 26 | in TinyScheme are small integers declared in the scheme_types enum 27 | located near the top of the scheme.c file; it begins with T_STRING. 28 | Add a new entry at the end, say T_MEMBLOCK. Remember to adjust the 29 | value of T_LAST_SYTEM_TYPE when adding new entries. There can be at 30 | most 31 types, but you don't have to worry about that limit yet. 31 | 32 | | T_ENVIRONMENT=14, 33 | X T_LAST_SYSTEM_TYPE=14 34 | > T_MEMBLOCK=15, 35 | > T_LAST_SYSTEM_TYPE=15 36 | | }; 37 | 38 | 39 | Then, some helper macros would be useful. Go to where is_string() 40 | and the rest are defined and add: 41 | 42 | > INTERFACE INLINE int is_memblock(pointer p) { return (type(p)==T_MEMBLOCK); } 43 | 44 | This actually is a function, because it is meant to be exported by 45 | scheme.h. If no foreign function will ever manipulate a memory block, 46 | you can instead define it as a macro: 47 | 48 | > #define is_memblock(p) (type(p)==T_MEMBLOCK) 49 | 50 | Then we make space for the new type in the main data structure: 51 | struct cell. As it happens, the _string part of the union _object 52 | (that is used to hold character strings) has two fields that suit us: 53 | 54 | | struct { 55 | | char *_svalue; 56 | | int _keynum; 57 | | } _string; 58 | 59 | We can use _svalue to hold the actual pointer and _keynum to hold its 60 | length. If we couln't reuse existing fields, we could always add other 61 | alternatives in union _object. 62 | 63 | We then proceed to write the function that actually makes a new block. 64 | For conformance reasons, we name it mk_memblock 65 | 66 | > static pointer mk_memblock(scheme *sc, int len, char fill) { 67 | > pointer x; 68 | > char *p=(char*)sc->malloc(len); 69 | > 70 | > if(p==0) { 71 | > return sc->NIL; 72 | > } 73 | > x = get_cell(sc, sc->NIL, sc->NIL); 74 | > 75 | > typeflag(x) = T_MEMBLOCK|T_ATOM; 76 | > strvalue(x)=p; 77 | > keynum(x)=len; 78 | > memset(p,fill,len); 79 | > return (x); 80 | > } 81 | 82 | The memory used by the MEMBLOCK will have to be freed when the cell 83 | is reclaimed during garbage collection. There is a placeholder for 84 | that staff, function finalize_cell(), currently handling strings only. 85 | 86 | | static void finalize_cell(scheme *sc, pointer a) { 87 | | if(is_string(a)) { 88 | | sc->free(strvalue(a)); 89 | > } else if(is_memblock(a)) { 90 | > sc->free(strvalue(a)); 91 | | } else if(is_port(a)) { 92 | 93 | There are no MEMBLOCK literals, so we don't concern ourselves with 94 | the READER part (yet!). We must cater to the PRINTER, though. We 95 | add one case more in atom2str(). 96 | 97 | | } else if (iscontinuation(l)) { 98 | | p = "#"; 99 | > } else if (is_memblock(l)) { 100 | > p = "#"; 101 | | } else { 102 | 103 | Whenever a MEMBLOCK is displayed, it will look like that. 104 | Now, we must add the interface functions: constructor, predicate, 105 | accessor, modifier. We must in fact create new op-codes for the virtual 106 | machine underlying TinyScheme. Since version 1.30, TinyScheme uses 107 | macros and a single source text to keep the enums and the dispatch table 108 | in sync. The op-codes are defined in the opdefines.h file with one line 109 | for each op-code. The lines in the file have six columns between the 110 | starting _OPDEF( and ending ): A, B, C, D, E, and OP. 111 | Note that this file uses unusually long lines to accomodate all the 112 | information; adjust your editor to handle this. 113 | 114 | The purpose of the columns is: 115 | - Column A is the name of the subroutine that handles the op-code. 116 | - Column B is the name of the op-code function. 117 | - Columns C and D are the minimum and maximum number of arguments 118 | that are accepted by the op-code. 119 | - Column E is a set of flags that tells the interpreter the type of 120 | each of the arguments expected by the op-code. 121 | - Column OP is used in the scheme_opcodes enum located in the 122 | scheme-private.h file. 123 | 124 | Op-codes are really just tags for a huge C switch, only this switch 125 | is broken up in to a number of different opexe_X functions. The 126 | correspondence is made in table "dispatch_table". There, we assign 127 | the new op-codes to opexe_2, where the equivalent ones for vectors 128 | are situated. We also assign a name for them, and specify the minimum 129 | and maximum arity (number of expected arguments). INF_ARG as a maximum 130 | arity means "unlimited". 131 | 132 | For reasons of consistency, we add the new op-codes right after those 133 | for vectors: 134 | 135 | | _OP_DEF(opexe_2, "vector-set!", 3, 3, TST_VECTOR TST_NATURAL TST_ANY, OP_VECSET ) 136 | > _OP_DEF(opexe_2, "make-block", 1, 2, TST_NATURAL TST_CHAR, OP_MKBLOCK ) 137 | > _OP_DEF(opexe_2, "block-length", 1, 1, T_MEMBLOCK, OP_BLOCKLEN ) 138 | > _OP_DEF(opexe_2, "block-ref", 2, 2, T_MEMBLOCK TST_NATURAL, OP_BLOCKREF ) 139 | > _OP_DEF(opexe_2, "block-set!", 1, 1, T_MEMBLOCK TST_NATURAL TST_CHAR, OP_BLOCKSET ) 140 | | _OP_DEF(opexe_3, "not", 1, 1, TST_NONE, OP_NOT ) 141 | 142 | We add the predicate along with the other predicates in opexe_3: 143 | 144 | | _OP_DEF(opexe_3, "vector?", 1, 1, TST_ANY, OP_VECTORP ) 145 | > _OP_DEF(opexe_3, "block?", 1, 1, TST_ANY, OP_BLOCKP ) 146 | | _OP_DEF(opexe_3, "eq?", 2, 2, TST_ANY, OP_EQ ) 147 | 148 | All that remains is to write the actual code to do the processing and 149 | add it to the switch statement in opexe_2, after the OP_VECSET case. 150 | 151 | > case OP_MKBLOCK: { /* make-block */ 152 | > int fill=0; 153 | > int len; 154 | > 155 | > if(!isnumber(car(sc->args))) { 156 | > Error_1(sc,"make-block: not a number:",car(sc->args)); 157 | > } 158 | > len=ivalue(car(sc->args)); 159 | > if(len<=0) { 160 | > Error_1(sc,"make-block: not positive:",car(sc->args)); 161 | > } 162 | > 163 | > if(cdr(sc->args)!=sc->NIL) { 164 | > if(!isnumber(cadr(sc->args)) || ivalue(cadr(sc->args))<0) { 165 | > Error_1(sc,"make-block: not a positive number:",cadr(sc->args)); 166 | > } 167 | > fill=charvalue(cadr(sc->args))%255; 168 | > } 169 | > s_return(sc,mk_memblock(sc,len,(char)fill)); 170 | > } 171 | > 172 | > case OP_BLOCKLEN: /* block-length */ 173 | > if(!ismemblock(car(sc->args))) { 174 | > Error_1(sc,"block-length: not a memory block:",car(sc->args)); 175 | > } 176 | > s_return(sc,mk_integer(sc,keynum(car(sc->args)))); 177 | > 178 | > case OP_BLOCKREF: { /* block-ref */ 179 | > char *str; 180 | > int index; 181 | > 182 | > if(!ismemblock(car(sc->args))) { 183 | > Error_1(sc,"block-ref: not a memory block:",car(sc->args)); 184 | > } 185 | > str=strvalue(car(sc->args)); 186 | > 187 | > if(cdr(sc->args)==sc->NIL) { 188 | > Error_0(sc,"block-ref: needs two arguments"); 189 | > } 190 | > if(!isnumber(cadr(sc->args))) { 191 | > Error_1(sc,"block-ref: not a number:",cadr(sc->args)); 192 | > } 193 | > index=ivalue(cadr(sc->args)); 194 | > 195 | > if(index<0 || index>=keynum(car(sc->args))) { 196 | > Error_1(sc,"block-ref: out of bounds:",cadr(sc->args)); 197 | > } 198 | > 199 | > s_return(sc,mk_integer(sc,str[index])); 200 | > } 201 | > 202 | > case OP_BLOCKSET: { /* block-set! */ 203 | > char *str; 204 | > int index; 205 | > int c; 206 | > 207 | > if(!ismemblock(car(sc->args))) { 208 | > Error_1(sc,"block-set!: not a memory block:",car(sc->args)); 209 | > } 210 | > if(isimmutable(car(sc->args))) { 211 | > Error_1(sc,"block-set!: unable to alter immutable memory block:",car(sc->args)); 212 | > } 213 | > str=strvalue(car(sc->args)); 214 | > 215 | > if(cdr(sc->args)==sc->NIL) { 216 | > Error_0(sc,"block-set!: needs three arguments"); 217 | > } 218 | > if(!isnumber(cadr(sc->args))) { 219 | > Error_1(sc,"block-set!: not a number:",cadr(sc->args)); 220 | > } 221 | > index=ivalue(cadr(sc->args)); 222 | > if(index<0 || index>=keynum(car(sc->args))) { 223 | > Error_1(sc,"block-set!: out of bounds:",cadr(sc->args)); 224 | > } 225 | > 226 | > if(cddr(sc->args)==sc->NIL) { 227 | > Error_0(sc,"block-set!: needs three arguments"); 228 | > } 229 | > if(!isinteger(caddr(sc->args))) { 230 | > Error_1(sc,"block-set!: not an integer:",caddr(sc->args)); 231 | > } 232 | > c=ivalue(caddr(sc->args))%255; 233 | > 234 | > str[index]=(char)c; 235 | > s_return(sc,car(sc->args)); 236 | > } 237 | 238 | Finally, do the same for the predicate in opexe_3. 239 | 240 | | case OP_VECTORP: /* vector? */ 241 | | s_retbool(is_vector(car(sc->args))); 242 | > case OP_BLOCKP: /* block? */ 243 | > s_retbool(is_memblock(car(sc->args))); 244 | | case OP_EQ: /* eq? */ 245 | -------------------------------------------------------------------------------- /docs/regex-module.txt: -------------------------------------------------------------------------------- 1 | TinyScheme RE (Regular Expressions) extension 2 | --------------------------------------------- 3 | Version 1.2, August 2002 4 | 5 | The bulk of this directory is the regular expression library written 6 | by Henry Spencer (see file README and COPYRIGHT). 7 | 8 | Two files were added to produce the TinyScheme regular expression 9 | library, re.so: re.c and re.makefile. The included re.makefile was contributed 10 | initially by Stephen Gildea and should be adaptable to all Unix systems. 11 | 12 | The makefile produces a DLL named re.so. For now, it contains just 13 | a single foreign function (re-match ). It returns 14 | true (string matches pattern) or false. If it is called with an 15 | extra parameter, which should be a vector, overwrites as many elements 16 | of the vector as needed with the strings that matched the corresponding 17 | parenthesized subexpressions inside . 18 | 19 | It is not fully tested, so use with caution. 20 | 21 | Load the extension from inside TinyScheme using 22 | (load-extension "re/re") 23 | assuming that re.so is in the directory "re". 24 | 25 | Load "re.scm" if you wish to use v.1.1 behavior. 26 | 27 | dsouflis@acm.org 28 | -------------------------------------------------------------------------------- /src/BUILDING: -------------------------------------------------------------------------------- 1 | Building TinyScheme 2 | ------------------- 3 | 4 | The included makefile includes logic for Linux, Solaris and Win32, and can 5 | readily serve as an example for other OSes, especially Unixes. There are 6 | a lot of compile-time flags in TinyScheme (preprocessor defines) that can trim 7 | unwanted features. See next section. 'make all' and 'make clean' function as 8 | expected. 9 | 10 | Autoconfing TinyScheme was once proposed, but the distribution would not be 11 | so small anymore. There are few platform dependencies in TinyScheme, and in 12 | general compiles out of the box. 13 | 14 | Customizing 15 | ----------- 16 | 17 | The following symbols are defined to default values in scheme.h. 18 | Use the -D flag of cc to set to either 1 or 0. 19 | 20 | STANDALONE 21 | Define this to produce a standalone interpreter. 22 | 23 | USE_MATH 24 | Includes math routines. 25 | 26 | USE_CHAR_CLASSIFIERS 27 | Includes character classifier procedures. 28 | 29 | USE_ASCII_NAMES 30 | Enable extended character notation based on ASCII names. 31 | 32 | USE_STRING_PORTS 33 | Enables string ports. 34 | 35 | USE_ERROR_HOOK 36 | To force system errors through user-defined error handling. 37 | (see "Error handling") 38 | 39 | USE_TRACING 40 | To enable use of TRACING. 41 | 42 | USE_COLON_HOOK 43 | Enable use of qualified identifiers. (see "Colon Qualifiers - Packages") 44 | Defining this as 0 has the rather drastic consequence that any code using 45 | packages will stop working, and will have to be modified. It should only 46 | be used if you *absolutely* need to use '::' in identifiers. 47 | 48 | USE_STRCASECMP 49 | Defines stricmp as strcasecmp, for Unix. 50 | 51 | STDIO_ADDS_CR 52 | Informs TinyScheme that stdio translates "\n" to "\r\n". For DOS/Windows. 53 | 54 | USE_DL 55 | Enables dynamically loaded routines. If you define this symbol, you 56 | should also include dynload.c in your compile. 57 | 58 | USE_PLIST 59 | Enables property lists (not Standard Scheme stuff). Off by default. 60 | 61 | USE_NO_FEATURES 62 | Shortcut to disable USE_MATH, USE_CHAR_CLASSIFIERS, USE_ASCII_NAMES, 63 | USE_STRING_PORTS, USE_ERROR_HOOK, USE_TRACING, USE_COLON_HOOK, 64 | USE_DL. 65 | 66 | USE_SCHEME_STACK 67 | Enables 'cons' stack (the alternative is a faster calling scheme, which 68 | breaks continuations). Undefine it if you don't care about strict compatibility 69 | but you do care about faster execution. 70 | 71 | 72 | OS-X tip 73 | -------- 74 | I don't have access to OS-X, but Brian Maher submitted the following tip: 75 | 76 | [1] Download and install fink (I installed fink in 77 | /usr/local/fink) 78 | [2] Install the 'dlcompat' package using fink as such: 79 | > fink install dlcompat 80 | [3] Make the following changes to the 81 | tinyscheme-1.32.tar.gz 82 | 83 | diff -r tinyscheme-1.32/dynload.c 84 | tinyscheme-1.32-new/dynload.c 85 | 24c24 86 | < #define SUN_DL 87 | --- 88 | > 89 | Only in tinyscheme-1.32-new/: dynload.o 90 | Only in tinyscheme-1.32-new/: libtinyscheme.a Only in tinyscheme-1.32-new/: libtinyscheme.so diff -r tinyscheme-1.32/makefile tinyscheme-1.32-new/makefile 91 | 33,34c33,43 92 | < LD = gcc 93 | < LDFLAGS = -shared 94 | --- 95 | > #LD = gcc 96 | > #LDFLAGS = -shared 97 | > #DEBUG=-g -Wno-char-subscripts -O 98 | > #SYS_LIBS= -ldl 99 | > #PLATFORM_FEATURES= -DSUN_DL=1 100 | > 101 | > # Mac OS X 102 | > CC = gcc 103 | > CFLAGS = -I/usr/local/fink/include 104 | > LD = gcc 105 | > LDFLAGS = -L/usr/local/fink/lib 106 | 37c46 107 | < PLATFORM_FEATURES= -DSUN_DL=1 108 | --- 109 | > PLATFORM_FEATURES= -DSUN_DL=1 -DOSX 110 | 60c69 111 | < $(CC) -I. -c $(DEBUG) $(FEATURES) 112 | $(DL_FLAGS) $< 113 | --- 114 | > $(CC) $(CFLAGS) -I. -c $(DEBUG) 115 | $(FEATURES) $(DL_FLAGS) $< 116 | 66c75 117 | < $(CC) -o $@ $(DEBUG) $(OBJS) $(SYS_LIBS) 118 | --- 119 | > $(CC) $(LDFLAGS) -o $@ $(DEBUG) $(OBJS) 120 | $(SYS_LIBS) 121 | Only in tinyscheme-1.32-new/: scheme 122 | diff -r tinyscheme-1.32/scheme.c 123 | tinyscheme-1.32-new/scheme.c 124 | 60,61c60,61 125 | < #ifndef macintosh 126 | < # include 127 | --- 128 | > #ifdef OSX 129 | > /* Do nothing */ 130 | 62a63,65 131 | > # ifndef macintosh 132 | > # include 133 | > # else 134 | 77c80,81 135 | < #endif /* macintosh */ 136 | --- 137 | > # endif /* macintosh */ 138 | > #endif /* !OSX */ 139 | Only in tinyscheme-1.32-new/: scheme.o 140 | -------------------------------------------------------------------------------- /src/CHANGES: -------------------------------------------------------------------------------- 1 | Change Log 2 | ---------- 3 | 4 | Version 1.41 5 | Bugs fixed: 6 | #3020389 - Added makefile section for Mac OS X (SL) 7 | #3286135 - Fixed num_mod routine which caused errors in use of modulo 8 | #3290232 - Corrected version number shown on startup (GM) 9 | #3394882 - Added missing #if in opdefines.h around get and put (DC) 10 | #3395547 - Fix for the modulo procedure (DC) 11 | #3400290 - Optimized append to make it an O(n) operation (DC) 12 | #3493926 - Corrected flag used when building shared files on OSX (J) 13 | 14 | R5RS related changes: 15 | #2866196 - Parser does not handle delimiters correctly 16 | #3395548 - Add a decimal point to inexact numbers in atom2str (DC) 17 | #3399331 - Make min/max return inexact when any argument is inexact 18 | #3399332 - Compatability fix for expt. 19 | #3399335 - Optional radix for string->number and number->string (DC) 20 | #3400202 - Append with one argument should not return a list (DC) 21 | #3400284 - Compatability fix for integer? 22 | 23 | Other changes: 24 | - Added flags to makefile for MinGW/MSYS (TC) 25 | - Moved variable declarations to avoid warnings with some compilers 26 | - Don't print space after initial #( when printing vectors. 27 | - Minor optimization for is_nonneg(). 28 | - No need to round integers in OP_ROUND (#3400284) 29 | - Fixes to code that reports line number with error (RC) 30 | 31 | Contributors: 32 | Kevin Cozens, Gordon McNutt, Doug Currie, Sean Long, Tim Cas, Joey, 33 | Richard Copley, and CMarinier. 34 | 35 | Version 1.40 36 | Bugs fixed: 37 | #1964950 - Stop core dumps due to bad syntax in LET (and variants) 38 | #2826594 - allow reverse to work on empty list (Tony Garnock-Jones) 39 | Potential problem of arglist to foreign calls being wrongly GC'ed. 40 | Fixed bug that read could loop forever (tehom). 41 | 42 | API changes: 43 | Exposed is_list and list_length. 44 | Added scheme_register_foreign_func_list and declarations for it (tehom) 45 | Defined *compile-hook* (tehom) 46 | 47 | Other changes: 48 | Updated is_list and list_length to handle circular lists. 49 | Nested calling thru C has been made now safer (tehom) 50 | Peter Michaux cleaned up port_rep_from_file 51 | Added unwind-protect (tehom) 52 | Some cleanups to in/outport and Eval_Cycle by Peter Michaux 53 | Report error line number (Mostly by Sanel Zukan, back-compatibility by Tehom) 54 | 55 | Contributors: 56 | Kevin Cozens, Dimitrios Souflis, Tom Breton, Peter Michaux, Sanel Zukan, 57 | and Tony Garnock-Jones. 58 | 59 | Version 1.39 60 | Bugs fixed: 61 | Fix for the load bug 62 | Fixed parsing of octal coded characters. Fixes bug #1818018. 63 | Added tests for when mk_vector is out of memory. Can't rely on sc->sink. 64 | Fix for bug #1794369 65 | Finished feature-request 1599947: scheme_apply0 etc return values. 66 | Partly provided feature-request 1599947: Expose list_length, eqv, etc 67 | Provided feature-request 1599945, Scheme->C->Scheme calling. 68 | Fix for bug 1593861 (behavior of is_integer) 69 | Fix for bug 1589711 70 | Error checking of binding spec syntax in LET and LETREC. The bad syntax 71 | was causing a segmentation fault in Linux. Complete fixes for bug #1817986. 72 | Error checking of binding spec syntax in LET* 73 | Bad syntax was causing core dump in Linux. 74 | Fix for nasty gc bug 75 | 76 | R5RS changes: 77 | R5RS requires numbers to be of equal value AND of the same type (ie. both 78 | exact or inexact) in order to return #t from eqv?. R5RS compliance fix. 79 | String output ports now conform to SRFI-6 80 | 81 | Other changes: 82 | Drew Yao fixed buffer overflow problems in mk_sharp_const. 83 | put OP_T0LVL in charge of reacting to EOF 84 | file_push checks array bounds (patch from Ray Lehtiniemi) 85 | Changed to always use snprintf (Patch due to Ramiro bsd1628) 86 | Updated usage information using text from the Manual.txt file. 87 | 88 | Version 1.38 89 | Interim release until the rewrite, mostly incorporating modifications 90 | from Kevin Cozens. Small addition for Cygwin in the makefile, and 91 | modifications by Andrew Guenther for Apple platforms. 92 | 93 | Version 1.37 94 | Joe Buehler submitted reserve_cells. 95 | 96 | Version 1.36 97 | Joe Buehler fixed a patch in the allocator. 98 | Alexander Shendi moved the comment handling in the scanner, which 99 | fixed an obscure bug for which Mike E had provided a patch as well. 100 | Kevin Cozens has submitted some fixes and modifications which have 101 | not been incorporated yet in their entirety. 102 | 103 | Version 1.35 104 | Todd Showalter discovered that the number of free cells reported 105 | after GC was incorrect, which could also cause unnecessary allocations. 106 | 107 | Version 1.34 108 | Long missing version. Lots of bugfixes have accumulated in my email, so 109 | I had to start using them. In this version, Keenan Pepper has submitted 110 | a bugfix for the string comparison library procedure, Wouter Boeke 111 | modified some code that was casting to the wrong type and crashed on 112 | some machines, "SheppardCo" submitted a replacement "modulo" code and 113 | Scott Fenton submitted lots of corrections that shut up some compiler 114 | warnings. Brian Maher submitted instructions on how to build on OS-X. 115 | I have to dig deeper into my mailbox and find earlier emails, too. 116 | 117 | Version 1.33 118 | Charles Hayden fixed a nasty GC bug of the new stack frame, while in 119 | the process of porting TinyScheme to C++. He also submitted other 120 | changes, and other people also had comments or requests, but the GC 121 | bug was so important that this version is put through the door to 122 | correct it. 123 | 124 | Version 1.32 125 | Stephen Gildea put some quality time on TinyScheme again, and made 126 | a whole lot of changes to the interpreter that made it noticeably 127 | faster. 128 | 129 | Version 1.31 130 | Patches to the hastily-done version 1.30. Stephen Gildea fixed 131 | some things done wrongly, and Richard Russo fixed the makefile 132 | for building on Windows. Property lists (heritage from MiniScheme) 133 | are now optional and have dissappeared from the interface. They 134 | should be considered as deprecated. 135 | 136 | Version 1.30 137 | After many months, I followed Preston Bannister's advice of 138 | using macros and a single source text to keep the enums and the 139 | dispatch table in sync, and I used his contributed "opdefines.h". 140 | Timothy Downs contributed a helpful function, "scheme_call". 141 | Stephen Gildea contributed new versions of the makefile and 142 | practically all other sources. He created a built-in STRING-APPEND, 143 | and fixed a lot of other bugs. 144 | Ruhi Bloodworth reported fixes necessary for OS X and a small 145 | bug in dynload.c. 146 | 147 | Version 1.29 148 | The previous version contained a lot of corrections, but there 149 | were a lot more that still wait on a sheet of paper lost in a 150 | carton someplace after my house move... Manuel Heras-Gilsanz 151 | noticed this and resent his own contribution, which relies on 152 | another bugfix that v.1.28 was missing: a problem with string 153 | output, that this version fixes. I hope other people will take 154 | the time to resend their contributions, if they didn't make it 155 | to v.1.28. 156 | 157 | Version 1.28 158 | Many people have contacted me with bugfixes or remarks in 159 | the three months I was inactive. A lot of them spotted that 160 | scheme_deinit crashed while reporting gc results. They suggested 161 | that sc->outport be set to NIL in scheme_deinit, which I did. 162 | Dennis Taylor remarked that OP_VALUEPRINT reset sc->value instead 163 | of preserving it. He submitted a modification which I adopted 164 | partially. David Hovemeyer sent me many little changes, that you 165 | will find in version 1.28, and Partice Stoessel modified the 166 | float reader to conform to R5RS. 167 | 168 | Version 1.27 169 | Version 1.27 is the successor of 1.25. Bug fixes only, but I had to 170 | release them so that everybody can profit. 'Backchar' tried to write 171 | back to the string, which obviously didn't work for const strings. 172 | 'Substring' didn't check for crossed start and end indices. Defines 173 | changed to restore the ability to compile under MSVC. 174 | 175 | Version 1.26 176 | Version 1.26 was never released. I changed a lot of things, in fact 177 | too much, even the garbage collector, and hell broke loose. I'll 178 | try a more gradual approach next time. 179 | 180 | Version 1.25 181 | Types have been homogenized to be able to accommodate a different 182 | representation. Plus, promises are no longer closures. Unfortunately, 183 | I discovered that continuations and force/delay do not pass the SCM 184 | test (and never did)... However, on the bright side, what little 185 | modifications I did had a large impact on the footprint: 186 | USE_NO_FEATURES now produces an object file of 63960 bytes on Linux! 187 | 188 | Version 1.24 189 | SCM tests now pass again after change in atom2str. 190 | 191 | Version 1.23 192 | Finally I managed to mess it up with my version control. Version 193 | 1.22 actually lacked some of the things I have been fixing in the 194 | meantime. This should be considered as a complete replacement for 195 | 1.22. 196 | 197 | Version 1.22 198 | The new ports had a bug in LOAD. MK_CLOSURE is introduced. 199 | Shawn Wagner inquired about string->number and number->string. 200 | I added string->atom and atom->string and defined the number 201 | functions from them. Doing that, I fixed WRITE applied to symbols 202 | (it didn't quote them). Unfortunately, minimum build is now 203 | slightly larger than 64k... I postpone action because Jason's idea 204 | might solve it elegantly. 205 | 206 | Version 1.21 207 | Jason Felice submitted a radically different datatype representation 208 | which he had implemented. While discussing its pros and cons, it 209 | became apparent that the current implementation of ports suffered 210 | from a grave fault: ports were not garbage-collected. I changed the 211 | ports to be heap-allocated, which enabled the use of string ports 212 | for loading. Jason also fixed errors in the garbage collection of 213 | vectors. USE_VERBATIM is gone. "ssp_compiler.c" has a better solution 214 | on HTML generation. A bug involving backslash notation in strings 215 | has been fixed. '-c' flag now executes next argument as a stream of 216 | Scheme commands. Foreign functions are now also heap allocated, 217 | and scheme_define is used to define everything. 218 | 219 | Version 1.20 220 | Tracing has been added. The toplevel loop has been slightly 221 | rearranged. Backquote reading for vector templates has been 222 | sanitized. Symbol interning is now correct. Arithmetic functions 223 | have been corrected. APPLY, MAP, FOR-EACH, numeric comparison 224 | functions fixed. String reader/writer understands \xAA notation. 225 | 226 | Version 1.19 227 | Carriage Return now delimits identifiers. DOS-formatted Scheme files 228 | can be used by Unix. Random number generator added to library. 229 | Fixed some glitches of the new type-checking scheme. Fixed erroneous 230 | (append '() 'a) behavior. Will continue with r4rstest.scm to 231 | fix errors. 232 | 233 | Version 1.18 234 | The FFI has been extended. USE_VERBOSE_GC has gone. Anyone wanting 235 | the same functionality can put (gcverbose #t) in init.scm. 236 | print-width was removed, along with three corresponding op-codes. 237 | Extended character constants with ASCII names were added. 238 | mk_counted_string paves the way for full support of binary strings. 239 | As much as possible of the type-checking chores were delegated 240 | to the inner loop, thus reducing the code size to less than 4200 loc! 241 | 242 | Version 1.17 243 | Dynamically-loaded extensions are more fully integrated. 244 | TinyScheme is now distributed under the BSD open-source license. 245 | 246 | Version 1.16 247 | Dynamically-loaded extensions introduced (USE_DL). 248 | Santeri Paavolainen found a race condition: When a cons is executed, 249 | and each of the two arguments is a constructing function, GC could 250 | happen before all arguments are evaluated and cons() is called, and 251 | the evaluated arguments would all be reclaimed! 252 | Fortunately, such a case was rare in the code, although it is 253 | a pitfall in new code and code in foreign functions. Currently, only 254 | one such case remains, when COLON_HOOK is defined. 255 | 256 | Version 1.15 257 | David Gould also contributed some changes that speed up operation. 258 | Kirk Zurell fixed HASPROP. 259 | The Garbage Collection didn't collect all the garbage...fixed. 260 | 261 | Version 1.14 262 | Unfortunately, after Andre fixed the GC it became obvious that the 263 | algorithm was too slow... Fortunately, David Gould found a way to 264 | speed it up. 265 | 266 | Version 1.13 267 | Silly bug involving division by zero resolved by Roland Kaufman. 268 | Macintoch support from Shmulik Regev. 269 | Float parser bug fixed by Alexander Shendi. 270 | GC bug from Andru Luvisi. 271 | 272 | Version 1.12 273 | Cis* incorrectly called isalpha() instead of isascii() 274 | Added USE_CHAR_CLASSIFIERS, USE_STRING_PORTS. 275 | 276 | Version 1.11 277 | BSDI defines isnumber... changed all similar functions to is_* 278 | EXPT now has correct definition. Added FLOOR,CEILING,TRUNCATE 279 | and ROUND, courtesy of Bengt Kleberg. Preprocessor symbols now 280 | have values 1 or 0, and can be set as compiler defines (proposed 281 | by Andy Ganor *months* ago). 'prompt' and 'InitFile' can now be 282 | defined during compilation, too. 283 | 284 | Version 1.10 285 | Another bug when file ends with comment! 286 | Added DEFINE-MACRO in init.scm, courtesy of Andy Gaynor. 287 | 288 | Version 1.09 289 | Removed bug when READ met EOF. lcm. 290 | 291 | Version 1.08 292 | quotient,remainder and modulo. gcd. 293 | 294 | Version 1.07 295 | '=>' in cond now exists 296 | list? now checks for circularity 297 | some reader bugs removed 298 | Reader is more consistent wrt vectors 299 | Quote and Quasiquote work with vectors 300 | 301 | Version 1.06 302 | #! is now skipped 303 | generic-assoc bug removed 304 | strings are now managed differently, hack.txt is removed 305 | various delicate points fixed 306 | 307 | Version 1.05 308 | Support for scripts, *args*, "-1" option. 309 | Various R5RS procedures. 310 | *sharp-hook* 311 | Handles unmatched parentheses. 312 | New architecture for procedures. 313 | 314 | Version 1.04 315 | Added missing T_ATOM bits... 316 | Added vectors 317 | Free-list is sorted by address, since vectors need consecutive cells. 318 | (quit ) for use with scripts 319 | 320 | Version 1.03 (26 Aug 1998): 321 | Extended .h with useful functions for FFI 322 | Library: with-input-* etc. 323 | Finished R5RS I/O, added string ports. 324 | 325 | Version 1.02 (25 Aug 1998): 326 | First part of R5RS I/O. 327 | -------------------------------------------------------------------------------- /src/COPYING: -------------------------------------------------------------------------------- 1 | LICENSE TERMS 2 | 3 | Copyright (c) 2000, Dimitrios Souflis 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | Neither the name of Dimitrios Souflis nor the names of the 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /src/MiniSCHEMETribute.txt: -------------------------------------------------------------------------------- 1 | TinyScheme would not exist if it wasn't for MiniScheme. I had just 2 | written the HTTP server for Ovrimos SQL Server, and I was lamenting the 3 | lack of a scripting language. Server-side Javascript would have been the 4 | preferred solution, had there been a Javascript interpreter I could 5 | lay my hands on. But there weren't. Perl would have been another solution, 6 | but it was probably ten times bigger that the program it was supposed to 7 | be embedded in. There would also be thorny licencing issues. 8 | 9 | So, the obvious thing to do was find a trully small interpreter. Forth 10 | was a language I had once quasi-implemented, but the difficulty of 11 | handling dynamic data and the weirdness of the language put me off. I then 12 | looked around for a LISP interpreter, the next thing I knew was easy to 13 | implement. Alas, the LeLisp I knew from my days in UPMC (Universite Pierre 14 | et Marie Curie) had given way to Common Lisp, a megalith of a language! 15 | Then my search lead me to Scheme, a language I knew was very orthogonal 16 | and clean. When I found Mini-Scheme, a single C file of some 2400 loc, I 17 | fell in love with it! What if it lacked floating-point numbers and 18 | strings! The rest, as they say, is history. 19 | 20 | Below are the original credits. Don't email Akira KIDA, the address has 21 | changed. 22 | 23 | ---------- Mini-Scheme Interpreter Version 0.85 ---------- 24 | 25 | coded by Atsushi Moriwaki (11/5/1989) 26 | 27 | E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 28 | 29 | THIS SOFTWARE IS IN THE PUBLIC DOMAIN 30 | ------------------------------------ 31 | This software is completely free to copy, modify and/or re-distribute. 32 | But I would appreciate it if you left my name on the code as the author. 33 | 34 | This version has been modified by R.C. Secrist. 35 | 36 | Mini-Scheme is now maintained by Akira KIDA. 37 | 38 | This is a revised and modified version by Akira KIDA. 39 | current version is 0.85k4 (15 May 1994) 40 | 41 | Please send suggestions, bug reports and/or requests to: 42 | 43 | 44 | 45 | Features compared to MiniSCHEME 46 | ------------------------------- 47 | 48 | All code is now reentrant. Interpreter state is held in a 'scheme' 49 | struct, and many interpreters can coexist in the same program, possibly 50 | in different threads. The user can specify user-defined memory allocation 51 | primitives. (see "Programmer's Reference") 52 | 53 | The reader is more consistent. 54 | 55 | Strings, characters and flonums are supported. (see "Types") 56 | 57 | Files being loaded can be nested up to some depth. 58 | 59 | R5RS I/O is there, plus String Ports. (see "Scheme Reference","I/O") 60 | 61 | Vectors exist. 62 | 63 | As a standalone application, it supports command-line arguments. 64 | (see "Standalone") 65 | 66 | Running out of memory is now handled. 67 | 68 | The user can add foreign functions in C. (see "Foreign Functions") 69 | 70 | The code has been changed slightly, core functions have been moved 71 | to the library, behavior has been aligned with R5RS etc. 72 | 73 | Support has been added for user-defined error recovery. 74 | (see "Error Handling") 75 | 76 | Support has been added for modular programming. 77 | (see "Colon Qualifiers - Packages") 78 | 79 | To enable this, EVAL has changed internally, and can 80 | now take two arguments, as per R5RS. Environments are supported. 81 | (see "Colon Qualifiers - Packages") 82 | 83 | Promises are now evaluated once only. 84 | 85 | (macro (foo form) ...) is now equivalent to (macro foo (lambda(form) ...)) 86 | 87 | The reader can be extended using new #-expressions 88 | (see "Reader extensions") 89 | -------------------------------------------------------------------------------- /src/api.c: -------------------------------------------------------------------------------- 1 | /* T I N Y S C H E M E 1 . 50 2 | * Continued by armornick (March 2016) 3 | * Original work by Dimitrios Souflis (dsouflis@acm.org) 4 | * 5 | * Based on MiniScheme (original credits follow) 6 | * (MINISCM) coded by Atsushi Moriwaki (11/5/1989) 7 | * (MINISCM) E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 8 | * (MINISCM) This version has been modified by R.C. Secrist. 9 | * (MINISCM) 10 | * (MINISCM) Mini-Scheme is now maintained by Akira KIDA. 11 | * (MINISCM) 12 | * (MINISCM) This is a revised and modified version by Akira KIDA. 13 | * (MINISCM) current version is 0.85k4 (15 May 1994) 14 | * (MINISCM) 15 | * 16 | */ 17 | 18 | #define _SCHEME_SOURCE 19 | #include "scheme.h" 20 | #include "scheme-private.h" 21 | 22 | /*------------------ API Wrappers -------------------------------------*/ 23 | 24 | int scheme_retcode(scheme *sc) { 25 | return sc->retcode; 26 | } 27 | 28 | pointer scheme_global_env(scheme *sc) { 29 | return sc->global_env; 30 | } 31 | 32 | pointer scheme_nil(scheme *sc) { 33 | return sc->NIL; 34 | } 35 | 36 | pointer scheme_true(scheme *sc) { 37 | return sc->T; 38 | } 39 | 40 | pointer scheme_false(scheme *sc) { 41 | return sc->F; 42 | } 43 | 44 | pointer scheme_reverse(scheme *sc, pointer a) { 45 | return reverse(sc, a); 46 | } 47 | 48 | pointer scheme_reverse_in_place(scheme *sc, pointer term, pointer list) { 49 | return reverse_in_place(sc, term, list); 50 | } 51 | 52 | pointer scheme_cons(scheme *sc, pointer a, pointer b, int immutable) { 53 | return _cons(sc, a, b, immutable); 54 | } 55 | 56 | pointer scheme_integer(scheme *sc, long num) { 57 | return mk_integer(sc, num); 58 | } 59 | 60 | pointer scheme_real(scheme *sc, double num) { 61 | return mk_real(sc, num); 62 | } 63 | 64 | pointer scheme_symbol(scheme *sc, const char *name) { 65 | return mk_symbol(sc, name); 66 | } 67 | 68 | pointer scheme_gensym(scheme *sc) { 69 | return gensym(sc); 70 | } 71 | 72 | pointer scheme_string(scheme *sc, const char *str) { 73 | return mk_string(sc, str); 74 | } 75 | 76 | pointer scheme_counted_string(scheme *sc, const char *str, int len) { 77 | return mk_counted_string(sc, str, len); 78 | } 79 | 80 | pointer scheme_empty_string(scheme *sc, int len, char fill) { 81 | return mk_empty_string(sc, len, fill); 82 | } 83 | 84 | pointer scheme_character(scheme *sc, int c) { 85 | return mk_character(sc, c); 86 | } 87 | 88 | pointer scheme_foreign_func(scheme *sc, foreign_func f) { 89 | return mk_foreign_func(sc, f); 90 | } 91 | 92 | void scheme_putstr(scheme *sc, const char *s) { 93 | putstr(sc, s); 94 | } 95 | 96 | int scheme_list_length(scheme *sc, pointer a) { 97 | return list_length(sc, a); 98 | } 99 | 100 | int scheme_eqv(pointer a, pointer b) { 101 | return eqv(a, b); 102 | } 103 | 104 | int scheme_is_string(pointer p) { 105 | return is_string(p); 106 | } 107 | 108 | char* scheme_string_value(pointer p) { 109 | return string_value(p); 110 | } 111 | 112 | int scheme_is_number(pointer p) { 113 | return is_number(p); 114 | } 115 | 116 | num scheme_nvalue(pointer p) { 117 | return nvalue(p); 118 | } 119 | 120 | long scheme_ivalue(pointer p) { 121 | return ivalue(p); 122 | } 123 | 124 | double scheme_rvalue(pointer p) { 125 | return rvalue(p); 126 | } 127 | 128 | int scheme_is_integer(pointer p) { 129 | return is_integer(p); 130 | } 131 | 132 | int scheme_is_real(pointer p) { 133 | return is_real(p); 134 | } 135 | 136 | int scheme_is_character(pointer p) { 137 | return is_character(p); 138 | } 139 | 140 | long scheme_charvalue(pointer p) { 141 | return charvalue(p); 142 | } 143 | 144 | int scheme_is_vector(pointer p) { 145 | return is_vector(p); 146 | } 147 | 148 | int scheme_is_port(pointer p) { 149 | return is_port(p); 150 | } 151 | 152 | int scheme_is_pair(pointer p) { 153 | return is_pair(p); 154 | } 155 | 156 | pointer scheme_pair_car(pointer p) { 157 | return pair_car(p); 158 | } 159 | 160 | pointer scheme_pair_cdr(pointer p) { 161 | return pair_cdr(p); 162 | } 163 | 164 | pointer scheme_set_car(pointer p, pointer q) { 165 | return set_car(p, q); 166 | } 167 | 168 | pointer scheme_set_cdr(pointer p, pointer q) { 169 | return set_cdr(p, q); 170 | } 171 | 172 | int scheme_is_symbol(pointer p) { 173 | return is_symbol(p); 174 | } 175 | 176 | char* scheme_symname(pointer p) { 177 | return symname(p); 178 | } 179 | 180 | int scheme_is_syntax(pointer p) { 181 | return is_syntax(p); 182 | } 183 | 184 | int scheme_is_proc(pointer p) { 185 | return is_proc(p); 186 | } 187 | 188 | int scheme_is_foreign(pointer p) { 189 | return is_foreign(p); 190 | } 191 | 192 | char* scheme_syntaxname(pointer p) { 193 | return syntaxname(p); 194 | } 195 | 196 | int scheme_is_closure(pointer p) { 197 | return is_closure(p); 198 | } 199 | 200 | #ifdef USE_MACRO 201 | 202 | SCHEME_EXPORT int scheme_is_macro(pointer p) { 203 | return is_macro(p); 204 | } 205 | 206 | #endif 207 | 208 | int scheme_is_continuation(pointer p) { 209 | return is_continuation(p); 210 | } 211 | 212 | int scheme_is_promise(pointer p) { 213 | return is_promise(p); 214 | } 215 | 216 | int scheme_is_environment(pointer p) { 217 | return is_environment(p); 218 | } 219 | 220 | int scheme_is_immutable(pointer p) { 221 | return is_immutable(p); 222 | } 223 | 224 | void scheme_setimmutable(pointer p) { 225 | setimmutable(p); 226 | } 227 | 228 | void scheme_fill_vector(pointer vec, pointer obj) { 229 | fill_vector(vec, obj); 230 | } 231 | 232 | pointer scheme_vector_elem(pointer vec, int ielem) { 233 | return vector_elem(vec, ielem); 234 | } 235 | 236 | pointer scheme_set_vector_elem(pointer vec, int ielem, pointer a) { 237 | return set_vector_elem(vec, ielem, a); 238 | } 239 | 240 | void scheme_memory_error(scheme *sc) { 241 | sc->no_memory=1; 242 | } -------------------------------------------------------------------------------- /src/config.h: -------------------------------------------------------------------------------- 1 | /* T I N Y S C H E M E 1 . 50 2 | * Continued by armornick (March 2016) 3 | * Original work by Dimitrios Souflis (dsouflis@acm.org) 4 | * 5 | * Based on MiniScheme (original credits follow) 6 | * (MINISCM) coded by Atsushi Moriwaki (11/5/1989) 7 | * (MINISCM) E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 8 | * (MINISCM) This version has been modified by R.C. Secrist. 9 | * (MINISCM) 10 | * (MINISCM) Mini-Scheme is now maintained by Akira KIDA. 11 | * (MINISCM) 12 | * (MINISCM) This is a revised and modified version by Akira KIDA. 13 | * (MINISCM) current version is 0.85k4 (15 May 1994) 14 | * (MINISCM) 15 | * 16 | */ 17 | 18 | #ifndef _CONFIG_H 19 | #define _CONFIG_H 20 | 21 | #if defined _WIN32 && !defined SCHEME_STATIC 22 | # if defined _SCHEME_SOURCE || defined SCHEME_MODULE 23 | # define SCHEME_EXPORT __declspec(dllexport) 24 | # else 25 | # define SCHEME_EXPORT __declspec(dllimport) 26 | # endif 27 | #else 28 | # define SCHEME_EXPORT 29 | #endif 30 | 31 | 32 | #ifndef _MSC_VER 33 | # define USE_STRCASECMP 1 34 | # ifndef USE_STRLWR 35 | # define USE_STRLWR 1 36 | # endif 37 | #else 38 | # define USE_STRCASECMP 0 39 | # define USE_STRLWR 0 40 | #endif 41 | 42 | #if USE_NO_FEATURES 43 | # define USE_MATH 0 44 | # define USE_CHAR_CLASSIFIERS 0 45 | # define USE_ASCII_NAMES 0 46 | # define USE_STRING_PORTS 0 47 | # define USE_ERROR_HOOK 0 48 | # define USE_TRACING 0 49 | # define USE_COLON_HOOK 0 50 | # define USE_DL 0 51 | # define USE_PLIST 0 52 | #endif 53 | 54 | /* 55 | * Leave it defined if you want continuations, and also for the Sharp Zaurus. 56 | * Undefine it if you only care about faster speed and not strict Scheme compatibility. 57 | */ 58 | #define USE_SCHEME_STACK 59 | 60 | #if USE_DL 61 | # define USE_INTERFACE 1 62 | #endif 63 | 64 | 65 | #ifndef USE_MATH /* If math support is needed */ 66 | # define USE_MATH 1 67 | #endif 68 | 69 | #ifndef USE_CHAR_CLASSIFIERS /* If char classifiers are needed */ 70 | # define USE_CHAR_CLASSIFIERS 1 71 | #endif 72 | 73 | #ifndef USE_ASCII_NAMES /* If extended escaped characters are needed */ 74 | # define USE_ASCII_NAMES 1 75 | #endif 76 | 77 | #ifndef USE_STRING_PORTS /* Enable string ports */ 78 | # define USE_STRING_PORTS 1 79 | #endif 80 | 81 | #ifndef USE_TRACING 82 | # define USE_TRACING 1 83 | #endif 84 | 85 | #ifndef USE_PLIST 86 | # define USE_PLIST 0 87 | #endif 88 | 89 | /* To force system errors through user-defined error handling (see *error-hook*) */ 90 | #ifndef USE_ERROR_HOOK 91 | # define USE_ERROR_HOOK 1 92 | #endif 93 | 94 | #ifndef USE_COLON_HOOK /* Enable qualified qualifier */ 95 | # define USE_COLON_HOOK 1 96 | #endif 97 | 98 | #ifndef USE_STRCASECMP /* stricmp for Unix */ 99 | # define USE_STRCASECMP 0 100 | #endif 101 | 102 | #ifndef USE_STRLWR 103 | # define USE_STRLWR 1 104 | #endif 105 | 106 | #ifndef STDIO_ADDS_CR /* Define if DOS/Windows */ 107 | # define STDIO_ADDS_CR 0 108 | #endif 109 | 110 | #ifndef INLINE 111 | # define INLINE 112 | #endif 113 | 114 | #ifndef USE_INTERFACE 115 | # define USE_INTERFACE 0 116 | #endif 117 | 118 | #ifndef SHOW_ERROR_LINE /* Show error line in file */ 119 | # define SHOW_ERROR_LINE 1 120 | #endif 121 | 122 | #endif -------------------------------------------------------------------------------- /src/dynload.c: -------------------------------------------------------------------------------- 1 | /* dynload.c Dynamic Loader for TinyScheme */ 2 | /* Original Copyright (c) 1999 Alexander Shendi */ 3 | /* Modifications for NT and dl_* interface, scm_load_ext: D. Souflis */ 4 | /* Refurbished by Stephen Gildea */ 5 | 6 | #define _SCHEME_SOURCE 7 | #include "dynload.h" 8 | #include "scheme-private.h" 9 | #include 10 | #include 11 | #include 12 | 13 | #ifndef MAXPATHLEN 14 | # define MAXPATHLEN 1024 15 | #endif 16 | 17 | static void make_filename(const char *name, char *filename); 18 | static void make_init_fn(const char *name, char *init_fn); 19 | 20 | #ifdef _WIN32 21 | # include 22 | #else 23 | typedef void *HMODULE; 24 | typedef void (*FARPROC)(); 25 | #define SUN_DL 26 | #include 27 | #endif 28 | 29 | #ifdef _WIN32 30 | 31 | #define PREFIX "" 32 | #define SUFFIX ".dll" 33 | 34 | static void display_w32_error_msg(const char *additional_message) 35 | { 36 | LPVOID msg_buf; 37 | 38 | FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER, 39 | NULL, GetLastError(), 0, 40 | (LPTSTR)&msg_buf, 0, NULL); 41 | fprintf(stderr, "scheme load-extension: %s: %s", additional_message, msg_buf); 42 | LocalFree(msg_buf); 43 | } 44 | 45 | static HMODULE dl_attach(const char *module) { 46 | HMODULE dll = LoadLibrary(module); 47 | if (!dll) display_w32_error_msg(module); 48 | return dll; 49 | } 50 | 51 | static FARPROC dl_proc(HMODULE mo, const char *proc) { 52 | FARPROC procedure = GetProcAddress(mo,proc); 53 | if (!procedure) display_w32_error_msg(proc); 54 | return procedure; 55 | } 56 | 57 | static void dl_detach(HMODULE mo) { 58 | (void)FreeLibrary(mo); 59 | } 60 | 61 | #elif defined(SUN_DL) 62 | 63 | #include 64 | 65 | #define PREFIX "lib" 66 | #define SUFFIX ".so" 67 | 68 | static HMODULE dl_attach(const char *module) { 69 | HMODULE so=dlopen(module,RTLD_LAZY); 70 | if(!so) { 71 | fprintf(stderr, "Error loading scheme extension \"%s\": %s\n", module, dlerror()); 72 | } 73 | return so; 74 | } 75 | 76 | static FARPROC dl_proc(HMODULE mo, const char *proc) { 77 | const char *errmsg; 78 | FARPROC fp=(FARPROC)dlsym(mo,proc); 79 | if ((errmsg = dlerror()) == 0) { 80 | return fp; 81 | } 82 | fprintf(stderr, "Error initializing scheme module \"%s\": %s\n", proc, errmsg); 83 | return 0; 84 | } 85 | 86 | static void dl_detach(HMODULE mo) { 87 | (void)dlclose(mo); 88 | } 89 | #endif 90 | 91 | pointer scm_load_ext(scheme *sc, pointer args) 92 | { 93 | pointer first_arg; 94 | pointer retval; 95 | char filename[MAXPATHLEN], init_fn[MAXPATHLEN+6]; 96 | char *name; 97 | HMODULE dll_handle; 98 | void (*module_init)(scheme *sc); 99 | 100 | if ((args != sc->NIL) && is_string((first_arg = pair_car(args)))) { 101 | name = string_value(first_arg); 102 | make_filename(name,filename); 103 | make_init_fn(name,init_fn); 104 | dll_handle = dl_attach(filename); 105 | if (dll_handle == 0) { 106 | retval = sc -> F; 107 | } 108 | else { 109 | module_init = (void(*)(scheme *))dl_proc(dll_handle, init_fn); 110 | if (module_init != 0) { 111 | (*module_init)(sc); 112 | retval = sc -> T; 113 | } 114 | else { 115 | retval = sc->F; 116 | } 117 | } 118 | } 119 | else { 120 | retval = sc -> F; 121 | } 122 | 123 | return(retval); 124 | } 125 | 126 | static void make_filename(const char *name, char *filename) { 127 | strcpy(filename,name); 128 | strcat(filename,SUFFIX); 129 | } 130 | 131 | static void make_init_fn(const char *name, char *init_fn) { 132 | const char *p=strrchr(name,'/'); 133 | if(p==0) { 134 | p=name; 135 | } else { 136 | p++; 137 | } 138 | strcpy(init_fn,"init_"); 139 | strcat(init_fn,p); 140 | } 141 | 142 | 143 | /* 144 | Local variables: 145 | c-file-style: "k&r" 146 | End: 147 | */ 148 | -------------------------------------------------------------------------------- /src/dynload.h: -------------------------------------------------------------------------------- 1 | /* dynload.h */ 2 | /* Original Copyright (c) 1999 Alexander Shendi */ 3 | /* Modifications for NT and dl_* interface: D. Souflis */ 4 | 5 | #ifndef DYNLOAD_H 6 | #define DYNLOAD_H 7 | 8 | #include "scheme.h" 9 | 10 | SCHEME_EXPORT pointer scm_load_ext(scheme *sc, pointer arglist); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/hack.txt: -------------------------------------------------------------------------------- 1 | 2 | How to hack TinyScheme 3 | ---------------------- 4 | 5 | TinyScheme is easy to learn and modify. It is structured like a 6 | meta-interpreter, only it is written in C. All data are Scheme 7 | objects, which facilitates both understanding/modifying the 8 | code and reifying the interpreter workings. 9 | 10 | In place of a dry description, we will pace through the addition 11 | of a useful new datatype: garbage-collected memory blocks. 12 | The interface will be: 13 | 14 | (make-block []) makes a new block of the specified size 15 | optionally filling it with a specified byte 16 | (block? ) 17 | (block-length ) 18 | (block-ref ) retrieves byte at location 19 | (block-set! ) modifies byte at location 20 | 21 | In the sequel, lines that begin with '>' denote lines to add to the 22 | code. Lines that begin with '|' are just citations of existing code. 23 | Lines that begin with X denote lines to be removed from the code. 24 | 25 | First of all, we need to assign a typeid to our new type. Typeids 26 | in TinyScheme are small integers declared in the scheme_types enum 27 | located near the top of the scheme.c file; it begins with T_STRING. 28 | Add a new entry at the end, say T_MEMBLOCK. Remember to adjust the 29 | value of T_LAST_SYTEM_TYPE when adding new entries. There can be at 30 | most 31 types, but you don't have to worry about that limit yet. 31 | 32 | | T_ENVIRONMENT=14, 33 | X T_LAST_SYSTEM_TYPE=14 34 | > T_MEMBLOCK=15, 35 | > T_LAST_SYSTEM_TYPE=15 36 | | }; 37 | 38 | 39 | Then, some helper macros would be useful. Go to where is_string() 40 | and the rest are defined and add: 41 | 42 | > INTERFACE INLINE int is_memblock(pointer p) { return (type(p)==T_MEMBLOCK); } 43 | 44 | This actually is a function, because it is meant to be exported by 45 | scheme.h. If no foreign function will ever manipulate a memory block, 46 | you can instead define it as a macro: 47 | 48 | > #define is_memblock(p) (type(p)==T_MEMBLOCK) 49 | 50 | Then we make space for the new type in the main data structure: 51 | struct cell. As it happens, the _string part of the union _object 52 | (that is used to hold character strings) has two fields that suit us: 53 | 54 | | struct { 55 | | char *_svalue; 56 | | int _keynum; 57 | | } _string; 58 | 59 | We can use _svalue to hold the actual pointer and _keynum to hold its 60 | length. If we couln't reuse existing fields, we could always add other 61 | alternatives in union _object. 62 | 63 | We then proceed to write the function that actually makes a new block. 64 | For conformance reasons, we name it mk_memblock 65 | 66 | > static pointer mk_memblock(scheme *sc, int len, char fill) { 67 | > pointer x; 68 | > char *p=(char*)sc->malloc(len); 69 | > 70 | > if(p==0) { 71 | > return sc->NIL; 72 | > } 73 | > x = get_cell(sc, sc->NIL, sc->NIL); 74 | > 75 | > typeflag(x) = T_MEMBLOCK|T_ATOM; 76 | > strvalue(x)=p; 77 | > keynum(x)=len; 78 | > memset(p,fill,len); 79 | > return (x); 80 | > } 81 | 82 | The memory used by the MEMBLOCK will have to be freed when the cell 83 | is reclaimed during garbage collection. There is a placeholder for 84 | that staff, function finalize_cell(), currently handling strings only. 85 | 86 | | static void finalize_cell(scheme *sc, pointer a) { 87 | | if(is_string(a)) { 88 | | sc->free(strvalue(a)); 89 | > } else if(is_memblock(a)) { 90 | > sc->free(strvalue(a)); 91 | | } else if(is_port(a)) { 92 | 93 | There are no MEMBLOCK literals, so we don't concern ourselves with 94 | the READER part (yet!). We must cater to the PRINTER, though. We 95 | add one case more in atom2str(). 96 | 97 | | } else if (iscontinuation(l)) { 98 | | p = "#"; 99 | > } else if (is_memblock(l)) { 100 | > p = "#"; 101 | | } else { 102 | 103 | Whenever a MEMBLOCK is displayed, it will look like that. 104 | Now, we must add the interface functions: constructor, predicate, 105 | accessor, modifier. We must in fact create new op-codes for the virtual 106 | machine underlying TinyScheme. Since version 1.30, TinyScheme uses 107 | macros and a single source text to keep the enums and the dispatch table 108 | in sync. The op-codes are defined in the opdefines.h file with one line 109 | for each op-code. The lines in the file have six columns between the 110 | starting _OPDEF( and ending ): A, B, C, D, E, and OP. 111 | Note that this file uses unusually long lines to accomodate all the 112 | information; adjust your editor to handle this. 113 | 114 | The purpose of the columns is: 115 | - Column A is the name of the subroutine that handles the op-code. 116 | - Column B is the name of the op-code function. 117 | - Columns C and D are the minimum and maximum number of arguments 118 | that are accepted by the op-code. 119 | - Column E is a set of flags that tells the interpreter the type of 120 | each of the arguments expected by the op-code. 121 | - Column OP is used in the scheme_opcodes enum located in the 122 | scheme-private.h file. 123 | 124 | Op-codes are really just tags for a huge C switch, only this switch 125 | is broken up in to a number of different opexe_X functions. The 126 | correspondence is made in table "dispatch_table". There, we assign 127 | the new op-codes to opexe_2, where the equivalent ones for vectors 128 | are situated. We also assign a name for them, and specify the minimum 129 | and maximum arity (number of expected arguments). INF_ARG as a maximum 130 | arity means "unlimited". 131 | 132 | For reasons of consistency, we add the new op-codes right after those 133 | for vectors: 134 | 135 | | _OP_DEF(opexe_2, "vector-set!", 3, 3, TST_VECTOR TST_NATURAL TST_ANY, OP_VECSET ) 136 | > _OP_DEF(opexe_2, "make-block", 1, 2, TST_NATURAL TST_CHAR, OP_MKBLOCK ) 137 | > _OP_DEF(opexe_2, "block-length", 1, 1, T_MEMBLOCK, OP_BLOCKLEN ) 138 | > _OP_DEF(opexe_2, "block-ref", 2, 2, T_MEMBLOCK TST_NATURAL, OP_BLOCKREF ) 139 | > _OP_DEF(opexe_2, "block-set!", 1, 1, T_MEMBLOCK TST_NATURAL TST_CHAR, OP_BLOCKSET ) 140 | | _OP_DEF(opexe_3, "not", 1, 1, TST_NONE, OP_NOT ) 141 | 142 | We add the predicate along with the other predicates in opexe_3: 143 | 144 | | _OP_DEF(opexe_3, "vector?", 1, 1, TST_ANY, OP_VECTORP ) 145 | > _OP_DEF(opexe_3, "block?", 1, 1, TST_ANY, OP_BLOCKP ) 146 | | _OP_DEF(opexe_3, "eq?", 2, 2, TST_ANY, OP_EQ ) 147 | 148 | All that remains is to write the actual code to do the processing and 149 | add it to the switch statement in opexe_2, after the OP_VECSET case. 150 | 151 | > case OP_MKBLOCK: { /* make-block */ 152 | > int fill=0; 153 | > int len; 154 | > 155 | > if(!isnumber(car(sc->args))) { 156 | > Error_1(sc,"make-block: not a number:",car(sc->args)); 157 | > } 158 | > len=ivalue(car(sc->args)); 159 | > if(len<=0) { 160 | > Error_1(sc,"make-block: not positive:",car(sc->args)); 161 | > } 162 | > 163 | > if(cdr(sc->args)!=sc->NIL) { 164 | > if(!isnumber(cadr(sc->args)) || ivalue(cadr(sc->args))<0) { 165 | > Error_1(sc,"make-block: not a positive number:",cadr(sc->args)); 166 | > } 167 | > fill=charvalue(cadr(sc->args))%255; 168 | > } 169 | > s_return(sc,mk_memblock(sc,len,(char)fill)); 170 | > } 171 | > 172 | > case OP_BLOCKLEN: /* block-length */ 173 | > if(!ismemblock(car(sc->args))) { 174 | > Error_1(sc,"block-length: not a memory block:",car(sc->args)); 175 | > } 176 | > s_return(sc,mk_integer(sc,keynum(car(sc->args)))); 177 | > 178 | > case OP_BLOCKREF: { /* block-ref */ 179 | > char *str; 180 | > int index; 181 | > 182 | > if(!ismemblock(car(sc->args))) { 183 | > Error_1(sc,"block-ref: not a memory block:",car(sc->args)); 184 | > } 185 | > str=strvalue(car(sc->args)); 186 | > 187 | > if(cdr(sc->args)==sc->NIL) { 188 | > Error_0(sc,"block-ref: needs two arguments"); 189 | > } 190 | > if(!isnumber(cadr(sc->args))) { 191 | > Error_1(sc,"block-ref: not a number:",cadr(sc->args)); 192 | > } 193 | > index=ivalue(cadr(sc->args)); 194 | > 195 | > if(index<0 || index>=keynum(car(sc->args))) { 196 | > Error_1(sc,"block-ref: out of bounds:",cadr(sc->args)); 197 | > } 198 | > 199 | > s_return(sc,mk_integer(sc,str[index])); 200 | > } 201 | > 202 | > case OP_BLOCKSET: { /* block-set! */ 203 | > char *str; 204 | > int index; 205 | > int c; 206 | > 207 | > if(!ismemblock(car(sc->args))) { 208 | > Error_1(sc,"block-set!: not a memory block:",car(sc->args)); 209 | > } 210 | > if(isimmutable(car(sc->args))) { 211 | > Error_1(sc,"block-set!: unable to alter immutable memory block:",car(sc->args)); 212 | > } 213 | > str=strvalue(car(sc->args)); 214 | > 215 | > if(cdr(sc->args)==sc->NIL) { 216 | > Error_0(sc,"block-set!: needs three arguments"); 217 | > } 218 | > if(!isnumber(cadr(sc->args))) { 219 | > Error_1(sc,"block-set!: not a number:",cadr(sc->args)); 220 | > } 221 | > index=ivalue(cadr(sc->args)); 222 | > if(index<0 || index>=keynum(car(sc->args))) { 223 | > Error_1(sc,"block-set!: out of bounds:",cadr(sc->args)); 224 | > } 225 | > 226 | > if(cddr(sc->args)==sc->NIL) { 227 | > Error_0(sc,"block-set!: needs three arguments"); 228 | > } 229 | > if(!isinteger(caddr(sc->args))) { 230 | > Error_1(sc,"block-set!: not an integer:",caddr(sc->args)); 231 | > } 232 | > c=ivalue(caddr(sc->args))%255; 233 | > 234 | > str[index]=(char)c; 235 | > s_return(sc,car(sc->args)); 236 | > } 237 | 238 | Finally, do the same for the predicate in opexe_3. 239 | 240 | | case OP_VECTORP: /* vector? */ 241 | | s_retbool(is_vector(car(sc->args))); 242 | > case OP_BLOCKP: /* block? */ 243 | > s_retbool(is_memblock(car(sc->args))); 244 | | case OP_EQ: /* eq? */ 245 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | /* T I N Y S C H E M E 1 . 50 2 | * Continued by armornick (March 2016) 3 | * Original work by Dimitrios Souflis (dsouflis@acm.org) 4 | * 5 | * Based on MiniScheme (original credits follow) 6 | * (MINISCM) coded by Atsushi Moriwaki (11/5/1989) 7 | * (MINISCM) E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 8 | * (MINISCM) This version has been modified by R.C. Secrist. 9 | * (MINISCM) 10 | * (MINISCM) Mini-Scheme is now maintained by Akira KIDA. 11 | * (MINISCM) 12 | * (MINISCM) This is a revised and modified version by Akira KIDA. 13 | * (MINISCM) current version is 0.85k4 (15 May 1994) 14 | * (MINISCM) 15 | * 16 | */ 17 | 18 | #include "scheme.h" 19 | #include "dynload.h" 20 | 21 | #include 22 | #include 23 | 24 | 25 | #ifndef WIN32 26 | # include 27 | #endif 28 | #ifdef WIN32 29 | # include /* access */ 30 | # include /* off_t */ 31 | #define snprintf _snprintf 32 | #endif 33 | 34 | #define banner "TinyScheme 1.50" 35 | 36 | #ifndef InitFile 37 | # define InitFile "init.scm" 38 | #endif 39 | 40 | 41 | #if defined(__APPLE__) && !defined (OSX) 42 | int main() 43 | { 44 | extern MacTS_main(int argc, char **argv); 45 | char** argv; 46 | int argc = ccommand(&argv); 47 | MacTS_main(argc,argv); 48 | return 0; 49 | } 50 | int MacTS_main(int argc, char **argv) { 51 | #else 52 | 53 | int main(int argc, char **argv) { 54 | #endif 55 | 56 | scheme *sc; 57 | FILE *fin; 58 | char *file_name=InitFile; 59 | int retcode; 60 | int isfile=1; 61 | 62 | if(argc==1) { 63 | printf("%s\n", banner); 64 | } 65 | 66 | if(argc==2 && strcmp(argv[1],"-?")==0) { 67 | printf("Usage: tinyscheme -?\n"); 68 | printf("or: tinyscheme [ ...]\n"); 69 | printf("followed by\n"); 70 | printf(" -1 [ ...]\n"); 71 | printf(" -c [ ...]\n"); 72 | printf("assuming that the executable is named tinyscheme.\n"); 73 | printf("Use - as filename for stdin.\n"); 74 | return 1; 75 | } 76 | 77 | // printf("initializing scheme environment\n"); 78 | if(!scheme_init(&sc)) { 79 | fprintf(stderr,"Could not initialize!\n"); 80 | return 2; 81 | } 82 | 83 | scheme_set_input_port_file(sc, stdin); 84 | scheme_set_output_port_file(sc, stdout); 85 | 86 | #if USE_DL 87 | // printf("defining extension loader\n"); 88 | scheme_define(sc,scheme_global_env(sc),scheme_symbol(sc,"load-extension"),scheme_foreign_func(sc, scm_load_ext)); 89 | #endif 90 | 91 | argv++; 92 | if(access(file_name,0)!=0) { 93 | char *p=getenv("TINYSCHEMEINIT"); 94 | if(p!=0) { 95 | file_name=p; 96 | } 97 | } 98 | do { 99 | if(strcmp(file_name,"-")==0) { 100 | fin=stdin; 101 | } else if(strcmp(file_name,"-1")==0 || strcmp(file_name,"-c")==0) { 102 | pointer args=scheme_nil(sc); 103 | isfile=file_name[1]=='1'; 104 | file_name=*argv++; 105 | if(strcmp(file_name,"-")==0) { 106 | fin=stdin; 107 | } else if(isfile) { 108 | fin=fopen(file_name,"r"); 109 | } 110 | for(;*argv;argv++) { 111 | pointer value=scheme_string(sc,*argv); 112 | args=cons(sc,value,args); 113 | } 114 | args=scheme_reverse_in_place(sc,scheme_nil(sc),args); 115 | scheme_define(sc,scheme_global_env(sc),scheme_symbol(sc,"*args*"),args); 116 | 117 | } else { 118 | fin=fopen(file_name,"r"); 119 | } 120 | if(isfile && fin==0) { 121 | fprintf(stderr,"Could not open file %s\n",file_name); 122 | } else { 123 | if(isfile) { 124 | scheme_load_named_file(sc,fin,file_name); 125 | } else { 126 | scheme_load_string(sc,file_name); 127 | } 128 | if(!isfile || fin!=stdin) { 129 | if(scheme_retcode(sc) != 0) { 130 | fprintf(stderr,"Errors encountered reading %s\n",file_name); 131 | } 132 | if(isfile) { 133 | fclose(fin); 134 | } 135 | } 136 | } 137 | file_name=*argv++; 138 | } while(file_name!=0); 139 | if(argc==1) { 140 | scheme_load_named_file(sc,stdin,0); 141 | } 142 | retcode=scheme_retcode(sc); 143 | scheme_deinit(sc); 144 | 145 | return retcode; 146 | } 147 | -------------------------------------------------------------------------------- /src/re/COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. 2 | This software is not subject to any license of the American Telephone 3 | and Telegraph Company or of the Regents of the University of California. 4 | 5 | Permission is granted to anyone to use this software for any purpose on 6 | any computer system, and to alter it and redistribute it, subject 7 | to the following restrictions: 8 | 9 | 1. The author is not responsible for the consequences of use of this 10 | software, no matter how awful, even if they arise from flaws in it. 11 | 12 | 2. The origin of this software must not be misrepresented, either by 13 | explicit claim or by omission. Since few users ever read sources, 14 | credits must appear in the documentation. 15 | 16 | 3. Altered versions must be plainly marked as such, and must not be 17 | misrepresented as being the original software. Since few users 18 | ever read sources, credits must appear in the documentation. 19 | 20 | 4. This notice may not be removed or altered. 21 | -------------------------------------------------------------------------------- /src/re/Makefile.in: -------------------------------------------------------------------------------- 1 | SHELL = /bin/sh 2 | 3 | srcdir=@srcdir@ 4 | VPATH=@srcdir@ 5 | 6 | CC=@CC@ 7 | RANLIB=@RANLIB@ 8 | 9 | # You probably want to take -DREDEBUG out of CFLAGS, and put something like 10 | # -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of 11 | # internal assertion checking and some debugging facilities). 12 | # Put -Dconst= in for a pre-ANSI compiler. 13 | # Do not take -DPOSIX_MISTAKE out. 14 | # REGCFLAGS isn't important to you (it's for my use in some special contexts). 15 | CFLAGS=-I$(srcdir) -I. -DPOSIX_MISTAKE @CFLAGS@ 16 | 17 | # If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want 18 | # the Berkeley __P macro, put -b in. 19 | MKHFLAGS= 20 | 21 | # Flags for linking but not compiling, if any. 22 | LDFLAGS=@LDFLAGS@ 23 | 24 | # Extra libraries for linking, if any. 25 | LIBS= 26 | 27 | # Internal stuff, should not need changing. 28 | OBJPRODN=regcomp.o regexec.o regerror.o regfree.o 29 | OBJS=$(OBJPRODN) split.o debug.o main.o 30 | H=cclass.h cname.h regex2.h utils.h 31 | REGSRC=regcomp.c regerror.c regexec.c regfree.c 32 | ALLSRC=$(REGSRC) engine.c debug.c main.c split.c 33 | 34 | # Stuff that matters only if you're trying to lint the package. 35 | LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG 36 | LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c 37 | JUNKLINT=possible pointer alignment|null effect 38 | 39 | # arrangements to build forward-reference header files 40 | .SUFFIXES: .ih .h 41 | .c.ih: 42 | sh $(srcdir)/mkh $(MKHFLAGS) -p $< >$@ 43 | 44 | all lib: libregex.a 45 | 46 | libregex.a: $(OBJPRODN) 47 | rm -f libregex.a 48 | ar cr libregex.a $(OBJPRODN) 49 | $(RANLIB) libregex.a 50 | 51 | default: r 52 | 53 | purge: 54 | rm -f *.o 55 | 56 | # stuff to build regex.h 57 | REGEXH=regex.h 58 | REGEXHSRC=regex2.h $(REGSRC) 59 | $(REGEXH): $(REGEXHSRC) mkh 60 | sh $(srcdir)/./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.h 61 | #cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h 62 | #rm -f regex.tmp 63 | 64 | # dependencies 65 | $(OBJPRODN) debug.o: utils.h regex.h regex2.h 66 | regcomp.o: cclass.h cname.h regcomp.ih 67 | regexec.o: engine.c engine.ih 68 | regerror.o: regerror.ih 69 | debug.o: debug.ih 70 | main.o: main.ih 71 | 72 | # tester 73 | re: $(OBJS) 74 | $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ 75 | 76 | # regression test 77 | r: re tests 78 | ./re &1 | egrep -v '$(JUNKLINT)' | tee lint 101 | 102 | fullprint: 103 | ti README WHATSNEW notes todo | list 104 | ti *.h | list 105 | list *.c 106 | list regex.3 regex.7 107 | 108 | print: 109 | ti README WHATSNEW notes todo | list 110 | ti *.h | list 111 | list reg*.c engine.c 112 | 113 | 114 | mf.tmp: Makefile 115 | sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@ 116 | 117 | DTRH=cclass.h cname.h regex2.h utils.h 118 | PRE=COPYRIGHT README WHATSNEW 119 | POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch] 120 | FILES=$(PRE) Makefile $(POST) 121 | DTR=$(PRE) Makefile=mf.tmp $(POST) 122 | dtr: $(FILES) mf.tmp 123 | makedtr $(DTR) >$@ 124 | rm mf.tmp 125 | 126 | cio: $(FILES) 127 | cio $(FILES) 128 | 129 | rdf: $(FILES) 130 | rcsdiff -c $(FILES) 2>&1 | p 131 | 132 | # various forms of cleanup 133 | tidy: 134 | rm -f junk* core core.* *.core dtr *.tmp lint 135 | 136 | clean: tidy 137 | rm -f *.o *.s re libregex.a 138 | 139 | # don't do this one unless you know what you're doing 140 | spotless: clean 141 | rm -f mkh regex.h 142 | -------------------------------------------------------------------------------- /src/re/README: -------------------------------------------------------------------------------- 1 | alpha3.4 release. 2 | Thu Mar 17 23:17:18 EST 1994 3 | henry@zoo.toronto.edu 4 | 5 | See WHATSNEW for change listing. 6 | 7 | installation notes: 8 | -------- 9 | Read the comments at the beginning of Makefile before running. 10 | 11 | Utils.h contains some things that just might have to be modified on 12 | some systems, as well as a nested include (ugh) of . 13 | 14 | The "fake" directory contains quick-and-dirty fakes for some header 15 | files and routines that old systems may not have. Note also that 16 | -DUSEBCOPY will make utils.h substitute bcopy() for memmove(). 17 | 18 | After that, "make r" will build regcomp.o, regexec.o, regfree.o, 19 | and regerror.o (the actual routines), bundle them together into a test 20 | program, and run regression tests on them. No output is good output. 21 | 22 | "make lib" builds just the .o files for the actual routines (when 23 | you're happy with testing and have adjusted CFLAGS for production), 24 | and puts them together into libregex.a. You can pick up either the 25 | library or *.o ("make lib" makes sure there are no other .o files left 26 | around to confuse things). 27 | 28 | Main.c, debug.c, split.c are used for regression testing but are not part 29 | of the RE routines themselves. 30 | 31 | Regex.h goes in /usr/include. All other .h files are internal only. 32 | -------- 33 | -------------------------------------------------------------------------------- /src/re/README.1st: -------------------------------------------------------------------------------- 1 | TinyScheme RE (Regular Expressions) extension 2 | --------------------------------------------- 3 | Version 1.2, August 2002 4 | 5 | The bulk of this directory is the regular expression library written 6 | by Henry Spencer (see file README and COPYRIGHT). 7 | 8 | Two files were added to produce the TinyScheme regular expression 9 | library, re.so: re.c and re.makefile. The included re.makefile was contributed 10 | initially by Stephen Gildea and should be adaptable to all Unix systems. 11 | 12 | The makefile produces a DLL named re.so. For now, it contains just 13 | a single foreign function (re-match ). It returns 14 | true (string matches pattern) or false. If it is called with an 15 | extra parameter, which should be a vector, overwrites as many elements 16 | of the vector as needed with the strings that matched the corresponding 17 | parenthesized subexpressions inside . 18 | 19 | It is not fully tested, so use with caution. 20 | 21 | Load the extension from inside TinyScheme using 22 | (load-extension "re/re") 23 | assuming that re.so is in the directory "re". 24 | 25 | Load "re.scm" if you wish to use v.1.1 behavior. 26 | 27 | dsouflis@acm.org 28 | -------------------------------------------------------------------------------- /src/re/WHATSNEW: -------------------------------------------------------------------------------- 1 | New in 1.3: Include scheme-private.h in re.c to fix compilation errors 2 | when used with TinyScheme 1.40 and later. 3 | 4 | New in alpha3.4: The complex bug alluded to below has been fixed (in a 5 | slightly kludgey temporary way that may hurt efficiency a bit; this is 6 | another "get it out the door for 4.4" release). The tests at the end of 7 | the tests file have accordingly been uncommented. The primary sign of 8 | the bug was that something like a?b matching ab matched b rather than ab. 9 | (The bug was essentially specific to this exact situation, else it would 10 | have shown up earlier.) 11 | 12 | New in alpha3.3: The definition of word boundaries has been altered 13 | slightly, to more closely match the usual programming notion that "_" 14 | is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir, 15 | and the makefile no longer alludes to it in mysterious ways. The 16 | makefile has generally been cleaned up some. Fixes have been made 17 | (again!) so that the regression test will run without -DREDEBUG, at 18 | the cost of weaker checking. A workaround for a bug in some folks' 19 | has been added. And some more things have been added to 20 | tests, including a couple right at the end which are commented out 21 | because the code currently flunks them (complex bug; fix coming). 22 | Plus the usual minor cleanup. 23 | 24 | New in alpha3.2: Assorted bits of cleanup and portability improvement 25 | (the development base is now a BSDI system using GCC instead of an ancient 26 | Sun system, and the newer compiler exposed some glitches). Fix for a 27 | serious bug that affected REs using many [] (including REG_ICASE REs 28 | because of the way they are implemented), *sometimes*, depending on 29 | memory-allocation patterns. The header-file prototypes no longer name 30 | the parameters, avoiding possible name conflicts. The possibility that 31 | some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is 32 | now handled gracefully. "uchar" is no longer used as an internal type 33 | name (too many people have the same idea). Still the same old lousy 34 | performance, alas. 35 | 36 | New in alpha3.1: Basically nothing, this release is just a bookkeeping 37 | convenience. Stay tuned. 38 | 39 | New in alpha3.0: Performance is no better, alas, but some fixes have been 40 | made and some functionality has been added. (This is basically the "get 41 | it out the door in time for 4.4" release.) One bug fix: regfree() didn't 42 | free the main internal structure (how embarrassing). It is now possible 43 | to put NULs in either the RE or the target string, using (resp.) a new 44 | REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to 45 | regcomp() makes all characters ordinary, so you can match a literal 46 | string easily (this will become more useful when performance improves!). 47 | There are now primitives to match beginnings and ends of words, although 48 | the syntax is disgusting and so is the implementation. The REG_ATOI 49 | debugging interface has changed a bit. And there has been considerable 50 | internal cleanup of various kinds. 51 | 52 | New in alpha2.3: Split change list out of README, and moved flags notes 53 | into Makefile. Macro-ized the name of regex(7) in regex(3), since it has 54 | to change for 4.4BSD. Cleanup work in engine.c, and some new regression 55 | tests to catch tricky cases thereof. 56 | 57 | New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two 58 | small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges 59 | in my own test program and might be useful to others for similar purposes. 60 | The regression test will now compile (and run) without REDEBUG. The 61 | BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now. 62 | Char/uchar parameters are now written int/unsigned, to avoid possible 63 | portability problems with unpromoted parameters. Some unsigned casts have 64 | been introduced to minimize portability problems with shifting into sign 65 | bits. 66 | 67 | New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big 68 | thing is that regex.h is now generated, using mkh, rather than being 69 | supplied in the distribution; due to circularities in dependencies, 70 | you have to build regex.h explicitly by "make h". The two known bugs 71 | have been fixed (and the regression test now checks for them), as has a 72 | problem with assertions not being suppressed in the absence of REDEBUG. 73 | No performance work yet. 74 | 75 | New in alpha2: Backslash-anything is an ordinary character, not an 76 | error (except, of course, for the handful of backslashed metacharacters 77 | in BREs), which should reduce script breakage. The regression test 78 | checks *where* null strings are supposed to match, and has generally 79 | been tightened up somewhat. Small bug fixes in parameter passing (not 80 | harmful, but technically errors) and some other areas. Debugging 81 | invoked by defining REDEBUG rather than not defining NDEBUG. 82 | 83 | New in alpha+3: full prototyping for internal routines, using a little 84 | helper program, mkh, which extracts prototypes given in stylized comments. 85 | More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple 86 | pre-screening of input when a literal string is known to be part of the 87 | RE; this does wonders for performance. 88 | 89 | New in alpha+2: minor bits of cleanup. Notably, the number "32" for the 90 | word width isn't hardwired into regexec.c any more, the public header 91 | file prototypes the functions if __STDC__ is defined, and some small typos 92 | in the manpages have been fixed. 93 | 94 | New in alpha+1: improvements to the manual pages, and an important 95 | extension, the REG_STARTEND option to regexec(). 96 | -------------------------------------------------------------------------------- /src/re/cclass.h: -------------------------------------------------------------------------------- 1 | /* character-class table */ 2 | static struct cclass { 3 | char *name; 4 | char *chars; 5 | char *multis; 6 | } cclasses[] = { 7 | { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 8 | 0123456789", "" }, 9 | { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 10 | "" }, 11 | { "blank", " \t", "" }, 12 | { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ 13 | \25\26\27\30\31\32\33\34\35\36\37\177", "" }, 14 | { "digit", "0123456789", "" }, 15 | { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 16 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", 17 | "" }, 18 | { "lower", "abcdefghijklmnopqrstuvwxyz", 19 | "" }, 20 | { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 21 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", 22 | "" }, 23 | { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", 24 | "" }, 25 | { "space", "\t\n\v\f\r ", "" }, 26 | { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 27 | "" }, 28 | { "xdigit", "0123456789ABCDEFabcdef", 29 | "" }, 30 | { NULL, 0, "" } 31 | }; 32 | -------------------------------------------------------------------------------- /src/re/cname.h: -------------------------------------------------------------------------------- 1 | /* character-name table */ 2 | static struct cname { 3 | char *name; 4 | char code; 5 | } cnames[] = { 6 | { "NUL", '\0' }, 7 | { "SOH", '\001' }, 8 | { "STX", '\002' }, 9 | { "ETX", '\003' }, 10 | { "EOT", '\004' }, 11 | { "ENQ", '\005' }, 12 | { "ACK", '\006' }, 13 | { "BEL", '\007' }, 14 | { "alert", '\007' }, 15 | { "BS", '\010' }, 16 | { "backspace", '\b' }, 17 | { "HT", '\011' }, 18 | { "tab", '\t' }, 19 | { "LF", '\012' }, 20 | { "newline", '\n' }, 21 | { "VT", '\013' }, 22 | { "vertical-tab", '\v' }, 23 | { "FF", '\014' }, 24 | { "form-feed", '\f' }, 25 | { "CR", '\015' }, 26 | { "carriage-return", '\r' }, 27 | { "SO", '\016' }, 28 | { "SI", '\017' }, 29 | { "DLE", '\020' }, 30 | { "DC1", '\021' }, 31 | { "DC2", '\022' }, 32 | { "DC3", '\023' }, 33 | { "DC4", '\024' }, 34 | { "NAK", '\025' }, 35 | { "SYN", '\026' }, 36 | { "ETB", '\027' }, 37 | { "CAN", '\030' }, 38 | { "EM", '\031' }, 39 | { "SUB", '\032' }, 40 | { "ESC", '\033' }, 41 | { "IS4", '\034' }, 42 | { "FS", '\034' }, 43 | { "IS3", '\035' }, 44 | { "GS", '\035' }, 45 | { "IS2", '\036' }, 46 | { "RS", '\036' }, 47 | { "IS1", '\037' }, 48 | { "US", '\037' }, 49 | { "space", ' ' }, 50 | { "exclamation-mark", '!' }, 51 | { "quotation-mark", '"' }, 52 | { "number-sign", '#' }, 53 | { "dollar-sign", '$' }, 54 | { "percent-sign", '%' }, 55 | { "ampersand", '&' }, 56 | { "apostrophe", '\'' }, 57 | { "left-parenthesis", '(' }, 58 | { "right-parenthesis", ')' }, 59 | { "asterisk", '*' }, 60 | { "plus-sign", '+' }, 61 | { "comma", ',' }, 62 | { "hyphen", '-' }, 63 | { "hyphen-minus", '-' }, 64 | { "period", '.' }, 65 | { "full-stop", '.' }, 66 | { "slash", '/' }, 67 | { "solidus", '/' }, 68 | { "zero", '0' }, 69 | { "one", '1' }, 70 | { "two", '2' }, 71 | { "three", '3' }, 72 | { "four", '4' }, 73 | { "five", '5' }, 74 | { "six", '6' }, 75 | { "seven", '7' }, 76 | { "eight", '8' }, 77 | { "nine", '9' }, 78 | { "colon", ':' }, 79 | { "semicolon", ';' }, 80 | { "less-than-sign", '<' }, 81 | { "equals-sign", '=' }, 82 | { "greater-than-sign", '>' }, 83 | { "question-mark", '?' }, 84 | { "commercial-at", '@' }, 85 | { "left-square-bracket", '[' }, 86 | { "backslash", '\\' }, 87 | { "reverse-solidus", '\\' }, 88 | { "right-square-bracket", ']' }, 89 | { "circumflex", '^' }, 90 | { "circumflex-accent", '^' }, 91 | { "underscore", '_' }, 92 | { "low-line", '_' }, 93 | { "grave-accent", '`' }, 94 | { "left-brace", '{' }, 95 | { "left-curly-bracket", '{' }, 96 | { "vertical-line", '|' }, 97 | { "right-brace", '}' }, 98 | { "right-curly-bracket", '}' }, 99 | { "tilde", '~' }, 100 | { "DEL", '\177' }, 101 | { NULL, 0 } 102 | }; 103 | -------------------------------------------------------------------------------- /src/re/debug.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "utils.h" 4 | #include "regex2.h" 5 | #include "debug.ih" 6 | 7 | /* 8 | - regprint - print a regexp for debugging 9 | == void regprint(regex_t *r, FILE *d); 10 | */ 11 | void 12 | regprint(r, d) 13 | regex_t *r; 14 | FILE *d; 15 | { 16 | register struct re_guts *g = r->re_g; 17 | register int i; 18 | register int c; 19 | register int last; 20 | int nincat[NC]; 21 | 22 | fprintf(d, "%ld states, %d categories", (long)g->nstates, 23 | g->ncategories); 24 | fprintf(d, ", first %ld last %ld", (long)g->firststate, 25 | (long)g->laststate); 26 | if (g->iflags&USEBOL) 27 | fprintf(d, ", USEBOL"); 28 | if (g->iflags&USEEOL) 29 | fprintf(d, ", USEEOL"); 30 | if (g->iflags&BAD) 31 | fprintf(d, ", BAD"); 32 | if (g->nsub > 0) 33 | fprintf(d, ", nsub=%ld", (long)g->nsub); 34 | if (g->must != NULL) 35 | fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, 36 | g->must); 37 | if (g->backrefs) 38 | fprintf(d, ", backrefs"); 39 | if (g->nplus > 0) 40 | fprintf(d, ", nplus %ld", (long)g->nplus); 41 | fprintf(d, "\n"); 42 | s_print(g, d); 43 | for (i = 0; i < g->ncategories; i++) { 44 | nincat[i] = 0; 45 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) 46 | if (g->categories[c] == i) 47 | nincat[i]++; 48 | } 49 | fprintf(d, "cc0#%d", nincat[0]); 50 | for (i = 1; i < g->ncategories; i++) 51 | if (nincat[i] == 1) { 52 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) 53 | if (g->categories[c] == i) 54 | break; 55 | fprintf(d, ", %d=%s", i, regchar(c)); 56 | } 57 | fprintf(d, "\n"); 58 | for (i = 1; i < g->ncategories; i++) 59 | if (nincat[i] != 1) { 60 | fprintf(d, "cc%d\t", i); 61 | last = -1; 62 | for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ 63 | if (c <= CHAR_MAX && g->categories[c] == i) { 64 | if (last < 0) { 65 | fprintf(d, "%s", regchar(c)); 66 | last = c; 67 | } 68 | } else { 69 | if (last >= 0) { 70 | if (last != c-1) 71 | fprintf(d, "-%s", 72 | regchar(c-1)); 73 | last = -1; 74 | } 75 | } 76 | fprintf(d, "\n"); 77 | } 78 | } 79 | 80 | /* 81 | - s_print - print the strip for debugging 82 | == static void s_print(register struct re_guts *g, FILE *d); 83 | */ 84 | static void 85 | s_print(g, d) 86 | register struct re_guts *g; 87 | FILE *d; 88 | { 89 | register sop *s; 90 | register cset *cs; 91 | register int i; 92 | register int done = 0; 93 | register sop opnd; 94 | register int col = 0; 95 | register int last; 96 | register sopno offset = 2; 97 | # define GAP() { if (offset % 5 == 0) { \ 98 | if (col > 40) { \ 99 | fprintf(d, "\n\t"); \ 100 | col = 0; \ 101 | } else { \ 102 | fprintf(d, " "); \ 103 | col++; \ 104 | } \ 105 | } else \ 106 | col++; \ 107 | offset++; \ 108 | } 109 | 110 | if (OP(g->strip[0]) != OEND) 111 | fprintf(d, "missing initial OEND!\n"); 112 | for (s = &g->strip[1]; !done; s++) { 113 | opnd = OPND(*s); 114 | switch (OP(*s)) { 115 | case OEND: 116 | fprintf(d, "\n"); 117 | done = 1; 118 | break; 119 | case OCHAR: 120 | if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) 121 | fprintf(d, "\\%c", (char)opnd); 122 | else 123 | fprintf(d, "%s", regchar((char)opnd)); 124 | break; 125 | case OBOL: 126 | fprintf(d, "^"); 127 | break; 128 | case OEOL: 129 | fprintf(d, "$"); 130 | break; 131 | case OBOW: 132 | fprintf(d, "\\{"); 133 | break; 134 | case OEOW: 135 | fprintf(d, "\\}"); 136 | break; 137 | case OANY: 138 | fprintf(d, "."); 139 | break; 140 | case OANYOF: 141 | fprintf(d, "[(%ld)", (long)opnd); 142 | cs = &g->sets[opnd]; 143 | last = -1; 144 | for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ 145 | if (CHIN(cs, i) && i < g->csetsize) { 146 | if (last < 0) { 147 | fprintf(d, "%s", regchar(i)); 148 | last = i; 149 | } 150 | } else { 151 | if (last >= 0) { 152 | if (last != i-1) 153 | fprintf(d, "-%s", 154 | regchar(i-1)); 155 | last = -1; 156 | } 157 | } 158 | fprintf(d, "]"); 159 | break; 160 | case OBACK_: 161 | fprintf(d, "(\\<%ld>", (long)opnd); 162 | break; 163 | case O_BACK: 164 | fprintf(d, "<%ld>\\)", (long)opnd); 165 | break; 166 | case OPLUS_: 167 | fprintf(d, "(+"); 168 | if (OP(*(s+opnd)) != O_PLUS) 169 | fprintf(d, "<%ld>", (long)opnd); 170 | break; 171 | case O_PLUS: 172 | if (OP(*(s-opnd)) != OPLUS_) 173 | fprintf(d, "<%ld>", (long)opnd); 174 | fprintf(d, "+)"); 175 | break; 176 | case OQUEST_: 177 | fprintf(d, "(?"); 178 | if (OP(*(s+opnd)) != O_QUEST) 179 | fprintf(d, "<%ld>", (long)opnd); 180 | break; 181 | case O_QUEST: 182 | if (OP(*(s-opnd)) != OQUEST_) 183 | fprintf(d, "<%ld>", (long)opnd); 184 | fprintf(d, "?)"); 185 | break; 186 | case OLPAREN: 187 | fprintf(d, "((<%ld>", (long)opnd); 188 | break; 189 | case ORPAREN: 190 | fprintf(d, "<%ld>))", (long)opnd); 191 | break; 192 | case OCH_: 193 | fprintf(d, "<"); 194 | if (OP(*(s+opnd)) != OOR2) 195 | fprintf(d, "<%ld>", (long)opnd); 196 | break; 197 | case OOR1: 198 | if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) 199 | fprintf(d, "<%ld>", (long)opnd); 200 | fprintf(d, "|"); 201 | break; 202 | case OOR2: 203 | fprintf(d, "|"); 204 | if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) 205 | fprintf(d, "<%ld>", (long)opnd); 206 | break; 207 | case O_CH: 208 | if (OP(*(s-opnd)) != OOR1) 209 | fprintf(d, "<%ld>", (long)opnd); 210 | fprintf(d, ">"); 211 | break; 212 | default: 213 | fprintf(d, "!%ld(%ld)!", OP(*s), opnd); 214 | break; 215 | } 216 | if (!done) 217 | GAP(); 218 | } 219 | } 220 | 221 | /* 222 | - regchar - make a character printable 223 | == static char *regchar(int ch); 224 | */ 225 | static char * /* -> representation */ 226 | regchar(ch) 227 | int ch; 228 | { 229 | static char buf[10]; 230 | 231 | if (isprint(ch) || ch == ' ') 232 | sprintf(buf, "%c", ch); 233 | else 234 | sprintf(buf, "\\%o", ch); 235 | return(buf); 236 | } 237 | -------------------------------------------------------------------------------- /src/re/debug.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === debug.c === */ 7 | void regprint(regex_t *r, FILE *d); 8 | static void s_print(register struct re_guts *g, FILE *d); 9 | static char *regchar(int ch); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | /* ========= end header generated by ./mkh ========= */ 15 | -------------------------------------------------------------------------------- /src/re/engine.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === engine.c === */ 7 | static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); 8 | static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); 9 | static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); 10 | static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); 11 | static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); 12 | static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); 13 | #define BOL (OUT+1) 14 | #define EOL (BOL+1) 15 | #define BOLEOL (BOL+2) 16 | #define NOTHING (BOL+3) 17 | #define BOW (BOL+4) 18 | #define EOW (BOL+5) 19 | #define CODEMAX (BOL+5) /* highest code used */ 20 | #define NONCHAR(c) ((c) > CHAR_MAX) 21 | #define NNONCHAR (CODEMAX-CHAR_MAX) 22 | #ifdef REDEBUG 23 | static void print(struct match *m, char *caption, states st, int ch, FILE *d); 24 | #endif 25 | #ifdef REDEBUG 26 | static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); 27 | #endif 28 | #ifdef REDEBUG 29 | static char *pchar(int ch); 30 | #endif 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | /* ========= end header generated by ./mkh ========= */ 36 | -------------------------------------------------------------------------------- /src/re/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "main.ih" 9 | 10 | char *progname; 11 | int debug = 0; 12 | int line = 0; 13 | int status = 0; 14 | 15 | int copts = REG_EXTENDED; 16 | int eopts = 0; 17 | regoff_t startoff = 0; 18 | regoff_t endoff = 0; 19 | 20 | 21 | extern int split(); 22 | extern void regprint(); 23 | 24 | /* 25 | - main - do the simple case, hand off to regress() for regression 26 | */ 27 | int main(argc, argv) 28 | int argc; 29 | char *argv[]; 30 | { 31 | regex_t re; 32 | # define NS 10 33 | regmatch_t subs[NS]; 34 | char erbuf[100]; 35 | int err; 36 | size_t len; 37 | int c; 38 | int errflg = 0; 39 | register int i; 40 | extern int optind; 41 | extern char *optarg; 42 | 43 | progname = argv[0]; 44 | 45 | while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) 46 | switch (c) { 47 | case 'c': /* compile options */ 48 | copts = options('c', optarg); 49 | break; 50 | case 'e': /* execute options */ 51 | eopts = options('e', optarg); 52 | break; 53 | case 'S': /* start offset */ 54 | startoff = (regoff_t)atoi(optarg); 55 | break; 56 | case 'E': /* end offset */ 57 | endoff = (regoff_t)atoi(optarg); 58 | break; 59 | case 'x': /* Debugging. */ 60 | debug++; 61 | break; 62 | case '?': 63 | default: 64 | errflg++; 65 | break; 66 | } 67 | if (errflg) { 68 | fprintf(stderr, "usage: %s ", progname); 69 | fprintf(stderr, "[-c copt][-C][-d] [re]\n"); 70 | exit(2); 71 | } 72 | 73 | if (optind >= argc) { 74 | regress(stdin); 75 | exit(status); 76 | } 77 | 78 | err = regcomp(&re, argv[optind++], copts); 79 | if (err) { 80 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 81 | fprintf(stderr, "error %s, %d/%d `%s'\n", 82 | eprint(err), len, sizeof(erbuf), erbuf); 83 | exit(status); 84 | } 85 | regprint(&re, stdout); 86 | 87 | if (optind >= argc) { 88 | regfree(&re); 89 | exit(status); 90 | } 91 | 92 | if (eopts®_STARTEND) { 93 | subs[0].rm_so = startoff; 94 | subs[0].rm_eo = strlen(argv[optind]) - endoff; 95 | } 96 | err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); 97 | if (err) { 98 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 99 | fprintf(stderr, "error %s, %d/%d `%s'\n", 100 | eprint(err), len, sizeof(erbuf), erbuf); 101 | exit(status); 102 | } 103 | if (!(copts®_NOSUB)) { 104 | len = (int)(subs[0].rm_eo - subs[0].rm_so); 105 | if (subs[0].rm_so != -1) { 106 | if (len != 0) 107 | printf("match `%.*s'\n", (int)len, 108 | argv[optind] + subs[0].rm_so); 109 | else 110 | printf("match `'@%.1s\n", 111 | argv[optind] + subs[0].rm_so); 112 | } 113 | for (i = 1; i < NS; i++) 114 | if (subs[i].rm_so != -1) 115 | printf("(%d) `%.*s'\n", i, 116 | (int)(subs[i].rm_eo - subs[i].rm_so), 117 | argv[optind] + subs[i].rm_so); 118 | } 119 | exit(status); 120 | } 121 | 122 | /* 123 | - regress - main loop of regression test 124 | == void regress(FILE *in); 125 | */ 126 | void 127 | regress(in) 128 | FILE *in; 129 | { 130 | char inbuf[1000]; 131 | # define MAXF 10 132 | char *f[MAXF]; 133 | int nf; 134 | int i; 135 | char erbuf[100]; 136 | size_t ne; 137 | char *badpat = "invalid regular expression"; 138 | # define SHORT 10 139 | char *bpname = "REG_BADPAT"; 140 | regex_t re; 141 | 142 | while (fgets(inbuf, sizeof(inbuf), in) != NULL) { 143 | line++; 144 | if (inbuf[0] == '#' || inbuf[0] == '\n') 145 | continue; /* NOTE CONTINUE */ 146 | inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ 147 | if (debug) 148 | fprintf(stdout, "%d:\n", line); 149 | nf = split(inbuf, f, MAXF, "\t\t"); 150 | if (nf < 3) { 151 | fprintf(stderr, "bad input, line %d\n", line); 152 | exit(1); 153 | } 154 | for (i = 0; i < nf; i++) 155 | if (strcmp(f[i], "\"\"") == 0) 156 | f[i] = ""; 157 | if (nf <= 3) 158 | f[3] = NULL; 159 | if (nf <= 4) 160 | f[4] = NULL; 161 | try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); 162 | if (opt('&', f[1])) /* try with either type of RE */ 163 | try(f[0], f[1], f[2], f[3], f[4], 164 | options('c', f[1]) &~ REG_EXTENDED); 165 | } 166 | 167 | ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 168 | if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { 169 | fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", 170 | erbuf, badpat); 171 | status = 1; 172 | } 173 | ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); 174 | if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || 175 | ne != strlen(badpat)+1) { 176 | fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", 177 | erbuf, SHORT-1, badpat); 178 | status = 1; 179 | } 180 | ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 181 | if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { 182 | fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", 183 | erbuf, bpname); 184 | status = 1; 185 | } 186 | re.re_endp = bpname; 187 | ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); 188 | if (atoi(erbuf) != (int)REG_BADPAT) { 189 | fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", 190 | erbuf, (long)REG_BADPAT); 191 | status = 1; 192 | } else if (ne != strlen(erbuf)+1) { 193 | fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", 194 | erbuf, (long)REG_BADPAT); 195 | status = 1; 196 | } 197 | } 198 | 199 | /* 200 | - try - try it, and report on problems 201 | == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); 202 | */ 203 | void 204 | try(f0, f1, f2, f3, f4, opts) 205 | char *f0; 206 | char *f1; 207 | char *f2; 208 | char *f3; 209 | char *f4; 210 | int opts; /* may not match f1 */ 211 | { 212 | regex_t re; 213 | # define NSUBS 10 214 | regmatch_t subs[NSUBS]; 215 | # define NSHOULD 15 216 | char *should[NSHOULD]; 217 | int nshould; 218 | char erbuf[100]; 219 | int err; 220 | int len; 221 | char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; 222 | register int i; 223 | char *grump; 224 | char f0copy[1000]; 225 | char f2copy[1000]; 226 | 227 | strcpy(f0copy, f0); 228 | re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; 229 | fixstr(f0copy); 230 | err = regcomp(&re, f0copy, opts); 231 | if (err != 0 && (!opt('C', f1) || err != efind(f2))) { 232 | /* unexpected error or wrong error */ 233 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 234 | fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", 235 | line, type, eprint(err), len, 236 | sizeof(erbuf), erbuf); 237 | status = 1; 238 | } else if (err == 0 && opt('C', f1)) { 239 | /* unexpected success */ 240 | fprintf(stderr, "%d: %s should have given REG_%s\n", 241 | line, type, f2); 242 | status = 1; 243 | err = 1; /* so we won't try regexec */ 244 | } 245 | 246 | if (err != 0) { 247 | regfree(&re); 248 | return; 249 | } 250 | 251 | strcpy(f2copy, f2); 252 | fixstr(f2copy); 253 | 254 | if (options('e', f1)®_STARTEND) { 255 | if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) 256 | fprintf(stderr, "%d: bad STARTEND syntax\n", line); 257 | subs[0].rm_so = strchr(f2, '(') - f2 + 1; 258 | subs[0].rm_eo = strchr(f2, ')') - f2; 259 | } 260 | err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); 261 | 262 | if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { 263 | /* unexpected error or wrong error */ 264 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 265 | fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", 266 | line, type, eprint(err), len, 267 | sizeof(erbuf), erbuf); 268 | status = 1; 269 | } else if (err != 0) { 270 | /* nothing more to check */ 271 | } else if (f3 == NULL) { 272 | /* unexpected success */ 273 | fprintf(stderr, "%d: %s exec should have failed\n", 274 | line, type); 275 | status = 1; 276 | err = 1; /* just on principle */ 277 | } else if (opts®_NOSUB) { 278 | /* nothing more to check */ 279 | } else if ((grump = check(f2, subs[0], f3)) != NULL) { 280 | fprintf(stderr, "%d: %s %s\n", line, type, grump); 281 | status = 1; 282 | err = 1; 283 | } 284 | 285 | if (err != 0 || f4 == NULL) { 286 | regfree(&re); 287 | return; 288 | } 289 | 290 | for (i = 1; i < NSHOULD; i++) 291 | should[i] = NULL; 292 | nshould = split(f4, should+1, NSHOULD-1, ","); 293 | if (nshould == 0) { 294 | nshould = 1; 295 | should[1] = ""; 296 | } 297 | for (i = 1; i < NSUBS; i++) { 298 | grump = check(f2, subs[i], should[i]); 299 | if (grump != NULL) { 300 | fprintf(stderr, "%d: %s $%d %s\n", line, 301 | type, i, grump); 302 | status = 1; 303 | err = 1; 304 | } 305 | } 306 | 307 | regfree(&re); 308 | } 309 | 310 | /* 311 | - options - pick options out of a regression-test string 312 | == int options(int type, char *s); 313 | */ 314 | int 315 | options(type, s) 316 | int type; /* 'c' compile, 'e' exec */ 317 | char *s; 318 | { 319 | register char *p; 320 | register int o = (type == 'c') ? copts : eopts; 321 | register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; 322 | 323 | for (p = s; *p != '\0'; p++) 324 | if (strchr(legal, *p) != NULL) 325 | switch (*p) { 326 | case 'b': 327 | o &= ~REG_EXTENDED; 328 | break; 329 | case 'i': 330 | o |= REG_ICASE; 331 | break; 332 | case 's': 333 | o |= REG_NOSUB; 334 | break; 335 | case 'n': 336 | o |= REG_NEWLINE; 337 | break; 338 | case 'm': 339 | o &= ~REG_EXTENDED; 340 | o |= REG_NOSPEC; 341 | break; 342 | case 'p': 343 | o |= REG_PEND; 344 | break; 345 | case '^': 346 | o |= REG_NOTBOL; 347 | break; 348 | case '$': 349 | o |= REG_NOTEOL; 350 | break; 351 | case '#': 352 | o |= REG_STARTEND; 353 | break; 354 | case 't': /* trace */ 355 | o |= REG_TRACE; 356 | break; 357 | case 'l': /* force long representation */ 358 | o |= REG_LARGE; 359 | break; 360 | case 'r': /* force backref use */ 361 | o |= REG_BACKR; 362 | break; 363 | } 364 | return(o); 365 | } 366 | 367 | /* 368 | - opt - is a particular option in a regression string? 369 | == int opt(int c, char *s); 370 | */ 371 | int /* predicate */ 372 | opt(c, s) 373 | int c; 374 | char *s; 375 | { 376 | return(strchr(s, c) != NULL); 377 | } 378 | 379 | /* 380 | - fixstr - transform magic characters in strings 381 | == void fixstr(register char *p); 382 | */ 383 | void 384 | fixstr(p) 385 | register char *p; 386 | { 387 | if (p == NULL) 388 | return; 389 | 390 | for (; *p != '\0'; p++) 391 | if (*p == 'N') 392 | *p = '\n'; 393 | else if (*p == 'T') 394 | *p = '\t'; 395 | else if (*p == 'S') 396 | *p = ' '; 397 | else if (*p == 'Z') 398 | *p = '\0'; 399 | } 400 | 401 | /* 402 | - check - check a substring match 403 | == char *check(char *str, regmatch_t sub, char *should); 404 | */ 405 | char * /* NULL or complaint */ 406 | check(str, sub, should) 407 | char *str; 408 | regmatch_t sub; 409 | char *should; 410 | { 411 | register int len; 412 | register int shlen; 413 | register char *p; 414 | static char grump[500]; 415 | register char *at = NULL; 416 | 417 | if (should != NULL && strcmp(should, "-") == 0) 418 | should = NULL; 419 | if (should != NULL && should[0] == '@') { 420 | at = should + 1; 421 | should = ""; 422 | } 423 | 424 | /* check rm_so and rm_eo for consistency */ 425 | if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || 426 | (sub.rm_so != -1 && sub.rm_eo == -1) || 427 | (sub.rm_so != -1 && sub.rm_so < 0) || 428 | (sub.rm_eo != -1 && sub.rm_eo < 0) ) { 429 | sprintf(grump, "start %ld end %ld", (long)sub.rm_so, 430 | (long)sub.rm_eo); 431 | return(grump); 432 | } 433 | 434 | /* check for no match */ 435 | if (sub.rm_so == -1 && should == NULL) 436 | return(NULL); 437 | if (sub.rm_so == -1) 438 | return("did not match"); 439 | 440 | /* check for in range */ 441 | if (sub.rm_eo > strlen(str)) { 442 | sprintf(grump, "start %ld end %ld, past end of string", 443 | (long)sub.rm_so, (long)sub.rm_eo); 444 | return(grump); 445 | } 446 | 447 | len = (int)(sub.rm_eo - sub.rm_so); 448 | shlen = (int)strlen(should); 449 | p = str + sub.rm_so; 450 | 451 | /* check for not supposed to match */ 452 | if (should == NULL) { 453 | sprintf(grump, "matched `%.*s'", len, p); 454 | return(grump); 455 | } 456 | 457 | /* check for wrong match */ 458 | if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { 459 | sprintf(grump, "matched `%.*s' instead", len, p); 460 | return(grump); 461 | } 462 | if (shlen > 0) 463 | return(NULL); 464 | 465 | /* check null match in right place */ 466 | if (at == NULL) 467 | return(NULL); 468 | shlen = strlen(at); 469 | if (shlen == 0) 470 | shlen = 1; /* force check for end-of-string */ 471 | if (strncmp(p, at, shlen) != 0) { 472 | sprintf(grump, "matched null at `%.20s'", p); 473 | return(grump); 474 | } 475 | return(NULL); 476 | } 477 | 478 | /* 479 | - eprint - convert error number to name 480 | == static char *eprint(int err); 481 | */ 482 | static char * 483 | eprint(err) 484 | int err; 485 | { 486 | static char epbuf[100]; 487 | size_t len; 488 | 489 | len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); 490 | assert(len <= sizeof(epbuf)); 491 | return(epbuf); 492 | } 493 | 494 | /* 495 | - efind - convert error name to number 496 | == static int efind(char *name); 497 | */ 498 | static int 499 | efind(name) 500 | char *name; 501 | { 502 | static char efbuf[100]; 503 | regex_t re; 504 | 505 | sprintf(efbuf, "REG_%s", name); 506 | assert(strlen(efbuf) < sizeof(efbuf)); 507 | re.re_endp = efbuf; 508 | (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); 509 | return(atoi(efbuf)); 510 | } 511 | -------------------------------------------------------------------------------- /src/re/main.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === main.c === */ 7 | void regress(FILE *in); 8 | void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); 9 | int options(int type, char *s); 10 | int opt(int c, char *s); 11 | void fixstr(register char *p); 12 | char *check(char *str, regmatch_t sub, char *should); 13 | static char *eprint(int err); 14 | static int efind(char *name); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | /* ========= end header generated by ./mkh ========= */ 20 | -------------------------------------------------------------------------------- /src/re/mkh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # mkh - pull headers out of C source 3 | PATH=/bin:/usr/bin ; export PATH 4 | 5 | # egrep pattern to pick out marked lines 6 | egrep='^ =([ ]|$)' 7 | 8 | # Sed program to process marked lines into lines for the header file. 9 | # The markers have already been removed. Two things are done here: removal 10 | # of backslashed newlines, and some fudging of comments. The first is done 11 | # because -o needs to have prototypes on one line to strip them down. 12 | # Getting comments into the output is tricky; we turn C++-style // comments 13 | # into /* */ comments, after altering any existing */'s to avoid trouble. 14 | peel=' /\\$/N 15 | /\\\n[ ]*/s///g 16 | /\/\//s;\*/;* /;g 17 | /\/\//s;//\(.*\);/*\1 */;' 18 | 19 | for a 20 | do 21 | case "$a" in 22 | -o) # old (pre-function-prototype) compiler 23 | # add code to comment out argument lists 24 | peel="$peel 25 | "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);' 26 | shift 27 | ;; 28 | -b) # funny Berkeley __P macro 29 | peel="$peel 30 | "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));' 31 | shift 32 | ;; 33 | -s) # compiler doesn't like `static foo();' 34 | # add code to get rid of the `static' 35 | peel="$peel 36 | "'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;' 37 | shift 38 | ;; 39 | -p) # private declarations 40 | egrep='^ ==([ ]|$)' 41 | shift 42 | ;; 43 | -i) # wrap in #ifndef, argument is name 44 | ifndef="$2" 45 | shift ; shift 46 | ;; 47 | *) break 48 | ;; 49 | esac 50 | done 51 | 52 | if test " $ifndef" != " " 53 | then 54 | echo "#ifndef $ifndef" 55 | echo "#define $ifndef /* never again */" 56 | fi 57 | echo "/* ========= begin header generated by $0 ========= */" 58 | echo '#ifdef __cplusplus' 59 | echo 'extern "C" {' 60 | echo '#endif' 61 | for f 62 | do 63 | echo 64 | echo "/* === $f === */" 65 | egrep "$egrep" $f | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel" 66 | echo 67 | done 68 | echo '#ifdef __cplusplus' 69 | echo '}' 70 | echo '#endif' 71 | echo "/* ========= end header generated by $0 ========= */" 72 | if test " $ifndef" != " " 73 | then 74 | echo "#endif" 75 | fi 76 | exit 0 77 | -------------------------------------------------------------------------------- /src/re/re.c: -------------------------------------------------------------------------------- 1 | /* re.c */ 2 | /* Henry Spencer's implementation of Regular Expressions, 3 | used for TinyScheme */ 4 | /* Refurbished by Stephen Gildea */ 5 | /* Updated by armornick */ 6 | 7 | /* this defintion is to export the init function */ 8 | #define SCHEME_MODULE 9 | #include "regex.h" 10 | #include 11 | 12 | pointer foreign_re_match(scheme *sc, pointer args) { 13 | pointer retval=scheme_false(sc); 14 | int retcode; 15 | regex_t rt; 16 | pointer first_arg, second_arg; 17 | pointer third_arg=scheme_nil(sc); 18 | char *string; 19 | char *pattern; 20 | int num=0; 21 | 22 | if(!((args != scheme_nil(sc)) && scheme_is_string((first_arg = scheme_pair_car(args))) 23 | && (args=scheme_pair_cdr(args)) 24 | && scheme_is_pair(args) && scheme_is_string((second_arg = scheme_pair_car(args))))) { 25 | return scheme_false(sc); 26 | } 27 | pattern = scheme_string_value(first_arg); 28 | string = scheme_string_value(second_arg); 29 | 30 | args=scheme_pair_cdr(args); 31 | if(args!=scheme_nil(sc)) { 32 | if(!(scheme_is_pair(args) && scheme_is_vector((third_arg = scheme_pair_car(args))))) { 33 | return scheme_false(sc); 34 | } else { 35 | num = scheme_ivalue(third_arg); 36 | } 37 | } 38 | 39 | 40 | if(regcomp(&rt,pattern,REG_EXTENDED)!=0) { 41 | return scheme_false(sc); 42 | } 43 | 44 | if(num==0) { 45 | retcode=regexec(&rt,string,0,0,0); 46 | } else { 47 | regmatch_t *pmatch=malloc((num+1)*sizeof(regmatch_t)); 48 | if(pmatch!=0) { 49 | retcode=regexec(&rt,string,num+1,pmatch,0); 50 | if(retcode==0) { 51 | int i; 52 | for(i=0; i 3 | 4 | SCHEME_H_DIR=../tinyscheme-1.30 5 | 6 | # Windows/2000 7 | # CC = cl -nologo 8 | # DEBUG = -W3 -Z7 -MD 9 | # SYS_LIBS= 10 | # Osuf=obj 11 | # SOsuf=dll 12 | # LD = link -nologo 13 | # LDFLAGS = -debug -map -dll -incremental:no 14 | # OUT = -out:$@ 15 | 16 | # Unix, generally 17 | CC = gcc -fpic 18 | DEBUG=-g -Wall -Wno-char-subscripts -O 19 | Osuf=o 20 | SOsuf=so 21 | OUT = -o $@ 22 | 23 | # Linux 24 | LD = gcc 25 | LDFLAGS = -shared 26 | SYS_LIBS= 27 | 28 | # Solaris 29 | # LD = /usr/ccs/bin/ld 30 | ## -Bsymbolic nec. because we have fns w same name as in libc. 31 | # LDFLAGS = -G -Bsymbolic -z text 32 | # SYS_LIBS= -lc 33 | 34 | SRCS = re.c debug.c regcomp.c regerror.c regexec.c regfree.c split.c 35 | OBJS = $(SRCS:.c=.$(Osuf)) 36 | 37 | all: re.$(SOsuf) 38 | 39 | %.$(Osuf): %.c 40 | $(CC) -c $(DEBUG) -DUSE_DL -I. -I$(SCHEME_H_DIR) $+ 41 | 42 | re.$(SOsuf): $(OBJS) 43 | $(LD) $(LDFLAGS) $(OUT) $+ $(SYS_LIBS) 44 | 45 | clean: 46 | -rm -f $(OBJS) re.$(SOsuf) 47 | -------------------------------------------------------------------------------- /src/re/re.scm: -------------------------------------------------------------------------------- 1 | ;; return the substring of STRING matched in MATCH-VECTOR, 2 | ;; the Nth subexpression match (default 0). 3 | (define (re-match-nth string match-vector . n) 4 | (let ((n (if (pair? n) (car n) 0))) 5 | (substring string (car (vector-ref match-vector n)) 6 | (cdr (vector-ref match-vector n))))) 7 | 8 | (define (re-before-nth string match-vector . n) 9 | (let ((n (if (pair? n) (car n) 0))) 10 | (substring string 0 (car (vector-ref match-vector n))))) 11 | 12 | (define (re-after-nth string match-vector . n) 13 | (let ((n (if (pair? n) (car n) 0))) 14 | (substring string (cdr (vector-ref match-vector n)) 15 | (string-length string)))) -------------------------------------------------------------------------------- /src/re/regcomp.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === regcomp.c === */ 7 | static void p_ere(register struct parse *p, int stop); 8 | static void p_ere_exp(register struct parse *p); 9 | static void p_str(register struct parse *p); 10 | static void p_bre(register struct parse *p, register int end1, register int end2); 11 | static int p_simp_re(register struct parse *p, int starordinary); 12 | static int p_count(register struct parse *p); 13 | static void p_bracket(register struct parse *p); 14 | static void p_b_term(register struct parse *p, register cset *cs); 15 | static void p_b_cclass(register struct parse *p, register cset *cs); 16 | static void p_b_eclass(register struct parse *p, register cset *cs); 17 | static char p_b_symbol(register struct parse *p); 18 | static char p_b_coll_elem(register struct parse *p, int endc); 19 | static char othercase(int ch); 20 | static void bothcases(register struct parse *p, int ch); 21 | static void ordinary(register struct parse *p, register int ch); 22 | static void nonnewline(register struct parse *p); 23 | static void repeat(register struct parse *p, sopno start, int from, int to); 24 | static int seterr(register struct parse *p, int e); 25 | static cset *allocset(register struct parse *p); 26 | static void freeset(register struct parse *p, register cset *cs); 27 | static int freezeset(register struct parse *p, register cset *cs); 28 | static int firstch(register struct parse *p, register cset *cs); 29 | static int nch(register struct parse *p, register cset *cs); 30 | static void mcadd(register struct parse *p, register cset *cs, register char *cp); 31 | static void mcinvert(register struct parse *p, register cset *cs); 32 | static void mccase(register struct parse *p, register cset *cs); 33 | static int isinsets(register struct re_guts *g, int c); 34 | static int samesets(register struct re_guts *g, int c1, int c2); 35 | static void categorize(struct parse *p, register struct re_guts *g); 36 | static sopno dupl(register struct parse *p, sopno start, sopno finish); 37 | static void doemit(register struct parse *p, sop op, size_t opnd); 38 | static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 39 | static void dofwd(register struct parse *p, sopno pos, sop value); 40 | static void enlarge(register struct parse *p, sopno size); 41 | static void stripsnug(register struct parse *p, register struct re_guts *g); 42 | static void findmust(register struct parse *p, register struct re_guts *g); 43 | static sopno pluscount(register struct parse *p, register struct re_guts *g); 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | /* ========= end header generated by ./mkh ========= */ 49 | -------------------------------------------------------------------------------- /src/re/regerror.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "utils.h" 4 | #include "regerror.ih" 5 | 6 | /* 7 | = #define REG_NOMATCH 1 8 | = #define REG_BADPAT 2 9 | = #define REG_ECOLLATE 3 10 | = #define REG_ECTYPE 4 11 | = #define REG_EESCAPE 5 12 | = #define REG_ESUBREG 6 13 | = #define REG_EBRACK 7 14 | = #define REG_EPAREN 8 15 | = #define REG_EBRACE 9 16 | = #define REG_BADBR 10 17 | = #define REG_ERANGE 11 18 | = #define REG_ESPACE 12 19 | = #define REG_BADRPT 13 20 | = #define REG_EMPTY 14 21 | = #define REG_ASSERT 15 22 | = #define REG_INVARG 16 23 | = #define REG_ATOI 255 // convert name to number (!) 24 | = #define REG_ITOA 0400 // convert number to name (!) 25 | */ 26 | static struct rerr { 27 | int code; 28 | char *name; 29 | char *explain; 30 | } rerrs[] = { 31 | { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, 32 | { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, 33 | { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, 34 | { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, 35 | { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, 36 | { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, 37 | { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, 38 | { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, 39 | { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, 40 | { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, 41 | { REG_ERANGE, "REG_ERANGE", "invalid character range" }, 42 | { REG_ESPACE, "REG_ESPACE", "out of memory" }, 43 | { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, 44 | { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, 45 | { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, 46 | { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, 47 | { 0, "", "*** unknown regexp error code ***" } 48 | }; 49 | 50 | /* 51 | - regerror - the interface to error numbers 52 | = API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t); 53 | */ 54 | /* ARGSUSED */ 55 | API_EXPORT(size_t) 56 | regerror(errcode, preg, errbuf, errbuf_size) 57 | int errcode; 58 | const regex_t *preg; 59 | char *errbuf; 60 | size_t errbuf_size; 61 | { 62 | register struct rerr *r; 63 | register size_t len; 64 | register int target = errcode &~ REG_ITOA; 65 | register char *s; 66 | char convbuf[50]; 67 | 68 | if (errcode == REG_ATOI) 69 | s = regatoi(preg, convbuf); 70 | else { 71 | for (r = rerrs; r->code != 0; r++) 72 | if (r->code == target) 73 | break; 74 | 75 | if (errcode®_ITOA) { 76 | if (r->code != 0) 77 | (void) strcpy(convbuf, r->name); 78 | else 79 | sprintf(convbuf, "REG_0x%x", target); 80 | assert(strlen(convbuf) < sizeof(convbuf)); 81 | s = convbuf; 82 | } else 83 | s = r->explain; 84 | } 85 | 86 | len = strlen(s) + 1; 87 | if (errbuf_size > 0) { 88 | if (errbuf_size > len) 89 | (void) strcpy(errbuf, s); 90 | else { 91 | (void) strncpy(errbuf, s, errbuf_size-1); 92 | errbuf[errbuf_size-1] = '\0'; 93 | } 94 | } 95 | 96 | return(len); 97 | } 98 | 99 | /* 100 | - regatoi - internal routine to implement REG_ATOI 101 | == static char *regatoi(const regex_t *preg, char *localbuf); 102 | */ 103 | static char * 104 | regatoi(preg, localbuf) 105 | const regex_t *preg; 106 | char *localbuf; 107 | { 108 | register struct rerr *r; 109 | 110 | for (r = rerrs; r->code != 0; r++) 111 | if (strcmp(r->name, preg->re_endp) == 0) 112 | break; 113 | if (r->code == 0) 114 | return("0"); 115 | 116 | sprintf(localbuf, "%d", r->code); 117 | return(localbuf); 118 | } 119 | -------------------------------------------------------------------------------- /src/re/regerror.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === regerror.c === */ 7 | static char *regatoi(const regex_t *preg, char *localbuf); 8 | 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | /* ========= end header generated by ./mkh ========= */ 13 | -------------------------------------------------------------------------------- /src/re/regex.001: -------------------------------------------------------------------------------- 1 | # Microsoft Developer Studio Project File - Name="regex" - Package Owner=<4> 2 | # Microsoft Developer Studio Generated Build File, Format Version 5.00 3 | # ** DO NOT EDIT ** 4 | 5 | # TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 6 | 7 | CFG=regex - Win32 Debug 8 | !MESSAGE This is not a valid makefile. To build this project using NMAKE, 9 | !MESSAGE use the Export Makefile command and run 10 | !MESSAGE 11 | !MESSAGE NMAKE /f "regex.mak". 12 | !MESSAGE 13 | !MESSAGE You can specify a configuration when running NMAKE 14 | !MESSAGE by defining the macro CFG on the command line. For example: 15 | !MESSAGE 16 | !MESSAGE NMAKE /f "regex.mak" CFG="regex - Win32 Debug" 17 | !MESSAGE 18 | !MESSAGE Possible choices for configuration are: 19 | !MESSAGE 20 | !MESSAGE "regex - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") 21 | !MESSAGE "regex - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") 22 | !MESSAGE 23 | 24 | # Begin Project 25 | # PROP Scc_ProjName "" 26 | # PROP Scc_LocalPath "" 27 | CPP=cl.exe 28 | MTL=midl.exe 29 | RSC=rc.exe 30 | 31 | !IF "$(CFG)" == "regex - Win32 Release" 32 | 33 | # PROP BASE Use_MFC 0 34 | # PROP BASE Use_Debug_Libraries 0 35 | # PROP BASE Output_Dir "Release" 36 | # PROP BASE Intermediate_Dir "Release" 37 | # PROP BASE Target_Dir "" 38 | # PROP Use_MFC 0 39 | # PROP Use_Debug_Libraries 0 40 | # PROP Output_Dir "Release" 41 | # PROP Intermediate_Dir "Release" 42 | # PROP Ignore_Export_Lib 0 43 | # PROP Target_Dir "" 44 | # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c 45 | # ADD CPP /nologo /MT /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /FD /c 46 | # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 47 | # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 48 | # ADD BASE RSC /l 0x409 /d "NDEBUG" 49 | # ADD RSC /l 0x409 /d "NDEBUG" 50 | BSC32=bscmake.exe 51 | # ADD BASE BSC32 /nologo 52 | # ADD BSC32 /nologo 53 | LINK32=link.exe 54 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 55 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 56 | 57 | !ELSEIF "$(CFG)" == "regex - Win32 Debug" 58 | 59 | # PROP BASE Use_MFC 0 60 | # PROP BASE Use_Debug_Libraries 1 61 | # PROP BASE Output_Dir "Debug" 62 | # PROP BASE Intermediate_Dir "Debug" 63 | # PROP BASE Target_Dir "" 64 | # PROP Use_MFC 0 65 | # PROP Use_Debug_Libraries 1 66 | # PROP Output_Dir "Debug" 67 | # PROP Intermediate_Dir "Debug" 68 | # PROP Ignore_Export_Lib 0 69 | # PROP Target_Dir "" 70 | # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c 71 | # ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /FD /c 72 | # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 73 | # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /o NUL /win32 74 | # ADD BASE RSC /l 0x409 /d "_DEBUG" 75 | # ADD RSC /l 0x409 /d "_DEBUG" 76 | BSC32=bscmake.exe 77 | # ADD BASE BSC32 /nologo 78 | # ADD BSC32 /nologo 79 | LINK32=link.exe 80 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept 81 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /pdbtype:sept 82 | 83 | !ENDIF 84 | 85 | # Begin Target 86 | 87 | # Name "regex - Win32 Release" 88 | # Name "regex - Win32 Debug" 89 | # Begin Source File 90 | 91 | SOURCE=.\regcomp.c 92 | # End Source File 93 | # Begin Source File 94 | 95 | SOURCE=.\regerror.c 96 | # End Source File 97 | # Begin Source File 98 | 99 | SOURCE=.\regexec.c 100 | # End Source File 101 | # Begin Source File 102 | 103 | SOURCE=.\regfree.c 104 | # End Source File 105 | # End Target 106 | # End Project 107 | -------------------------------------------------------------------------------- /src/re/regex.7: -------------------------------------------------------------------------------- 1 | .TH REGEX 7 "7 Feb 1994" 2 | .BY "Henry Spencer" 3 | .SH NAME 4 | regex \- POSIX 1003.2 regular expressions 5 | .SH DESCRIPTION 6 | Regular expressions (``RE''s), 7 | as defined in POSIX 1003.2, come in two forms: 8 | modern REs (roughly those of 9 | .IR egrep ; 10 | 1003.2 calls these ``extended'' REs) 11 | and obsolete REs (roughly those of 12 | .IR ed ; 13 | 1003.2 ``basic'' REs). 14 | Obsolete REs mostly exist for backward compatibility in some old programs; 15 | they will be discussed at the end. 16 | 1003.2 leaves some aspects of RE syntax and semantics open; 17 | `\(dg' marks decisions on these aspects that 18 | may not be fully portable to other 1003.2 implementations. 19 | .PP 20 | A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, 21 | separated by `|'. 22 | It matches anything that matches one of the branches. 23 | .PP 24 | A branch is one\(dg or more \fIpieces\fR, concatenated. 25 | It matches a match for the first, followed by a match for the second, etc. 26 | .PP 27 | A piece is an \fIatom\fR possibly followed 28 | by a single\(dg `*', `+', `?', or \fIbound\fR. 29 | An atom followed by `*' matches a sequence of 0 or more matches of the atom. 30 | An atom followed by `+' matches a sequence of 1 or more matches of the atom. 31 | An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. 32 | .PP 33 | A \fIbound\fR is `{' followed by an unsigned decimal integer, 34 | possibly followed by `,' 35 | possibly followed by another unsigned decimal integer, 36 | always followed by `}'. 37 | The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, 38 | and if there are two of them, the first may not exceed the second. 39 | An atom followed by a bound containing one integer \fIi\fR 40 | and no comma matches 41 | a sequence of exactly \fIi\fR matches of the atom. 42 | An atom followed by a bound 43 | containing one integer \fIi\fR and a comma matches 44 | a sequence of \fIi\fR or more matches of the atom. 45 | An atom followed by a bound 46 | containing two integers \fIi\fR and \fIj\fR matches 47 | a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. 48 | .PP 49 | An atom is a regular expression enclosed in `()' (matching a match for the 50 | regular expression), 51 | an empty set of `()' (matching the null string)\(dg, 52 | a \fIbracket expression\fR (see below), `.' 53 | (matching any single character), `^' (matching the null string at the 54 | beginning of a line), `$' (matching the null string at the 55 | end of a line), a `\e' followed by one of the characters 56 | `^.[$()|*+?{\e' 57 | (matching that character taken as an ordinary character), 58 | a `\e' followed by any other character\(dg 59 | (matching that character taken as an ordinary character, 60 | as if the `\e' had not been present\(dg), 61 | or a single character with no other significance (matching that character). 62 | A `{' followed by a character other than a digit is an ordinary 63 | character, not the beginning of a bound\(dg. 64 | It is illegal to end an RE with `\e'. 65 | .PP 66 | A \fIbracket expression\fR is a list of characters enclosed in `[]'. 67 | It normally matches any single character from the list (but see below). 68 | If the list begins with `^', 69 | it matches any single character 70 | (but see below) \fInot\fR from the rest of the list. 71 | If two characters in the list are separated by `\-', this is shorthand 72 | for the full \fIrange\fR of characters between those two (inclusive) in the 73 | collating sequence, 74 | e.g. `[0-9]' in ASCII matches any decimal digit. 75 | It is illegal\(dg for two ranges to share an 76 | endpoint, e.g. `a-c-e'. 77 | Ranges are very collating-sequence-dependent, 78 | and portable programs should avoid relying on them. 79 | .PP 80 | To include a literal `]' in the list, make it the first character 81 | (following a possible `^'). 82 | To include a literal `\-', make it the first or last character, 83 | or the second endpoint of a range. 84 | To use a literal `\-' as the first endpoint of a range, 85 | enclose it in `[.' and `.]' to make it a collating element (see below). 86 | With the exception of these and some combinations using `[' (see next 87 | paragraphs), all other special characters, including `\e', lose their 88 | special significance within a bracket expression. 89 | .PP 90 | Within a bracket expression, a collating element (a character, 91 | a multi-character sequence that collates as if it were a single character, 92 | or a collating-sequence name for either) 93 | enclosed in `[.' and `.]' stands for the 94 | sequence of characters of that collating element. 95 | The sequence is a single element of the bracket expression's list. 96 | A bracket expression containing a multi-character collating element 97 | can thus match more than one character, 98 | e.g. if the collating sequence includes a `ch' collating element, 99 | then the RE `[[.ch.]]*c' matches the first five characters 100 | of `chchcc'. 101 | .PP 102 | Within a bracket expression, a collating element enclosed in `[=' and 103 | `=]' is an equivalence class, standing for the sequences of characters 104 | of all collating elements equivalent to that one, including itself. 105 | (If there are no other equivalent collating elements, 106 | the treatment is as if the enclosing delimiters were `[.' and `.]'.) 107 | For example, if o and \o'o^' are the members of an equivalence class, 108 | then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. 109 | An equivalence class may not\(dg be an endpoint 110 | of a range. 111 | .PP 112 | Within a bracket expression, the name of a \fIcharacter class\fR enclosed 113 | in `[:' and `:]' stands for the list of all characters belonging to that 114 | class. 115 | Standard character class names are: 116 | .PP 117 | .RS 118 | .nf 119 | .ta 3c 6c 9c 120 | alnum digit punct 121 | alpha graph space 122 | blank lower upper 123 | cntrl print xdigit 124 | .fi 125 | .RE 126 | .PP 127 | These stand for the character classes defined in 128 | .IR ctype (3). 129 | A locale may provide others. 130 | A character class may not be used as an endpoint of a range. 131 | .PP 132 | There are two special cases\(dg of bracket expressions: 133 | the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at 134 | the beginning and end of a word respectively. 135 | A word is defined as a sequence of 136 | word characters 137 | which is neither preceded nor followed by 138 | word characters. 139 | A word character is an 140 | .I alnum 141 | character (as defined by 142 | .IR ctype (3)) 143 | or an underscore. 144 | This is an extension, 145 | compatible with but not specified by POSIX 1003.2, 146 | and should be used with 147 | caution in software intended to be portable to other systems. 148 | .PP 149 | In the event that an RE could match more than one substring of a given 150 | string, 151 | the RE matches the one starting earliest in the string. 152 | If the RE could match more than one substring starting at that point, 153 | it matches the longest. 154 | Subexpressions also match the longest possible substrings, subject to 155 | the constraint that the whole match be as long as possible, 156 | with subexpressions starting earlier in the RE taking priority over 157 | ones starting later. 158 | Note that higher-level subexpressions thus take priority over 159 | their lower-level component subexpressions. 160 | .PP 161 | Match lengths are measured in characters, not collating elements. 162 | A null string is considered longer than no match at all. 163 | For example, 164 | `bb*' matches the three middle characters of `abbbc', 165 | `(wee|week)(knights|nights)' matches all ten characters of `weeknights', 166 | when `(.*).*' is matched against `abc' the parenthesized subexpression 167 | matches all three characters, and 168 | when `(a*)*' is matched against `bc' both the whole RE and the parenthesized 169 | subexpression match the null string. 170 | .PP 171 | If case-independent matching is specified, 172 | the effect is much as if all case distinctions had vanished from the 173 | alphabet. 174 | When an alphabetic that exists in multiple cases appears as an 175 | ordinary character outside a bracket expression, it is effectively 176 | transformed into a bracket expression containing both cases, 177 | e.g. `x' becomes `[xX]'. 178 | When it appears inside a bracket expression, all case counterparts 179 | of it are added to the bracket expression, so that (e.g.) `[x]' 180 | becomes `[xX]' and `[^x]' becomes `[^xX]'. 181 | .PP 182 | No particular limit is imposed on the length of REs\(dg. 183 | Programs intended to be portable should not employ REs longer 184 | than 256 bytes, 185 | as an implementation can refuse to accept such REs and remain 186 | POSIX-compliant. 187 | .PP 188 | Obsolete (``basic'') regular expressions differ in several respects. 189 | `|', `+', and `?' are ordinary characters and there is no equivalent 190 | for their functionality. 191 | The delimiters for bounds are `\e{' and `\e}', 192 | with `{' and `}' by themselves ordinary characters. 193 | The parentheses for nested subexpressions are `\e(' and `\e)', 194 | with `(' and `)' by themselves ordinary characters. 195 | `^' is an ordinary character except at the beginning of the 196 | RE or\(dg the beginning of a parenthesized subexpression, 197 | `$' is an ordinary character except at the end of the 198 | RE or\(dg the end of a parenthesized subexpression, 199 | and `*' is an ordinary character if it appears at the beginning of the 200 | RE or the beginning of a parenthesized subexpression 201 | (after a possible leading `^'). 202 | Finally, there is one new type of atom, a \fIback reference\fR: 203 | `\e' followed by a non-zero decimal digit \fId\fR 204 | matches the same sequence of characters 205 | matched by the \fId\fRth parenthesized subexpression 206 | (numbering subexpressions by the positions of their opening parentheses, 207 | left to right), 208 | so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. 209 | .SH SEE ALSO 210 | regex(3) 211 | .PP 212 | POSIX 1003.2, section 2.8 (Regular Expression Notation). 213 | .SH BUGS 214 | Having two kinds of REs is a botch. 215 | .PP 216 | The current 1003.2 spec says that `)' is an ordinary character in 217 | the absence of an unmatched `('; 218 | this was an unintentional result of a wording error, 219 | and change is likely. 220 | Avoid relying on it. 221 | .PP 222 | Back references are a dreadful botch, 223 | posing major problems for efficient implementations. 224 | They are also somewhat vaguely defined 225 | (does 226 | `a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). 227 | Avoid using them. 228 | .PP 229 | 1003.2's specification of case-independent matching is vague. 230 | The ``one case implies all cases'' definition given above 231 | is current consensus among implementors as to the right interpretation. 232 | .PP 233 | The syntax for word boundaries is incredibly ugly. 234 | -------------------------------------------------------------------------------- /src/re/regex.h: -------------------------------------------------------------------------------- 1 | #ifndef _REGEX_H_ 2 | #define _REGEX_H_ /* never again */ 3 | 4 | /* Added by dsouflis - include files should be self contained */ 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /* ========= begin header generated by ././mkh ========= */ 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | /* === regex2.h === */ 18 | #ifdef WIN32 19 | #define API_EXPORT(type) __declspec(dllexport) type __stdcall 20 | #else 21 | #define API_EXPORT(type) type 22 | #endif 23 | 24 | typedef off_t regoff_t; 25 | typedef struct { 26 | int re_magic; 27 | size_t re_nsub; /* number of parenthesized subexpressions */ 28 | const char *re_endp; /* end pointer for REG_PEND */ 29 | struct re_guts *re_g; /* none of your business :-) */ 30 | } regex_t; 31 | typedef struct { 32 | regoff_t rm_so; /* start of match */ 33 | regoff_t rm_eo; /* end of match */ 34 | } regmatch_t; 35 | 36 | 37 | /* === regcomp.c === */ 38 | API_EXPORT(int) regcomp(regex_t *, const char *, int); 39 | #define REG_BASIC 0000 40 | #define REG_EXTENDED 0001 41 | #define REG_ICASE 0002 42 | #define REG_NOSUB 0004 43 | #define REG_NEWLINE 0010 44 | #define REG_NOSPEC 0020 45 | #define REG_PEND 0040 46 | #define REG_DUMP 0200 47 | 48 | 49 | /* === regerror.c === */ 50 | #define REG_NOMATCH 1 51 | #define REG_BADPAT 2 52 | #define REG_ECOLLATE 3 53 | #define REG_ECTYPE 4 54 | #define REG_EESCAPE 5 55 | #define REG_ESUBREG 6 56 | #define REG_EBRACK 7 57 | #define REG_EPAREN 8 58 | #define REG_EBRACE 9 59 | #define REG_BADBR 10 60 | #define REG_ERANGE 11 61 | #define REG_ESPACE 12 62 | #define REG_BADRPT 13 63 | #define REG_EMPTY 14 64 | #define REG_ASSERT 15 65 | #define REG_INVARG 16 66 | #define REG_ATOI 255 /* convert name to number (!) */ 67 | #define REG_ITOA 0400 /* convert number to name (!) */ 68 | API_EXPORT(size_t) regerror(int, const regex_t *, char *, size_t); 69 | 70 | 71 | /* === regexec.c === */ 72 | API_EXPORT(int) regexec(const regex_t *, const char *, size_t, regmatch_t [], int); 73 | #define REG_NOTBOL 00001 74 | #define REG_NOTEOL 00002 75 | #define REG_STARTEND 00004 76 | #define REG_TRACE 00400 /* tracing of execution */ 77 | #define REG_LARGE 01000 /* force large representation */ 78 | #define REG_BACKR 02000 /* force use of backref code */ 79 | 80 | 81 | /* === regfree.c === */ 82 | API_EXPORT(void) regfree(regex_t *); 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif 87 | /* ========= end header generated by ././mkh ========= */ 88 | #endif 89 | -------------------------------------------------------------------------------- /src/re/regex2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * First, the stuff that ends up in the outside-world include file 3 | = #ifdef WIN32 4 | = #define API_EXPORT(type) __declspec(dllexport) type __stdcall 5 | = #else 6 | = #define API_EXPORT(type) type 7 | = #endif 8 | = 9 | = typedef off_t regoff_t; 10 | = typedef struct { 11 | = int re_magic; 12 | = size_t re_nsub; // number of parenthesized subexpressions 13 | = const char *re_endp; // end pointer for REG_PEND 14 | = struct re_guts *re_g; // none of your business :-) 15 | = } regex_t; 16 | = typedef struct { 17 | = regoff_t rm_so; // start of match 18 | = regoff_t rm_eo; // end of match 19 | = } regmatch_t; 20 | */ 21 | /* 22 | * internals of regex_t 23 | */ 24 | #define MAGIC1 ((('r'^0200)<<8) | 'e') 25 | 26 | /* 27 | * The internal representation is a *strip*, a sequence of 28 | * operators ending with an endmarker. (Some terminology etc. is a 29 | * historical relic of earlier versions which used multiple strips.) 30 | * Certain oddities in the representation are there to permit running 31 | * the machinery backwards; in particular, any deviation from sequential 32 | * flow must be marked at both its source and its destination. Some 33 | * fine points: 34 | * 35 | * - OPLUS_ and O_PLUS are *inside* the loop they create. 36 | * - OQUEST_ and O_QUEST are *outside* the bypass they create. 37 | * - OCH_ and O_CH are *outside* the multi-way branch they create, while 38 | * OOR1 and OOR2 are respectively the end and the beginning of one of 39 | * the branches. Note that there is an implicit OOR2 following OCH_ 40 | * and an implicit OOR1 preceding O_CH. 41 | * 42 | * In state representations, an operator's bit is on to signify a state 43 | * immediately *preceding* "execution" of that operator. 44 | */ 45 | typedef unsigned long sop; /* strip operator */ 46 | typedef long sopno; 47 | #define OPRMASK 0xf8000000 48 | #define OPDMASK 0x07ffffff 49 | #define OPSHIFT ((unsigned)27) 50 | #define OP(n) ((n)&OPRMASK) 51 | #define OPND(n) ((n)&OPDMASK) 52 | #define SOP(op, opnd) ((op)|(opnd)) 53 | /* operators meaning operand */ 54 | /* (back, fwd are offsets) */ 55 | #define OEND (1< uch [csetsize] */ 90 | uch mask; /* bit within array */ 91 | uch hash; /* hash code */ 92 | size_t smultis; 93 | char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ 94 | } cset; 95 | /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ 96 | #define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) 97 | #define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) 98 | #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) 99 | #define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ 100 | 101 | /* stuff for character categories */ 102 | typedef unsigned char cat_t; 103 | 104 | /* 105 | * main compiled-expression structure 106 | */ 107 | struct re_guts { 108 | int magic; 109 | # define MAGIC2 ((('R'^0200)<<8)|'E') 110 | sop *strip; /* malloced area for strip */ 111 | int csetsize; /* number of bits in a cset vector */ 112 | int ncsets; /* number of csets in use */ 113 | cset *sets; /* -> cset [ncsets] */ 114 | uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ 115 | int cflags; /* copy of regcomp() cflags argument */ 116 | sopno nstates; /* = number of sops */ 117 | sopno firststate; /* the initial OEND (normally 0) */ 118 | sopno laststate; /* the final OEND */ 119 | int iflags; /* internal flags */ 120 | # define USEBOL 01 /* used ^ */ 121 | # define USEEOL 02 /* used $ */ 122 | # define BAD 04 /* something wrong */ 123 | int nbol; /* number of ^ used */ 124 | int neol; /* number of $ used */ 125 | int ncategories; /* how many character categories */ 126 | cat_t *categories; /* ->catspace[-CHAR_MIN] */ 127 | char *must; /* match must contain this string */ 128 | int mlen; /* length of must */ 129 | size_t nsub; /* copy of re_nsub */ 130 | int backrefs; /* does it use back references? */ 131 | sopno nplus; /* how deep does it nest +s? */ 132 | /* catspace must be last */ 133 | cat_t catspace[1]; /* actually [NC] */ 134 | }; 135 | 136 | /* misc utilities */ 137 | #define OUT (CHAR_MAX+1) /* a non-character value */ 138 | #define ISWORD(c) (isalnum(c) || (c) == '_') 139 | -------------------------------------------------------------------------------- /src/re/regexec.c: -------------------------------------------------------------------------------- 1 | /* 2 | * the outer shell of regexec() 3 | * 4 | * This file includes engine.c *twice*, after muchos fiddling with the 5 | * macros that code uses. This lets the same code operate on two different 6 | * representations for state sets. 7 | */ 8 | #include 9 | 10 | #include "utils.h" 11 | #include "regex2.h" 12 | 13 | #ifndef NDEBUG 14 | static int nope = 0; /* for use in asserts; shuts lint up */ 15 | #endif 16 | 17 | /* macros for manipulating states, small version */ 18 | #define states long 19 | #define states1 states /* for later use in regexec() decision */ 20 | #define CLEAR(v) ((v) = 0) 21 | #define SET0(v, n) ((v) &= ~(1 << (n))) 22 | #define SET1(v, n) ((v) |= 1 << (n)) 23 | #define ISSET(v, n) ((v) & (1 << (n))) 24 | #define ASSIGN(d, s) ((d) = (s)) 25 | #define EQ(a, b) ((a) == (b)) 26 | #define STATEVARS int dummy /* dummy version */ 27 | #define STATESETUP(m, n) /* nothing */ 28 | #define STATETEARDOWN(m) /* nothing */ 29 | #define SETUP(v) ((v) = 0) 30 | #define onestate int 31 | #define INIT(o, n) ((o) = (unsigned)1 << (n)) 32 | #define INC(o) ((o) <<= 1) 33 | #define ISSTATEIN(v, o) ((v) & (o)) 34 | /* some abbreviations; note that some of these know variable names! */ 35 | /* do "if I'm here, I can also be there" etc without branches */ 36 | #define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) 37 | #define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) 38 | #define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) 39 | /* function names */ 40 | #define SNAMES /* engine.c looks after details */ 41 | 42 | #include "engine.c" 43 | 44 | /* now undo things */ 45 | #undef states 46 | #undef CLEAR 47 | #undef SET0 48 | #undef SET1 49 | #undef ISSET 50 | #undef ASSIGN 51 | #undef EQ 52 | #undef STATEVARS 53 | #undef STATESETUP 54 | #undef STATETEARDOWN 55 | #undef SETUP 56 | #undef onestate 57 | #undef INIT 58 | #undef INC 59 | #undef ISSTATEIN 60 | #undef FWD 61 | #undef BACK 62 | #undef ISSETBACK 63 | #undef SNAMES 64 | 65 | /* macros for manipulating states, large version */ 66 | #define states char * 67 | #define CLEAR(v) memset(v, 0, m->g->nstates) 68 | #define SET0(v, n) ((v)[n] = 0) 69 | #define SET1(v, n) ((v)[n] = 1) 70 | #define ISSET(v, n) ((v)[n]) 71 | #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) 72 | #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 73 | #define STATEVARS int vn; char *space 74 | #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 75 | if ((m)->space == NULL) return(REG_ESPACE); \ 76 | (m)->vn = 0; } 77 | #define STATETEARDOWN(m) { free((m)->space); } 78 | #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 79 | #define onestate int 80 | #define INIT(o, n) ((o) = (n)) 81 | #define INC(o) ((o)++) 82 | #define ISSTATEIN(v, o) ((v)[o]) 83 | /* some abbreviations; note that some of these know variable names! */ 84 | /* do "if I'm here, I can also be there" etc without branches */ 85 | #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 86 | #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 87 | #define ISSETBACK(v, n) ((v)[here - (n)]) 88 | /* function names */ 89 | #define LNAMES /* flag */ 90 | 91 | #include "engine.c" 92 | 93 | /* 94 | - regexec - interface for matching 95 | = API_EXPORT(int) regexec(const regex_t *, const char *, size_t, \ 96 | = regmatch_t [], int); 97 | = #define REG_NOTBOL 00001 98 | = #define REG_NOTEOL 00002 99 | = #define REG_STARTEND 00004 100 | = #define REG_TRACE 00400 // tracing of execution 101 | = #define REG_LARGE 01000 // force large representation 102 | = #define REG_BACKR 02000 // force use of backref code 103 | * 104 | * We put this here so we can exploit knowledge of the state representation 105 | * when choosing which matcher to call. Also, by this point the matchers 106 | * have been prototyped. 107 | */ 108 | API_EXPORT(int) /* 0 success, REG_NOMATCH failure */ 109 | regexec(preg, string, nmatch, pmatch, eflags) 110 | const regex_t *preg; 111 | const char *string; 112 | size_t nmatch; 113 | regmatch_t pmatch[]; 114 | int eflags; 115 | { 116 | register struct re_guts *g = preg->re_g; 117 | #ifdef REDEBUG 118 | # define GOODFLAGS(f) (f) 119 | #else 120 | # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 121 | #endif 122 | 123 | if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 124 | return(REG_BADPAT); 125 | assert(!(g->iflags&BAD)); 126 | if (g->iflags&BAD) /* backstop for no-debug case */ 127 | return(REG_BADPAT); 128 | eflags = GOODFLAGS(eflags); 129 | 130 | if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) 131 | return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); 132 | else 133 | return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); 134 | } 135 | -------------------------------------------------------------------------------- /src/re/regfree.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "utils.h" 4 | #include "regex2.h" 5 | 6 | /* 7 | - regfree - free everything 8 | = API_EXPORT(void) regfree(regex_t *); 9 | */ 10 | API_EXPORT(void) 11 | regfree(preg) 12 | regex_t *preg; 13 | { 14 | register struct re_guts *g; 15 | 16 | if (preg->re_magic != MAGIC1) /* oops */ 17 | return; /* nice to complain, but hard */ 18 | 19 | g = preg->re_g; 20 | if (g == NULL || g->magic != MAGIC2) /* oops again */ 21 | return; 22 | preg->re_magic = 0; /* mark it invalid */ 23 | g->magic = 0; /* mark it invalid */ 24 | 25 | if (g->strip != NULL) 26 | free((char *)g->strip); 27 | if (g->sets != NULL) 28 | free((char *)g->sets); 29 | if (g->setbits != NULL) 30 | free((char *)g->setbits); 31 | if (g->must != NULL) 32 | free(g->must); 33 | free((char *)g); 34 | } 35 | -------------------------------------------------------------------------------- /src/re/split.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | - split - divide a string into fields, like awk split() 6 | = int split(char *string, char *fields[], int nfields, char *sep); 7 | */ 8 | int /* number of fields, including overflow */ 9 | split(string, fields, nfields, sep) 10 | char *string; 11 | char *fields[]; /* list is not NULL-terminated */ 12 | int nfields; /* number of entries available in fields[] */ 13 | char *sep; /* "" white, "c" single char, "ab" [ab]+ */ 14 | { 15 | register char *p = string; 16 | register char c; /* latest character */ 17 | register char sepc = sep[0]; 18 | register char sepc2; 19 | register int fn; 20 | register char **fp = fields; 21 | register char *sepp; 22 | register int trimtrail; 23 | 24 | /* white space */ 25 | if (sepc == '\0') { 26 | while ((c = *p++) == ' ' || c == '\t') 27 | continue; 28 | p--; 29 | trimtrail = 1; 30 | sep = " \t"; /* note, code below knows this is 2 long */ 31 | sepc = ' '; 32 | } else 33 | trimtrail = 0; 34 | sepc2 = sep[1]; /* now we can safely pick this up */ 35 | 36 | /* catch empties */ 37 | if (*p == '\0') 38 | return(0); 39 | 40 | /* single separator */ 41 | if (sepc2 == '\0') { 42 | fn = nfields; 43 | for (;;) { 44 | *fp++ = p; 45 | fn--; 46 | if (fn == 0) 47 | break; 48 | while ((c = *p++) != sepc) 49 | if (c == '\0') 50 | return(nfields - fn); 51 | *(p-1) = '\0'; 52 | } 53 | /* we have overflowed the fields vector -- just count them */ 54 | fn = nfields; 55 | for (;;) { 56 | while ((c = *p++) != sepc) 57 | if (c == '\0') 58 | return(fn); 59 | fn++; 60 | } 61 | /* not reached */ 62 | } 63 | 64 | /* two separators */ 65 | if (sep[2] == '\0') { 66 | fn = nfields; 67 | for (;;) { 68 | *fp++ = p; 69 | fn--; 70 | while ((c = *p++) != sepc && c != sepc2) 71 | if (c == '\0') { 72 | if (trimtrail && **(fp-1) == '\0') 73 | fn++; 74 | return(nfields - fn); 75 | } 76 | if (fn == 0) 77 | break; 78 | *(p-1) = '\0'; 79 | while ((c = *p++) == sepc || c == sepc2) 80 | continue; 81 | p--; 82 | } 83 | /* we have overflowed the fields vector -- just count them */ 84 | fn = nfields; 85 | while (c != '\0') { 86 | while ((c = *p++) == sepc || c == sepc2) 87 | continue; 88 | p--; 89 | fn++; 90 | while ((c = *p++) != '\0' && c != sepc && c != sepc2) 91 | continue; 92 | } 93 | /* might have to trim trailing white space */ 94 | if (trimtrail) { 95 | p--; 96 | while ((c = *--p) == sepc || c == sepc2) 97 | continue; 98 | p++; 99 | if (*p != '\0') { 100 | if (fn == nfields+1) 101 | *p = '\0'; 102 | fn--; 103 | } 104 | } 105 | return(fn); 106 | } 107 | 108 | /* n separators */ 109 | fn = 0; 110 | for (;;) { 111 | if (fn < nfields) 112 | *fp++ = p; 113 | fn++; 114 | for (;;) { 115 | c = *p++; 116 | if (c == '\0') 117 | return(fn); 118 | sepp = sep; 119 | while ((sepc = *sepp++) != '\0' && sepc != c) 120 | continue; 121 | if (sepc != '\0') /* it was a separator */ 122 | break; 123 | } 124 | if (fn < nfields) 125 | *(p-1) = '\0'; 126 | for (;;) { 127 | c = *p++; 128 | sepp = sep; 129 | while ((sepc = *sepp++) != '\0' && sepc != c) 130 | continue; 131 | if (sepc == '\0') /* it wasn't a separator */ 132 | break; 133 | } 134 | p--; 135 | } 136 | 137 | /* not reached */ 138 | } 139 | 140 | #ifdef TEST_SPLIT 141 | 142 | 143 | /* 144 | * test program 145 | * pgm runs regression 146 | * pgm sep splits stdin lines by sep 147 | * pgm str sep splits str by sep 148 | * pgm str sep n splits str by sep n times 149 | */ 150 | int 151 | main(argc, argv) 152 | int argc; 153 | char *argv[]; 154 | { 155 | char buf[512]; 156 | register int n; 157 | # define MNF 10 158 | char *fields[MNF]; 159 | 160 | if (argc > 4) 161 | for (n = atoi(argv[3]); n > 0; n--) { 162 | (void) strcpy(buf, argv[1]); 163 | } 164 | else if (argc > 3) 165 | for (n = atoi(argv[3]); n > 0; n--) { 166 | (void) strcpy(buf, argv[1]); 167 | (void) split(buf, fields, MNF, argv[2]); 168 | } 169 | else if (argc > 2) 170 | dosplit(argv[1], argv[2]); 171 | else if (argc > 1) 172 | while (fgets(buf, sizeof(buf), stdin) != NULL) { 173 | buf[strlen(buf)-1] = '\0'; /* stomp newline */ 174 | dosplit(buf, argv[1]); 175 | } 176 | else 177 | regress(); 178 | 179 | exit(0); 180 | } 181 | 182 | dosplit(string, seps) 183 | char *string; 184 | char *seps; 185 | { 186 | # define NF 5 187 | char *fields[NF]; 188 | register int nf; 189 | 190 | nf = split(string, fields, NF, seps); 191 | print(nf, NF, fields); 192 | } 193 | 194 | print(nf, nfp, fields) 195 | int nf; 196 | int nfp; 197 | char *fields[]; 198 | { 199 | register int fn; 200 | register int bound; 201 | 202 | bound = (nf > nfp) ? nfp : nf; 203 | printf("%d:\t", nf); 204 | for (fn = 0; fn < bound; fn++) 205 | printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 206 | } 207 | 208 | #define RNF 5 /* some table entries know this */ 209 | struct { 210 | char *str; 211 | char *seps; 212 | int nf; 213 | char *fi[RNF]; 214 | } tests[] = { 215 | "", " ", 0, { "" }, 216 | " ", " ", 2, { "", "" }, 217 | "x", " ", 1, { "x" }, 218 | "xy", " ", 1, { "xy" }, 219 | "x y", " ", 2, { "x", "y" }, 220 | "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 221 | " a bcd", " ", 4, { "", "", "a", "bcd" }, 222 | "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 223 | " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 224 | 225 | "", " _", 0, { "" }, 226 | " ", " _", 2, { "", "" }, 227 | "x", " _", 1, { "x" }, 228 | "x y", " _", 2, { "x", "y" }, 229 | "ab _ cd", " _", 2, { "ab", "cd" }, 230 | " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 231 | "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 232 | " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 233 | 234 | "", " _~", 0, { "" }, 235 | " ", " _~", 2, { "", "" }, 236 | "x", " _~", 1, { "x" }, 237 | "x y", " _~", 2, { "x", "y" }, 238 | "ab _~ cd", " _~", 2, { "ab", "cd" }, 239 | " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 240 | "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 241 | "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 242 | 243 | "", " _~-", 0, { "" }, 244 | " ", " _~-", 2, { "", "" }, 245 | "x", " _~-", 1, { "x" }, 246 | "x y", " _~-", 2, { "x", "y" }, 247 | "ab _~- cd", " _~-", 2, { "ab", "cd" }, 248 | " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 249 | "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 250 | "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 251 | 252 | "", " ", 0, { "" }, 253 | " ", " ", 2, { "", "" }, 254 | "x", " ", 1, { "x" }, 255 | "xy", " ", 1, { "xy" }, 256 | "x y", " ", 2, { "x", "y" }, 257 | "abc def g ", " ", 4, { "abc", "def", "g", "" }, 258 | " a bcd", " ", 3, { "", "a", "bcd" }, 259 | "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 260 | " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 261 | 262 | "", "", 0, { "" }, 263 | " ", "", 0, { "" }, 264 | "x", "", 1, { "x" }, 265 | "xy", "", 1, { "xy" }, 266 | "x y", "", 2, { "x", "y" }, 267 | "abc def g ", "", 3, { "abc", "def", "g" }, 268 | "\t a bcd", "", 2, { "a", "bcd" }, 269 | " a \tb\t c ", "", 3, { "a", "b", "c" }, 270 | "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 271 | "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 272 | " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 273 | 274 | NULL, NULL, 0, { NULL }, 275 | }; 276 | 277 | regress() 278 | { 279 | char buf[512]; 280 | register int n; 281 | char *fields[RNF+1]; 282 | register int nf; 283 | register int i; 284 | register int printit; 285 | register char *f; 286 | 287 | for (n = 0; tests[n].str != NULL; n++) { 288 | (void) strcpy(buf, tests[n].str); 289 | fields[RNF] = NULL; 290 | nf = split(buf, fields, RNF, tests[n].seps); 291 | printit = 0; 292 | if (nf != tests[n].nf) { 293 | printf("split `%s' by `%s' gave %d fields, not %d\n", 294 | tests[n].str, tests[n].seps, nf, tests[n].nf); 295 | printit = 1; 296 | } else if (fields[RNF] != NULL) { 297 | printf("split() went beyond array end\n"); 298 | printit = 1; 299 | } else { 300 | for (i = 0; i < nf && i < RNF; i++) { 301 | f = fields[i]; 302 | if (f == NULL) 303 | f = "(NULL)"; 304 | if (strcmp(f, tests[n].fi[i]) != 0) { 305 | printf("split `%s' by `%s', field %d is `%s', not `%s'\n", 306 | tests[n].str, tests[n].seps, 307 | i, fields[i], tests[n].fi[i]); 308 | printit = 1; 309 | } 310 | } 311 | } 312 | if (printit) 313 | print(nf, RNF, fields); 314 | } 315 | } 316 | #endif 317 | -------------------------------------------------------------------------------- /src/re/tests: -------------------------------------------------------------------------------- 1 | # regular expression test set 2 | # Lines are at least three fields, separated by one or more tabs. "" stands 3 | # for an empty field. First field is an RE. Second field is flags. If 4 | # C flag given, regcomp() is expected to fail, and the third field is the 5 | # error name (minus the leading REG_). 6 | # 7 | # Otherwise it is expected to succeed, and the third field is the string to 8 | # try matching it against. If there is no fourth field, the match is 9 | # expected to fail. If there is a fourth field, it is the substring that 10 | # the RE is expected to match. If there is a fifth field, it is a comma- 11 | # separated list of what the subexpressions should match, with - indicating 12 | # no match for that one. In both the fourth and fifth fields, a (sub)field 13 | # starting with @ indicates that the (sub)expression is expected to match 14 | # a null string followed by the stuff after the @; this provides a way to 15 | # test where null strings match. The character `N' in REs and strings 16 | # is newline, `S' is space, `T' is tab, `Z' is NUL. 17 | # 18 | # The full list of flags: 19 | # - placeholder, does nothing 20 | # b RE is a BRE, not an ERE 21 | # & try it as both an ERE and a BRE 22 | # C regcomp() error expected, third field is error name 23 | # i REG_ICASE 24 | # m ("mundane") REG_NOSPEC 25 | # s REG_NOSUB (not really testable) 26 | # n REG_NEWLINE 27 | # ^ REG_NOTBOL 28 | # $ REG_NOTEOL 29 | # # REG_STARTEND (see below) 30 | # p REG_PEND 31 | # 32 | # For REG_STARTEND, the start/end offsets are those of the substring 33 | # enclosed in (). 34 | 35 | # basics 36 | a & a a 37 | abc & abc abc 38 | abc|de - abc abc 39 | a|b|c - abc a 40 | 41 | # parentheses and perversions thereof 42 | a(b)c - abc abc 43 | a\(b\)c b abc abc 44 | a( C EPAREN 45 | a( b a( a( 46 | a\( - a( a( 47 | a\( bC EPAREN 48 | a\(b bC EPAREN 49 | a(b C EPAREN 50 | a(b b a(b a(b 51 | # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) 52 | a) - a) a) 53 | ) - ) ) 54 | # end gagging (in a just world, those *should* give EPAREN) 55 | a) b a) a) 56 | a\) bC EPAREN 57 | \) bC EPAREN 58 | a()b - ab ab 59 | a\(\)b b ab ab 60 | 61 | # anchoring and REG_NEWLINE 62 | ^abc$ & abc abc 63 | a^b - a^b 64 | a^b b a^b a^b 65 | a$b - a$b 66 | a$b b a$b a$b 67 | ^ & abc @abc 68 | $ & abc @ 69 | ^$ & "" @ 70 | $^ - "" @ 71 | \($\)\(^\) b "" @ 72 | # stop retching, those are legitimate (although disgusting) 73 | ^^ - "" @ 74 | $$ - "" @ 75 | b$ & abNc 76 | b$ &n abNc b 77 | ^b$ & aNbNc 78 | ^b$ &n aNbNc b 79 | ^$ &n aNNb @Nb 80 | ^$ n abc 81 | ^$ n abcN @ 82 | $^ n aNNb @Nb 83 | \($\)\(^\) bn aNNb @Nb 84 | ^^ n^ aNNb @Nb 85 | $$ n aNNb @NN 86 | ^a ^ a 87 | a$ $ a 88 | ^a ^n aNb 89 | ^b ^n aNb b 90 | a$ $n bNa 91 | b$ $n bNa b 92 | a*(^b$)c* - b b 93 | a*\(^b$\)c* b b b 94 | 95 | # certain syntax errors and non-errors 96 | | C EMPTY 97 | | b | | 98 | * C BADRPT 99 | * b * * 100 | + C BADRPT 101 | ? C BADRPT 102 | "" &C EMPTY 103 | () - abc @abc 104 | \(\) b abc @abc 105 | a||b C EMPTY 106 | |ab C EMPTY 107 | ab| C EMPTY 108 | (|a)b C EMPTY 109 | (a|)b C EMPTY 110 | (*a) C BADRPT 111 | (+a) C BADRPT 112 | (?a) C BADRPT 113 | ({1}a) C BADRPT 114 | \(\{1\}a\) bC BADRPT 115 | (a|*b) C BADRPT 116 | (a|+b) C BADRPT 117 | (a|?b) C BADRPT 118 | (a|{1}b) C BADRPT 119 | ^* C BADRPT 120 | ^* b * * 121 | ^+ C BADRPT 122 | ^? C BADRPT 123 | ^{1} C BADRPT 124 | ^\{1\} bC BADRPT 125 | 126 | # metacharacters, backslashes 127 | a.c & abc abc 128 | a[bc]d & abd abd 129 | a\*c & a*c a*c 130 | a\\b & a\b a\b 131 | a\\\*b & a\*b a\*b 132 | a\bc & abc abc 133 | a\ &C EESCAPE 134 | a\\bc & a\bc a\bc 135 | \{ bC BADRPT 136 | a\[b & a[b a[b 137 | a[b &C EBRACK 138 | # trailing $ is a peculiar special case for the BRE code 139 | a$ & a a 140 | a$ & a$ 141 | a\$ & a 142 | a\$ & a$ a$ 143 | a\\$ & a 144 | a\\$ & a$ 145 | a\\$ & a\$ 146 | a\\$ & a\ a\ 147 | 148 | # back references, ugh 149 | a\(b\)\2c bC ESUBREG 150 | a\(b\1\)c bC ESUBREG 151 | a\(b*\)c\1d b abbcbbd abbcbbd bb 152 | a\(b*\)c\1d b abbcbd 153 | a\(b*\)c\1d b abbcbbbd 154 | ^\(.\)\1 b abc 155 | a\([bc]\)\1d b abcdabbd abbd b 156 | a\(\([bc]\)\2\)*d b abbccd abbccd 157 | a\(\([bc]\)\2\)*d b abbcbd 158 | # actually, this next one probably ought to fail, but the spec is unclear 159 | a\(\(b\)*\2\)*d b abbbd abbbd 160 | # here is a case that no NFA implementation does right 161 | \(ab*\)[ab]*\1 b ababaaa ababaaa a 162 | # check out normal matching in the presence of back refs 163 | \(a\)\1bcd b aabcd aabcd 164 | \(a\)\1bc*d b aabcd aabcd 165 | \(a\)\1bc*d b aabd aabd 166 | \(a\)\1bc*d b aabcccd aabcccd 167 | \(a\)\1bc*[ce]d b aabcccd aabcccd 168 | ^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd 169 | 170 | # ordinary repetitions 171 | ab*c & abc abc 172 | ab+c - abc abc 173 | ab?c - abc abc 174 | a\(*\)b b a*b a*b 175 | a\(**\)b b ab ab 176 | a\(***\)b bC BADRPT 177 | *a b *a *a 178 | **a b a a 179 | ***a bC BADRPT 180 | 181 | # the dreaded bounded repetitions 182 | { & { { 183 | {abc & {abc {abc 184 | {1 C BADRPT 185 | {1} C BADRPT 186 | a{b & a{b a{b 187 | a{1}b - ab ab 188 | a\{1\}b b ab ab 189 | a{1,}b - ab ab 190 | a\{1,\}b b ab ab 191 | a{1,2}b - aab aab 192 | a\{1,2\}b b aab aab 193 | a{1 C EBRACE 194 | a\{1 bC EBRACE 195 | a{1a C EBRACE 196 | a\{1a bC EBRACE 197 | a{1a} C BADBR 198 | a\{1a\} bC BADBR 199 | a{,2} - a{,2} a{,2} 200 | a\{,2\} bC BADBR 201 | a{,} - a{,} a{,} 202 | a\{,\} bC BADBR 203 | a{1,x} C BADBR 204 | a\{1,x\} bC BADBR 205 | a{1,x C EBRACE 206 | a\{1,x bC EBRACE 207 | a{300} C BADBR 208 | a\{300\} bC BADBR 209 | a{1,0} C BADBR 210 | a\{1,0\} bC BADBR 211 | ab{0,0}c - abcac ac 212 | ab\{0,0\}c b abcac ac 213 | ab{0,1}c - abcac abc 214 | ab\{0,1\}c b abcac abc 215 | ab{0,3}c - abbcac abbc 216 | ab\{0,3\}c b abbcac abbc 217 | ab{1,1}c - acabc abc 218 | ab\{1,1\}c b acabc abc 219 | ab{1,3}c - acabc abc 220 | ab\{1,3\}c b acabc abc 221 | ab{2,2}c - abcabbc abbc 222 | ab\{2,2\}c b abcabbc abbc 223 | ab{2,4}c - abcabbc abbc 224 | ab\{2,4\}c b abcabbc abbc 225 | ((a{1,10}){1,10}){1,10} - a a a,a 226 | 227 | # multiple repetitions 228 | a** &C BADRPT 229 | a++ C BADRPT 230 | a?? C BADRPT 231 | a*+ C BADRPT 232 | a*? C BADRPT 233 | a+* C BADRPT 234 | a+? C BADRPT 235 | a?* C BADRPT 236 | a?+ C BADRPT 237 | a{1}{1} C BADRPT 238 | a*{1} C BADRPT 239 | a+{1} C BADRPT 240 | a?{1} C BADRPT 241 | a{1}* C BADRPT 242 | a{1}+ C BADRPT 243 | a{1}? C BADRPT 244 | a*{b} - a{b} a{b} 245 | a\{1\}\{1\} bC BADRPT 246 | a*\{1\} bC BADRPT 247 | a\{1\}* bC BADRPT 248 | 249 | # brackets, and numerous perversions thereof 250 | a[b]c & abc abc 251 | a[ab]c & abc abc 252 | a[^ab]c & adc adc 253 | a[]b]c & a]c a]c 254 | a[[b]c & a[c a[c 255 | a[-b]c & a-c a-c 256 | a[^]b]c & adc adc 257 | a[^-b]c & adc adc 258 | a[b-]c & a-c a-c 259 | a[b &C EBRACK 260 | a[] &C EBRACK 261 | a[1-3]c & a2c a2c 262 | a[3-1]c &C ERANGE 263 | a[1-3-5]c &C ERANGE 264 | a[[.-.]--]c & a-c a-c 265 | a[1- &C ERANGE 266 | a[[. &C EBRACK 267 | a[[.x &C EBRACK 268 | a[[.x. &C EBRACK 269 | a[[.x.] &C EBRACK 270 | a[[.x.]] & ax ax 271 | a[[.x,.]] &C ECOLLATE 272 | a[[.one.]]b & a1b a1b 273 | a[[.notdef.]]b &C ECOLLATE 274 | a[[.].]]b & a]b a]b 275 | a[[:alpha:]]c & abc abc 276 | a[[:notdef:]]c &C ECTYPE 277 | a[[: &C EBRACK 278 | a[[:alpha &C EBRACK 279 | a[[:alpha:] &C EBRACK 280 | a[[:alpha,:] &C ECTYPE 281 | a[[:]:]]b &C ECTYPE 282 | a[[:-:]]b &C ECTYPE 283 | a[[:alph:]] &C ECTYPE 284 | a[[:alphabet:]] &C ECTYPE 285 | [[:alnum:]]+ - -%@a0X- a0X 286 | [[:alpha:]]+ - -%@aX0- aX 287 | [[:blank:]]+ - aSSTb SST 288 | [[:cntrl:]]+ - aNTb NT 289 | [[:digit:]]+ - a019b 019 290 | [[:graph:]]+ - Sa%bS a%b 291 | [[:lower:]]+ - AabC ab 292 | [[:print:]]+ - NaSbN aSb 293 | [[:punct:]]+ - S%-&T %-& 294 | [[:space:]]+ - aSNTb SNT 295 | [[:upper:]]+ - aBCd BC 296 | [[:xdigit:]]+ - p0f3Cq 0f3C 297 | a[[=b=]]c & abc abc 298 | a[[= &C EBRACK 299 | a[[=b &C EBRACK 300 | a[[=b= &C EBRACK 301 | a[[=b=] &C EBRACK 302 | a[[=b,=]] &C ECOLLATE 303 | a[[=one=]]b & a1b a1b 304 | 305 | # complexities 306 | a(((b)))c - abc abc 307 | a(b|(c))d - abd abd 308 | a(b*|c)d - abbd abbd 309 | # just gotta have one DFA-buster, of course 310 | a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab 311 | # and an inline expansion in case somebody gets tricky 312 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab 313 | # and in case somebody just slips in an NFA... 314 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights 315 | # fish for anomalies as the number of states passes 32 316 | 12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 317 | 123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 318 | 1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 319 | 12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 320 | 123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 321 | # and one really big one, beyond any plausible word width 322 | 1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 323 | # fish for problems as brackets go past 8 324 | [ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm 325 | [ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo 326 | [ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq 327 | [ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq 328 | 329 | # subtleties of matching 330 | abc & xabcy abc 331 | a\(b\)?c\1d b acd 332 | aBc i Abc Abc 333 | a[Bc]*d i abBCcd abBCcd 334 | 0[[:upper:]]1 &i 0a1 0a1 335 | 0[[:lower:]]1 &i 0A1 0A1 336 | a[^b]c &i abc 337 | a[^b]c &i aBc 338 | a[^b]c &i adc adc 339 | [a]b[c] - abc abc 340 | [a]b[a] - aba aba 341 | [abc]b[abc] - abc abc 342 | [abc]b[abd] - abd abd 343 | a(b?c)+d - accd accd 344 | (wee|week)(knights|night) - weeknights weeknights 345 | (we|wee|week|frob)(knights|night|day) - weeknights weeknights 346 | a[bc]d - xyzaaabcaababdacd abd 347 | a[ab]c - aaabc abc 348 | abc s abc abc 349 | a* & b @b 350 | 351 | # Let's have some fun -- try to match a C comment. 352 | # first the obvious, which looks okay at first glance... 353 | /\*.*\*/ - /*x*/ /*x*/ 354 | # but... 355 | /\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ 356 | # okay, we must not match */ inside; try to do that... 357 | /\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ 358 | /\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ 359 | # but... 360 | /\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ 361 | # and a still fancier version, which does it right (I think)... 362 | /\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ 363 | /\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ 364 | /\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ 365 | /\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ 366 | /\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ 367 | /\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ 368 | 369 | # subexpressions 370 | a(b)(c)d - abcd abcd b,c 371 | a(((b)))c - abc abc b,b,b 372 | a(b|(c))d - abd abd b,- 373 | a(b*|c|e)d - abbd abbd bb 374 | a(b*|c|e)d - acd acd c 375 | a(b*|c|e)d - ad ad @d 376 | a(b?)c - abc abc b 377 | a(b?)c - ac ac @c 378 | a(b+)c - abc abc b 379 | a(b+)c - abbbc abbbc bbb 380 | a(b*)c - ac ac @c 381 | (a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de 382 | # the regression tester only asks for 9 subexpressions 383 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j 384 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k 385 | a([bc]?)c - abc abc b 386 | a([bc]?)c - ac ac @c 387 | a([bc]+)c - abc abc b 388 | a([bc]+)c - abcc abcc bc 389 | a([bc]+)bc - abcbc abcbc bc 390 | a(bb+|b)b - abb abb b 391 | a(bbb+|bb+|b)b - abb abb b 392 | a(bbb+|bb+|b)b - abbb abbb bb 393 | a(bbb+|bb+|b)bb - abbb abbb b 394 | (.*).* - abcdef abcdef abcdef 395 | (a*)* - bc @b @b 396 | 397 | # do we get the right subexpression when it is used more than once? 398 | a(b|c)*d - ad ad - 399 | a(b|c)*d - abcd abcd c 400 | a(b|c)+d - abd abd b 401 | a(b|c)+d - abcd abcd c 402 | a(b|c?)+d - ad ad @d 403 | a(b|c?)+d - abcd abcd @d 404 | a(b|c){0,0}d - ad ad - 405 | a(b|c){0,1}d - ad ad - 406 | a(b|c){0,1}d - abd abd b 407 | a(b|c){0,2}d - ad ad - 408 | a(b|c){0,2}d - abcd abcd c 409 | a(b|c){0,}d - ad ad - 410 | a(b|c){0,}d - abcd abcd c 411 | a(b|c){1,1}d - abd abd b 412 | a(b|c){1,1}d - acd acd c 413 | a(b|c){1,2}d - abd abd b 414 | a(b|c){1,2}d - abcd abcd c 415 | a(b|c){1,}d - abd abd b 416 | a(b|c){1,}d - abcd abcd c 417 | a(b|c){2,2}d - acbd acbd b 418 | a(b|c){2,2}d - abcd abcd c 419 | a(b|c){2,4}d - abcd abcd c 420 | a(b|c){2,4}d - abcbd abcbd b 421 | a(b|c){2,4}d - abcbcd abcbcd c 422 | a(b|c){2,}d - abcd abcd c 423 | a(b|c){2,}d - abcbd abcbd b 424 | a(b+|((c)*))+d - abd abd @d,@d,- 425 | a(b+|((c)*))+d - abcd abcd @d,@d,- 426 | 427 | # check out the STARTEND option 428 | [abc] &# a(b)c b 429 | [abc] &# a(d)c 430 | [abc] &# a(bc)d b 431 | [abc] &# a(dc)d c 432 | . &# a()c 433 | b.*c &# b(bc)c bc 434 | b.* &# b(bc)c bc 435 | .*c &# b(bc)c bc 436 | 437 | # plain strings, with the NOSPEC flag 438 | abc m abc abc 439 | abc m xabcy abc 440 | abc m xyz 441 | a*b m aba*b a*b 442 | a*b m ab 443 | "" mC EMPTY 444 | 445 | # cases involving NULs 446 | aZb & a a 447 | aZb &p a 448 | aZb &p# (aZb) aZb 449 | aZ*b &p# (ab) ab 450 | a.b &# (aZb) aZb 451 | a.* &# (aZb)c aZb 452 | 453 | # word boundaries (ick) 454 | [[:<:]]a & a a 455 | [[:<:]]a & ba 456 | [[:<:]]a & -a a 457 | a[[:>:]] & a a 458 | a[[:>:]] & ab 459 | a[[:>:]] & a- a 460 | [[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc 461 | [[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc 462 | [[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc 463 | [[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc 464 | [[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ 465 | [[:<:]]a_b[[:>:]] & x_a_b 466 | 467 | # past problems, and suspected problems 468 | (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 469 | abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop 470 | abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv 471 | (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 472 | CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11 473 | Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz 474 | a?b - ab ab 475 | -\{0,1\}[0-9]*$ b -5 -5 476 | -------------------------------------------------------------------------------- /src/re/utils.h: -------------------------------------------------------------------------------- 1 | /* utility definitions */ 2 | #ifndef _POSIX2_RE_DUP_MAX 3 | #define _POSIX2_RE_DUP_MAX 255 4 | #endif 5 | 6 | #define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ 7 | #define INFINITY (DUPMAX + 1) 8 | #define NC (CHAR_MAX - CHAR_MIN + 1) 9 | typedef unsigned char uch; 10 | 11 | /* switch off assertions (if not already off) if no REDEBUG */ 12 | #ifndef REDEBUG 13 | #ifndef NDEBUG 14 | #define NDEBUG /* no assertions please */ 15 | #endif 16 | #endif 17 | #include 18 | 19 | /* for old systems with bcopy() but no memmove() */ 20 | #ifdef USEBCOPY 21 | #define memmove(d, s, c) bcopy(s, d, c) 22 | #endif 23 | -------------------------------------------------------------------------------- /src/scheme-private.h: -------------------------------------------------------------------------------- 1 | /* T I N Y S C H E M E 1 . 50 2 | * Continued by armornick (March 2016) 3 | * Original work by Dimitrios Souflis (dsouflis@acm.org) 4 | * 5 | * Based on MiniScheme (original credits follow) 6 | * (MINISCM) coded by Atsushi Moriwaki (11/5/1989) 7 | * (MINISCM) E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 8 | * (MINISCM) This version has been modified by R.C. Secrist. 9 | * (MINISCM) 10 | * (MINISCM) Mini-Scheme is now maintained by Akira KIDA. 11 | * (MINISCM) 12 | * (MINISCM) This is a revised and modified version by Akira KIDA. 13 | * (MINISCM) current version is 0.85k4 (15 May 1994) 14 | * (MINISCM) 15 | * 16 | */ 17 | 18 | /* scheme-private.h */ 19 | 20 | #ifndef _SCHEME_PRIVATE_H 21 | #define _SCHEME_PRIVATE_H 22 | 23 | #include "scheme.h" 24 | /*------------------ Ugly internals -----------------------------------*/ 25 | /*------------------ Of interest only to FFI users --------------------*/ 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | enum scheme_port_kind { 32 | port_free=0, 33 | port_file=1, 34 | port_string=2, 35 | port_srfi6=4, 36 | port_input=16, 37 | port_output=32, 38 | port_saw_EOF=64 39 | }; 40 | 41 | typedef struct port { 42 | unsigned char kind; 43 | union { 44 | struct { 45 | FILE *file; 46 | int closeit; 47 | #if SHOW_ERROR_LINE 48 | int curr_line; 49 | char *filename; 50 | #endif 51 | } stdio; 52 | struct { 53 | char *start; 54 | char *past_the_end; 55 | char *curr; 56 | } string; 57 | } rep; 58 | } port; 59 | 60 | /* cell structure */ 61 | struct cell { 62 | unsigned int _flag; 63 | union { 64 | struct { 65 | char *_svalue; 66 | int _length; 67 | } _string; 68 | num _number; 69 | port *_port; 70 | foreign_func _ff; 71 | struct { 72 | struct cell *_car; 73 | struct cell *_cdr; 74 | } _cons; 75 | } _object; 76 | }; 77 | 78 | struct scheme { 79 | /* arrays for segments */ 80 | func_alloc malloc; 81 | func_dealloc free; 82 | 83 | /* return code */ 84 | int retcode; 85 | int tracing; 86 | 87 | 88 | #define CELL_SEGSIZE 5000 /* # of cells in one segment */ 89 | #define CELL_NSEGMENT 10 /* # of segments for cells */ 90 | char *alloc_seg[CELL_NSEGMENT]; 91 | pointer cell_seg[CELL_NSEGMENT]; 92 | int last_cell_seg; 93 | 94 | /* We use 4 registers. */ 95 | pointer args; /* register for arguments of function */ 96 | pointer envir; /* stack register for current environment */ 97 | pointer code; /* register for current code */ 98 | pointer dump; /* stack register for next evaluation */ 99 | 100 | int interactive_repl; /* are we in an interactive REPL? */ 101 | 102 | struct cell _sink; 103 | pointer sink; /* when mem. alloc. fails */ 104 | struct cell _NIL; 105 | pointer NIL; /* special cell representing empty cell */ 106 | struct cell _HASHT; 107 | pointer T; /* special cell representing #t */ 108 | struct cell _HASHF; 109 | pointer F; /* special cell representing #f */ 110 | struct cell _EOF_OBJ; 111 | pointer EOF_OBJ; /* special cell representing end-of-file object */ 112 | pointer oblist; /* pointer to symbol table */ 113 | pointer global_env; /* pointer to global environment */ 114 | pointer c_nest; /* stack for nested calls from C */ 115 | 116 | /* global pointers to special symbols */ 117 | pointer LAMBDA; /* pointer to syntax lambda */ 118 | pointer QUOTE; /* pointer to syntax quote */ 119 | 120 | pointer QQUOTE; /* pointer to symbol quasiquote */ 121 | pointer UNQUOTE; /* pointer to symbol unquote */ 122 | pointer UNQUOTESP; /* pointer to symbol unquote-splicing */ 123 | pointer FEED_TO; /* => */ 124 | pointer COLON_HOOK; /* *colon-hook* */ 125 | pointer ERROR_HOOK; /* *error-hook* */ 126 | pointer SHARP_HOOK; /* *sharp-hook* */ 127 | pointer COMPILE_HOOK; /* *compile-hook* */ 128 | 129 | pointer free_cell; /* pointer to top of free cells */ 130 | long fcells; /* # of free cells */ 131 | 132 | pointer inport; 133 | pointer outport; 134 | pointer save_inport; 135 | pointer loadport; 136 | 137 | #define MAXFIL 64 138 | port load_stack[MAXFIL]; /* Stack of open files for port -1 (LOADing) */ 139 | int nesting_stack[MAXFIL]; 140 | int file_i; 141 | int nesting; 142 | 143 | char gc_verbose; /* if gc_verbose is not zero, print gc status */ 144 | char no_memory; /* Whether mem. alloc. has failed */ 145 | 146 | #define LINESIZE 1024 147 | char linebuff[LINESIZE]; 148 | #define STRBUFFSIZE 256 149 | char strbuff[STRBUFFSIZE]; 150 | 151 | FILE *tmpfp; 152 | int tok; 153 | int print_flag; 154 | pointer value; 155 | int op; 156 | 157 | void *ext_data; /* For the benefit of foreign functions */ 158 | long gensym_cnt; 159 | 160 | struct scheme_interface *vptr; 161 | void *dump_base; /* pointer to base of allocated dump stack */ 162 | int dump_size; /* number of frames allocated for dump stack */ 163 | }; 164 | 165 | /* operator code */ 166 | enum scheme_opcodes { 167 | #define _OP_DEF(A,B,C,D,E,OP) OP, 168 | #include "opdefines.h" 169 | OP_MAXDEFINED 170 | }; 171 | 172 | 173 | pointer _cons(scheme *sc, pointer a, pointer b, int immutable); 174 | pointer mk_integer(scheme *sc, long num); 175 | pointer mk_real(scheme *sc, double num); 176 | pointer mk_symbol(scheme *sc, const char *name); 177 | pointer gensym(scheme *sc); 178 | pointer mk_string(scheme *sc, const char *str); 179 | pointer mk_counted_string(scheme *sc, const char *str, int len); 180 | pointer mk_empty_string(scheme *sc, int len, char fill); 181 | pointer mk_character(scheme *sc, int c); 182 | pointer mk_foreign_func(scheme *sc, foreign_func f); 183 | void putstr(scheme *sc, const char *s); 184 | int list_length(scheme *sc, pointer a); 185 | int eqv(pointer a, pointer b); 186 | 187 | 188 | int is_string(pointer p); 189 | char *string_value(pointer p); 190 | int is_number(pointer p); 191 | num nvalue(pointer p); 192 | long ivalue(pointer p); 193 | double rvalue(pointer p); 194 | int is_integer(pointer p); 195 | int is_real(pointer p); 196 | int is_character(pointer p); 197 | long charvalue(pointer p); 198 | int is_vector(pointer p); 199 | 200 | int is_port(pointer p); 201 | 202 | int is_pair(pointer p); 203 | pointer pair_car(pointer p); 204 | pointer pair_cdr(pointer p); 205 | pointer set_car(pointer p, pointer q); 206 | pointer set_cdr(pointer p, pointer q); 207 | 208 | int is_symbol(pointer p); 209 | char *symname(pointer p); 210 | int hasprop(pointer p); 211 | 212 | int is_syntax(pointer p); 213 | int is_proc(pointer p); 214 | int is_foreign(pointer p); 215 | char *syntaxname(pointer p); 216 | int is_closure(pointer p); 217 | #ifdef USE_MACRO 218 | int is_macro(pointer p); 219 | #endif 220 | pointer closure_code(pointer p); 221 | pointer closure_env(pointer p); 222 | 223 | int is_continuation(pointer p); 224 | int is_promise(pointer p); 225 | int is_environment(pointer p); 226 | int is_immutable(pointer p); 227 | void setimmutable(pointer p); 228 | 229 | pointer reverse(scheme *sc, pointer a); 230 | pointer reverse_in_place(scheme *sc, pointer term, pointer list); 231 | void fill_vector(pointer vec, pointer obj); 232 | pointer vector_elem(pointer vec, int ielem); 233 | pointer set_vector_elem(pointer vec, int ielem, pointer a); 234 | 235 | #ifdef __cplusplus 236 | } 237 | #endif 238 | 239 | #endif 240 | 241 | -------------------------------------------------------------------------------- /src/scheme.h: -------------------------------------------------------------------------------- 1 | /* T I N Y S C H E M E 1 . 50 2 | * Continued by armornick (March 2016) 3 | * Original work by Dimitrios Souflis (dsouflis@acm.org) 4 | * 5 | * Based on MiniScheme (original credits follow) 6 | * (MINISCM) coded by Atsushi Moriwaki (11/5/1989) 7 | * (MINISCM) E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 8 | * (MINISCM) This version has been modified by R.C. Secrist. 9 | * (MINISCM) 10 | * (MINISCM) Mini-Scheme is now maintained by Akira KIDA. 11 | * (MINISCM) 12 | * (MINISCM) This is a revised and modified version by Akira KIDA. 13 | * (MINISCM) current version is 0.85k4 (15 May 1994) 14 | * (MINISCM) 15 | * 16 | */ 17 | 18 | #ifndef _SCHEME_H 19 | #define _SCHEME_H 20 | 21 | #include "config.h" 22 | 23 | #include 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | typedef struct scheme scheme; 30 | typedef struct cell *pointer; 31 | 32 | typedef void * (*func_alloc)(size_t); 33 | typedef void (*func_dealloc)(void *); 34 | 35 | /* num, for generic arithmetic */ 36 | typedef struct num { 37 | char is_fixnum; 38 | union { 39 | long ivalue; 40 | double rvalue; 41 | } value; 42 | } num; 43 | 44 | SCHEME_EXPORT scheme *scheme_init_new(); 45 | SCHEME_EXPORT scheme *scheme_init_new_custom_alloc(func_alloc malloc, func_dealloc free); 46 | SCHEME_EXPORT int scheme_init(scheme **sc); 47 | SCHEME_EXPORT int scheme_init_custom_alloc(scheme **sc, func_alloc, func_dealloc); 48 | SCHEME_EXPORT void scheme_deinit(scheme *sc); 49 | SCHEME_EXPORT void scheme_set_input_port_file(scheme *sc, FILE *fin); 50 | SCHEME_EXPORT void scheme_set_input_port_string(scheme *sc, char *start, char *past_the_end); 51 | SCHEME_EXPORT void scheme_set_output_port_file(scheme *sc, FILE *fin); 52 | SCHEME_EXPORT void scheme_set_output_port_string(scheme *sc, char *start, char *past_the_end); 53 | SCHEME_EXPORT void scheme_load_file(scheme *sc, FILE *fin); 54 | SCHEME_EXPORT void scheme_load_named_file(scheme *sc, FILE *fin, const char *filename); 55 | SCHEME_EXPORT void scheme_load_string(scheme *sc, const char *cmd); 56 | SCHEME_EXPORT pointer scheme_apply0(scheme *sc, const char *procname); 57 | SCHEME_EXPORT pointer scheme_call(scheme *sc, pointer func, pointer args); 58 | SCHEME_EXPORT pointer scheme_eval(scheme *sc, pointer obj); 59 | SCHEME_EXPORT void scheme_set_external_data(scheme *sc, void *p); 60 | SCHEME_EXPORT void scheme_define(scheme *sc, pointer env, pointer symbol, pointer value); 61 | 62 | typedef pointer (*foreign_func)(scheme *, pointer); 63 | 64 | SCHEME_EXPORT pointer scheme_cons(scheme *sc, pointer a, pointer b, int immutable); 65 | SCHEME_EXPORT pointer scheme_integer(scheme *sc, long num); 66 | SCHEME_EXPORT pointer scheme_real(scheme *sc, double num); 67 | SCHEME_EXPORT pointer scheme_symbol(scheme *sc, const char *name); 68 | SCHEME_EXPORT pointer scheme_gensym(scheme *sc); 69 | SCHEME_EXPORT pointer scheme_string(scheme *sc, const char *str); 70 | SCHEME_EXPORT pointer scheme_counted_string(scheme *sc, const char *str, int len); 71 | SCHEME_EXPORT pointer scheme_empty_string(scheme *sc, int len, char fill); 72 | SCHEME_EXPORT pointer scheme_character(scheme *sc, int c); 73 | SCHEME_EXPORT pointer scheme_foreign_func(scheme *sc, foreign_func f); 74 | SCHEME_EXPORT void scheme_putstr(scheme *sc, const char *s); 75 | SCHEME_EXPORT int scheme_list_length(scheme *sc, pointer a); 76 | SCHEME_EXPORT int scheme_eqv(pointer a, pointer b); 77 | 78 | SCHEME_EXPORT int scheme_is_string(pointer p); 79 | SCHEME_EXPORT char* scheme_string_value(pointer p); 80 | SCHEME_EXPORT int scheme_is_number(pointer p); 81 | SCHEME_EXPORT num scheme_nvalue(pointer p); 82 | SCHEME_EXPORT long scheme_ivalue(pointer p); 83 | SCHEME_EXPORT double scheme_rvalue(pointer p); 84 | SCHEME_EXPORT int scheme_is_integer(pointer p); 85 | SCHEME_EXPORT int scheme_is_real(pointer p); 86 | SCHEME_EXPORT int scheme_is_character(pointer p); 87 | SCHEME_EXPORT long scheme_charvalue(pointer p); 88 | SCHEME_EXPORT int scheme_is_vector(pointer p); 89 | 90 | SCHEME_EXPORT int scheme_is_port(pointer p); 91 | 92 | SCHEME_EXPORT int scheme_is_pair(pointer p); 93 | SCHEME_EXPORT pointer scheme_pair_car(pointer p); 94 | SCHEME_EXPORT pointer scheme_pair_cdr(pointer p); 95 | SCHEME_EXPORT pointer scheme_set_car(pointer p, pointer q); 96 | SCHEME_EXPORT pointer scheme_set_cdr(pointer p, pointer q); 97 | 98 | SCHEME_EXPORT int scheme_is_symbol(pointer p); 99 | SCHEME_EXPORT char* scheme_symname(pointer p); 100 | 101 | SCHEME_EXPORT int scheme_is_syntax(pointer p); 102 | SCHEME_EXPORT int scheme_is_proc(pointer p); 103 | SCHEME_EXPORT int scheme_is_foreign(pointer p); 104 | SCHEME_EXPORT char* scheme_syntaxname(pointer p); 105 | SCHEME_EXPORT int scheme_is_closure(pointer p); 106 | 107 | #ifdef USE_MACRO 108 | SCHEME_EXPORT int scheme_is_macro(pointer p); 109 | #endif 110 | 111 | SCHEME_EXPORT int scheme_is_continuation(pointer p); 112 | SCHEME_EXPORT int scheme_is_promise(pointer p); 113 | SCHEME_EXPORT int scheme_is_environment(pointer p); 114 | SCHEME_EXPORT int scheme_is_immutable(pointer p); 115 | SCHEME_EXPORT void scheme_setimmutable(pointer p); 116 | 117 | 118 | SCHEME_EXPORT int scheme_retcode(scheme *sc); 119 | SCHEME_EXPORT pointer scheme_global_env(scheme *sc); 120 | SCHEME_EXPORT pointer scheme_nil(scheme *sc); 121 | SCHEME_EXPORT pointer scheme_true(scheme *sc); 122 | SCHEME_EXPORT pointer scheme_false(scheme *sc); 123 | SCHEME_EXPORT void scheme_memory_error(scheme *sc); 124 | 125 | SCHEME_EXPORT pointer scheme_reverse(scheme *sc, pointer a); 126 | SCHEME_EXPORT pointer scheme_reverse_in_place(scheme *sc, pointer term, pointer list); 127 | SCHEME_EXPORT void scheme_fill_vector(pointer vec, pointer obj); 128 | SCHEME_EXPORT pointer scheme_vector_elem(pointer vec, int ielem); 129 | SCHEME_EXPORT pointer scheme_set_vector_elem(pointer vec, int ielem, pointer a); 130 | 131 | 132 | 133 | #define cons(sc,a,b) scheme_cons(sc,a,b,0) 134 | #define immutable_cons(sc,a,b) scheme_cons(sc,a,b,1) 135 | 136 | 137 | struct scheme_interface { 138 | void (*scheme_define)(scheme *sc, pointer env, pointer symbol, pointer value); 139 | pointer (*cons)(scheme *sc, pointer a, pointer b); 140 | pointer (*immutable_cons)(scheme *sc, pointer a, pointer b); 141 | pointer (*reserve_cells)(scheme *sc, int n); 142 | pointer (*mk_integer)(scheme *sc, long num); 143 | pointer (*mk_real)(scheme *sc, double num); 144 | pointer (*mk_symbol)(scheme *sc, const char *name); 145 | pointer (*gensym)(scheme *sc); 146 | pointer (*mk_string)(scheme *sc, const char *str); 147 | pointer (*mk_counted_string)(scheme *sc, const char *str, int len); 148 | pointer (*mk_character)(scheme *sc, int c); 149 | pointer (*mk_vector)(scheme *sc, int len); 150 | pointer (*mk_foreign_func)(scheme *sc, foreign_func f); 151 | void (*putstr)(scheme *sc, const char *s); 152 | void (*putcharacter)(scheme *sc, int c); 153 | 154 | int (*is_string)(pointer p); 155 | char *(*string_value)(pointer p); 156 | int (*is_number)(pointer p); 157 | num (*nvalue)(pointer p); 158 | long (*ivalue)(pointer p); 159 | double (*rvalue)(pointer p); 160 | int (*is_integer)(pointer p); 161 | int (*is_real)(pointer p); 162 | int (*is_character)(pointer p); 163 | long (*charvalue)(pointer p); 164 | int (*is_list)(scheme *sc, pointer p); 165 | int (*is_vector)(pointer p); 166 | int (*list_length)(scheme *sc, pointer vec); 167 | long (*vector_length)(pointer vec); 168 | void (*fill_vector)(pointer vec, pointer elem); 169 | pointer (*vector_elem)(pointer vec, int ielem); 170 | pointer (*set_vector_elem)(pointer vec, int ielem, pointer newel); 171 | int (*is_port)(pointer p); 172 | 173 | int (*is_pair)(pointer p); 174 | pointer (*pair_car)(pointer p); 175 | pointer (*pair_cdr)(pointer p); 176 | pointer (*set_car)(pointer p, pointer q); 177 | pointer (*set_cdr)(pointer p, pointer q); 178 | 179 | int (*is_symbol)(pointer p); 180 | char *(*symname)(pointer p); 181 | 182 | int (*is_syntax)(pointer p); 183 | int (*is_proc)(pointer p); 184 | int (*is_foreign)(pointer p); 185 | char *(*syntaxname)(pointer p); 186 | int (*is_closure)(pointer p); 187 | int (*is_macro)(pointer p); 188 | pointer (*closure_code)(pointer p); 189 | pointer (*closure_env)(pointer p); 190 | 191 | int (*is_continuation)(pointer p); 192 | int (*is_promise)(pointer p); 193 | int (*is_environment)(pointer p); 194 | int (*is_immutable)(pointer p); 195 | void (*setimmutable)(pointer p); 196 | void (*load_file)(scheme *sc, FILE *fin); 197 | void (*load_string)(scheme *sc, const char *input); 198 | }; 199 | 200 | typedef struct scheme_interface scheme_interface; 201 | 202 | SCHEME_EXPORT void scheme_init_interface(scheme_interface *sci); 203 | 204 | typedef struct scheme_registerable 205 | { 206 | foreign_func f; 207 | const char * name; 208 | } scheme_registerable; 209 | 210 | void scheme_register_foreign_func_list(scheme * sc, 211 | scheme_registerable * list, 212 | int n); 213 | 214 | #ifdef __cplusplus 215 | } 216 | #endif 217 | 218 | #endif 219 | --------------------------------------------------------------------------------