├── LICENSE ├── README.md ├── example ├── README.md ├── msvcbuild.bat └── repl.c ├── ext ├── re │ ├── COPYRIGHT │ ├── Makefile │ ├── README │ ├── WHATSNEW │ ├── cclass.h │ ├── cname.h │ ├── debug.c │ ├── engine.c │ ├── engine.ih │ ├── fake │ │ ├── limits.h │ │ ├── memmove.c │ │ └── stdlib.h │ ├── main.c │ ├── mkh │ ├── msvcbuild.bat │ ├── re.c │ ├── regcomp.c │ ├── regcomp.ih │ ├── regerror.c │ ├── regerror.ih │ ├── regex.3 │ ├── regex.7 │ ├── regex.h │ ├── regex2.h │ ├── regexec.c │ ├── regfree.c │ ├── split.c │ ├── tests │ └── utils.h └── tsx │ ├── LICENSE │ ├── Makefile │ ├── README │ ├── listhome.scm │ ├── msvcbuild.bat │ ├── smtp.scm │ ├── srepl.scm │ ├── tsx-functions.txt │ ├── tsx.c │ └── tsx.h └── src ├── README ├── README.md ├── bignum.c ├── bignum.h ├── init.scm ├── makefile ├── miniscm.c ├── miniscm.h └── msvcbuild.bat /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Tatsuya Watanabe 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | minischeme 2 | ========== 3 | 4 | Mini-Scheme Interpreter with Copying GC 5 | 6 | 7 | Features 8 | -------- 9 | 10 | * based on Mini-Scheme 0.85k4-a 11 | * bignum (multi-precision integer) operations supported 12 | * continuations support without using 'SCHEME STACK' (#undef USE_SCHEME_STACK) 13 | * implemented a Copying GC using the Cheney's algorithm (#define USE_COPYING_GC) 14 | * implemented Hygienic macro 'syntax-rules' (supports SRFI 46) 15 | * implemented Legacy macro 'define-macro' ('gensym' also implemented) 16 | * supports UTF-8 multibyte characters (limitations: NO normalization nor toupper/tolower, ...) 17 | * reinventing the [tinyscheme](https://github.com/ignorabimus/tinyscheme "Experimental fork of TinyScheme and extensions TSX, RE.") :) 18 | 19 | 20 | Build (with MSVC) 21 | ----------------- 22 | 23 | Install "Visual C++ 2015" or later. 24 | 25 | * I compiled using "Microsoft Visual C++ 2015", but "2013" or earlier maybe OK. 26 | 27 | ### Setting up environemt 28 | 29 | Open the Visual Studio command prompt, or open Normal command prompt then run 30 | 31 | > (Visual Studio installed path)\VC\vcvarsall.bat 32 | 33 | and change directory to unpacking source files. 34 | 35 | > cd src 36 | 37 | ### Build an executable 38 | 39 | To build an executable, just run 40 | 41 | > msvcbuild.bat 42 | 43 | and you'll get 44 | 45 | > bin\minischeme.exe 46 | 47 | ### Build a static library 48 | 49 | To build as a static library, run with "static" option 50 | 51 | > msvcbuild.bat static 52 | 53 | and you'll get 54 | 55 | > lib\minischeme.lib 56 | 57 | 58 | [R5RS standard](http://www.schemers.org/Documents/Standards/R5RS/ "schemers.org: Documents: Standards: R5RS") compatibility 59 | ------------------ 60 | 61 | * [x] + 62 | * [x] * 63 | * [x] - 64 | * [x] / 65 | * [x] = 66 | * [x] < 67 | * [x] > 68 | * [x] <= 69 | * [x] >= 70 | * [x] abs 71 | * [x] acos 72 | * [x] and 73 | * [ ] angle 74 | * [x] append 75 | * [x] apply 76 | * [x] asin 77 | * [x] assoc 78 | * [x] assq 79 | * [x] assv 80 | * [x] atan 81 | * [x] begin 82 | * [x] boolean? 83 | * [x] caaaar 84 | * [x] caaadr 85 | * [x] caaar 86 | * [x] caadr 87 | * [x] caadar 88 | * [x] caaddr 89 | * [x] caar 90 | * [x] cadaar 91 | * [x] cadadr 92 | * [x] cadar 93 | * [x] caddr 94 | * [x] caddar 95 | * [x] cadddr 96 | * [x] cadr 97 | * [x] call-with-current-continuation 98 | * [x] call-with-input-file 99 | * [x] call-with-output-file 100 | * [x] call-with-values 101 | * [x] car 102 | * [x] case 103 | * [x] cdar 104 | * [x] cdaar 105 | * [x] cdaaar 106 | * [x] cdaadr 107 | * [x] cdadar 108 | * [x] cdaddr 109 | * [x] cdadr 110 | * [x] cddr 111 | * [x] cddar 112 | * [x] cddaar 113 | * [x] cddadr 114 | * [x] cdddr 115 | * [x] cdddar 116 | * [x] cddddr 117 | * [x] cdr 118 | * [x] ceiling 119 | * [x] char->integer 120 | * [x] char-alphabetic? 121 | * [x] char-ci<=? 122 | * [x] char-ci=? 125 | * [x] char-ci>? 126 | * [x] char-downcase 127 | * [x] char-lower-case? 128 | * [x] char-numeric? 129 | * [x] char-ready? 130 | * [x] char-upcase 131 | * [x] char-upper-case? 132 | * [x] char-whitespace? 133 | * [x] char<=? 134 | * [x] char=? 137 | * [x] char>? 138 | * [x] char? 139 | * [x] close-input-port 140 | * [x] close-output-port 141 | * [ ] complex? 142 | * [x] cond 143 | * [x] cons 144 | * [x] cos 145 | * [x] current-input-port 146 | * [x] current-output-port 147 | * [x] define 148 | * [x] define-syntax 149 | * [x] delay 150 | * [ ] denominator 151 | * [x] display 152 | * [x] do 153 | * [x] dynamic-wind 154 | * [x] else 155 | * [x] eof-object? 156 | * [x] eq? 157 | * [x] equal? 158 | * [x] eqv? 159 | * [x] eval 160 | * [x] even? 161 | * [x] exact->inexact 162 | * [x] exact? 163 | * [x] exp 164 | * [x] expt 165 | * [x] floor 166 | * [x] for-each 167 | * [x] force 168 | * [x] gcd 169 | * [x] if 170 | * [ ] imag-part 171 | * [x] inexact->exact 172 | * [x] inexact? 173 | * [x] input-port? 174 | * [x] integer->char 175 | * [x] integer? 176 | * [x] interaction-environment 177 | * [x] lambda 178 | * [x] lcm 179 | * [x] length 180 | * [x] let 181 | * [x] let* 182 | * [x] let-syntax 183 | * [x] letrec 184 | * [x] letrec-syntax 185 | * [x] list 186 | * [x] list->string 187 | * [x] list->vector 188 | * [x] list-ref 189 | * [x] list-tail 190 | * [x] list? 191 | * [x] load 192 | * [x] log 193 | * [ ] magnitude 194 | * [ ] make-polar 195 | * [ ] make-rectangular 196 | * [x] make-string 197 | * [x] make-vector 198 | * [x] map 199 | * [x] max 200 | * [x] member 201 | * [x] memq 202 | * [x] memv 203 | * [x] min 204 | * [x] modulo 205 | * [x] negative? 206 | * [x] newline 207 | * [x] not 208 | * [ ] null-environment 209 | * [x] null? 210 | * [x] number->string 211 | * [x] number? 212 | * [ ] numerator 213 | * [x] odd? 214 | * [x] open-input-file 215 | * [x] open-output-file 216 | * [x] or 217 | * [x] output-port? 218 | * [x] pair? 219 | * [x] peek-char? 220 | * [x] port? 221 | * [x] positive? 222 | * [x] procedure? 223 | * [x] quasiquote 224 | * [x] quote 225 | * [x] quotient 226 | * [ ] rational? 227 | * [ ] rationalize 228 | * [x] read 229 | * [x] read-char 230 | * [ ] real-part 231 | * [x] real? 232 | * [x] remainder 233 | * [x] reverse 234 | * [x] round 235 | * [ ] scheme-report-environment 236 | * [x] set! 237 | * [x] set-car! 238 | * [x] set-cdr! 239 | * [x] sin 240 | * [x] sqrt 241 | * [x] string 242 | * [x] string->list 243 | * [x] string->number 244 | * [x] string->symbol 245 | * [x] string-append 246 | * [x] string-ci<=? 247 | * [x] string-ci=? 250 | * [x] string-ci>? 251 | * [x] string-copy 252 | * [x] string-fill! 253 | * [x] string-length 254 | * [x] string-ref 255 | * [x] string-set! 256 | * [x] string<=? 257 | * [x] string=? 260 | * [x] string>? 261 | * [x] string? 262 | * [x] substring 263 | * [x] symbol->string 264 | * [x] symbol? 265 | * [x] syntax-rules 266 | * [x] tan 267 | * [ ] transcript-off 268 | * [ ] transcript-on 269 | * [x] truncate 270 | * [x] values 271 | * [x] vector 272 | * [x] vector->list 273 | * [x] vector-fill! 274 | * [x] vector-length 275 | * [x] vector-ref 276 | * [x] vector-set! 277 | * [x] vector? 278 | * [x] with-input-from-file 279 | * [x] with-output-to-file 280 | * [x] write 281 | * [x] write-char 282 | * [x] zero? 283 | 284 | 285 | Links 286 | ----- 287 | 288 | [D. Souflis, J. Shapiro - TinyScheme Home](http://tinyscheme.sourceforge.net/home.html) 289 | 290 | [Visual Studio Downloads](http://www.visualstudio.com/downloads/) 291 | 292 | 293 | License 294 | ------- 295 | 296 | Copyright (c) 2015 Tatsuya Watanabe. See the LICENSE file for license rights and limitations (MIT). 297 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # REPL Example 2 | 3 | * depends on [replxx](https://github.com/AmokHuginnsson/replxx) 4 | 5 | ## Build (with MSVC) 6 | 7 | ### Setting up environemt 8 | 9 | Clone [replxx](https://github.com/AmokHuginnsson/replxx) to 10 | 11 | (Path of minischeme)\..\replxx 12 | 13 | Open the Visual Studio command prompt and change directory 14 | 15 | > cd (Path of minischeme)\example 16 | 17 | ### Build an executable 18 | 19 | To build an executable, just run 20 | 21 | > msvcbuild.bat 22 | 23 | and find it under bin\ 24 | 25 | > bin\repl.exe 26 | -------------------------------------------------------------------------------- /example/msvcbuild.bat: -------------------------------------------------------------------------------- 1 | @setlocal 2 | cd ..\..\replxx 3 | @if not exist build\ ( 4 | mkdir build 5 | ) 6 | cd build 7 | cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="/MT /O2 /Ob2 /DNDEBUG" .. 8 | msbuild replxx.vcxproj /p:Configuration=Release 9 | @endlocal 10 | 11 | @setlocal 12 | cd ..\src 13 | call msvcbuild.bat static 14 | @endlocal 15 | 16 | @setlocal 17 | cd ..\ext\re 18 | call msvcbuild.bat 19 | @endlocal 20 | 21 | @setlocal 22 | cd ..\ext\tsx 23 | call msvcbuild.bat 24 | @endlocal 25 | 26 | @setlocal 27 | @set REPLXX_H_DIR=..\..\replxx\include 28 | @set SCHEME_H_DIR=..\src 29 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_WARNINGS /I%SCHEME_H_DIR% /I%REPLXX_H_DIR% 30 | @set MSLINK=link /nologo 31 | @set EXT_LIBS=..\ext\re\lib\re.lib ..\ext\tsx\lib\tsx.lib ..\..\replxx\build\Release\replxx.lib 32 | 33 | @if not exist bin\ ( 34 | mkdir bin 35 | ) 36 | 37 | %MSCOMPILE% /MT repl.c 38 | %MSLINK% /out:bin\repl.exe repl.obj ..\src\lib\miniscm.lib %EXT_LIBS% 39 | 40 | del repl.obj 41 | @endlocal 42 | -------------------------------------------------------------------------------- /example/repl.c: -------------------------------------------------------------------------------- 1 | #include "miniscm.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #ifdef _WIN32 8 | #include 9 | #endif 10 | 11 | #include "replxx.h" 12 | void init_re(void); 13 | void init_tsx(void); 14 | extern pointer oblist; 15 | 16 | static int get_input_length(const char *input) 17 | { 18 | int escaped = 0, dquotes = 0; 19 | int n_paren = 0, s_paren = 0, c_paren = 0; 20 | const char *p; 21 | 22 | if (input[0] == '\0') { 23 | return 0; 24 | } 25 | 26 | for (p = input; *p == '\t' || *p == ' '; p++); 27 | 28 | while (*p) { 29 | if (escaped) { 30 | escaped = 0; 31 | p++; 32 | continue; 33 | } 34 | switch (*p) { 35 | case '\\': 36 | escaped = 1; 37 | break; 38 | case '"': 39 | dquotes = !dquotes; 40 | break; 41 | case ';': 42 | if (dquotes == 0) { 43 | do { p++; } while (*p != '\r' && *p != '\n' && *p != '\0'); 44 | continue; 45 | } 46 | break; 47 | case '\t': 48 | case ' ': 49 | if (dquotes == 0 && n_paren <= 0 && s_paren <= 0 && c_paren <= 0) { 50 | return p - input; 51 | } 52 | break; 53 | case '(': 54 | case '[': 55 | case '{': 56 | if (dquotes == 0) { 57 | if (p - input > 0 && n_paren <= 0 && s_paren <= 0 && c_paren <= 0) { 58 | return p - input; 59 | } 60 | if (*p == '(') n_paren++; 61 | else if (*p == '[') s_paren++; 62 | else c_paren++; 63 | } 64 | break; 65 | case ')': 66 | case ']': 67 | case '}': 68 | if (dquotes == 0) { 69 | if (*p == ')') n_paren--; 70 | else if (*p == ']') s_paren--; 71 | else c_paren--; 72 | if (n_paren <= 0 && s_paren <= 0 && c_paren <= 0) { 73 | return p - input + 1; 74 | } 75 | } 76 | break; 77 | default: 78 | break; 79 | } 80 | p++; 81 | } 82 | 83 | return (n_paren > 0 || s_paren > 0 || c_paren > 0) ? -1 : p - input; 84 | } 85 | 86 | static void completionHook(char const* prefix, int bp, replxx_completions* lc, void* ud) 87 | { 88 | char **examples = (char **)ud; 89 | size_t i; 90 | for (i = 0; examples[i] != NULL; ++i) { 91 | if (strncmp(prefix + bp, examples[i], strlen(prefix) - bp) == 0) { 92 | replxx_add_completion(lc, examples[i]); 93 | } 94 | } 95 | } 96 | 97 | static void hintHook(char const* prefix, int bp, replxx_hints* lc, ReplxxColor* c, void* ud) 98 | { 99 | char **examples = (char **)ud; 100 | size_t i; 101 | int len = strlen(prefix); 102 | if (len > bp) { 103 | for (i = 0; examples[i] != NULL; ++i) { 104 | if (strncmp(prefix + bp, examples[i], strlen(prefix) - bp) == 0) { 105 | replxx_add_hint(lc, examples[i] + len - bp); 106 | } 107 | } 108 | } 109 | } 110 | 111 | static void colorHook(char const* str_, ReplxxColor* colors_, int size_, void* ud) 112 | { 113 | int i = 0; 114 | for (; i < size_; ++i) { 115 | if (isdigit(str_[i])) { 116 | colors_[i] = BRIGHTMAGENTA; 117 | } 118 | } 119 | } 120 | 121 | static int compare_hints(const void *a, const void *b) 122 | { 123 | return strcmp(*(char **)a, *(char **)b); 124 | } 125 | 126 | int main(int argc, char *argv[]) 127 | { 128 | int ret; 129 | pointer x; 130 | char *examples_base = NULL, **examples = NULL; 131 | size_t hints_size = 0, hints_num = 0; 132 | 133 | #ifdef _WIN32 134 | WSADATA wsaData; 135 | if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) { 136 | return 1; 137 | } 138 | #endif 139 | 140 | scheme_init(); 141 | init_re(); 142 | init_tsx(); 143 | 144 | ret = scheme_load_string("(define call/cc call-with-current-continuation)"); 145 | if (ret != 0) { 146 | fprintf(stderr, "Unable to init.\n"); 147 | return 1; 148 | } 149 | 150 | for (x = oblist; x != NIL; x = cdr(x)) { 151 | hints_size += strlen(symname(car(x))) + 1; 152 | hints_num++; 153 | } 154 | 155 | examples_base = (char *)malloc(hints_size + 1); 156 | if (examples_base == NULL) { 157 | fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", hints_size + 1); 158 | return 1; 159 | } 160 | 161 | examples = (char **)malloc((hints_num + 1) * sizeof (char *)); 162 | if (examples == NULL) { 163 | fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", (hints_num + 1) * sizeof (char *)); 164 | return 1; 165 | } 166 | 167 | hints_size = hints_num = 0; 168 | examples[0] = examples_base; 169 | for (x = oblist; x != NIL; x = cdr(x)) { 170 | hints_size = strlen(symname(car(x))) + 1; 171 | memcpy(examples[hints_num], symname(car(x)), hints_size); 172 | examples[hints_num + 1] = examples[hints_num] + hints_size; 173 | hints_num++; 174 | } 175 | examples[hints_num] = NULL; 176 | 177 | qsort(examples, hints_num, sizeof (char *), compare_hints); 178 | 179 | char const* prompt0 = "\x1b[1;32m>>>\x1b[0m "; 180 | char const* prompt1 = "\x1b[1;32m...\x1b[0m "; 181 | 182 | Replxx* replxx = replxx_init(); 183 | replxx_install_window_change_handler(replxx); 184 | replxx_set_completion_callback(replxx, completionHook, (void *)examples); 185 | replxx_set_highlighter_callback(replxx, colorHook, NULL); 186 | replxx_set_hint_callback(replxx, hintHook, (void *)examples); 187 | replxx_set_word_break_characters(replxx, " \\\"'`;@{([])}"); 188 | 189 | if (argc > 1) { 190 | FILE *fin = fopen(argv[1], "rb"); 191 | if (fin == NULL) { 192 | fprintf(stderr, "Unable to open %s\n", argv[1]); 193 | return 1; 194 | } 195 | scheme_load_file(fin); 196 | } 197 | 198 | printf("Hello, This is Mini-Scheme REPL.\n"); 199 | 200 | ret = 0; 201 | while (ret <= 0) { 202 | char const* result; 203 | do { 204 | result = replxx_input(replxx, prompt0); 205 | } while ((result == NULL) && (errno == EAGAIN)); 206 | 207 | if (result == NULL) { 208 | break; 209 | } 210 | replxx_history_add(replxx, result); 211 | 212 | char *line1 = (char *)malloc(strlen(result) + 2); 213 | if (line1 == NULL) { 214 | fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", strlen(result) + 2); 215 | goto EXIT; 216 | } 217 | sprintf(line1, "%s\n", result); 218 | 219 | do { 220 | int i, len; 221 | char *line2, *line3; 222 | while ((len = get_input_length(line1)) == -1) { 223 | do { 224 | result = replxx_input(replxx, prompt1); 225 | } while ((result == NULL) && (errno == EAGAIN)); 226 | 227 | if (result == NULL) { 228 | goto EXIT; 229 | } 230 | replxx_history_add(replxx, result); 231 | 232 | line3 = (char *)malloc(strlen(line1) + strlen(result) + 2); 233 | if (line3 == NULL) { 234 | fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", strlen(line1) + strlen(result) + 2); 235 | goto EXIT; 236 | } 237 | 238 | sprintf(line3, "%s%s\n", line1, result); 239 | free(line1); 240 | line1 = line3; 241 | } 242 | 243 | line2 = (char *)malloc(strlen(line1) - len + 1); 244 | if (line2 == NULL) { 245 | fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", strlen(line1) - len + 1); 246 | goto EXIT; 247 | } 248 | strcpy(line2, line1 + len); 249 | 250 | for (i = 0; i < len; i++) { 251 | if (line1[i] == ';') { 252 | i = len; 253 | break; 254 | } else if (line1[i] != '\r' && line1[i] != '\n' && line1[i] != '\t' && line1[i] != ' ') { 255 | break; 256 | } 257 | } 258 | if (i < len) { 259 | line1[len] = '\0'; 260 | line3 = (char *)malloc(len + 20); 261 | if (line3 == NULL) { 262 | fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", len + 20); 263 | goto EXIT; 264 | } 265 | sprintf(line3, "(display %s)(newline)", line1); 266 | 267 | ret = scheme_load_string(line3); 268 | free(line3); 269 | } 270 | 271 | strcpy(line1, line2); 272 | free(line2); 273 | } while (strlen(line1) > 0 && ret <= 0); 274 | 275 | free(line1); 276 | } 277 | 278 | EXIT: 279 | replxx_end(replxx); 280 | free(examples); 281 | free(examples_base); 282 | scheme_deinit(); 283 | #ifdef _WIN32 284 | WSACleanup(); 285 | #endif 286 | 287 | return ret; 288 | } 289 | -------------------------------------------------------------------------------- /ext/re/COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved. 2 | This software is not subject to any license of the American Telephone 3 | and Telegraph Company or of the Regents of the University of California. 4 | 5 | Permission is granted to anyone to use this software for any purpose on 6 | any computer system, and to alter it and redistribute it, subject 7 | to the following restrictions: 8 | 9 | 1. The author is not responsible for the consequences of use of this 10 | software, no matter how awful, even if they arise from flaws in it. 11 | 12 | 2. The origin of this software must not be misrepresented, either by 13 | explicit claim or by omission. Since few users ever read sources, 14 | credits must appear in the documentation. 15 | 16 | 3. Altered versions must be plainly marked as such, and must not be 17 | misrepresented as being the original software. Since few users 18 | ever read sources, credits must appear in the documentation. 19 | 20 | 4. This notice may not be removed or altered. 21 | -------------------------------------------------------------------------------- /ext/re/Makefile: -------------------------------------------------------------------------------- 1 | # You probably want to take -DREDEBUG out of CFLAGS, and put something like 2 | # -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of 3 | # internal assertion checking and some debugging facilities). 4 | # Put -Dconst= in for a pre-ANSI compiler. 5 | # Do not take -DPOSIX_MISTAKE out. 6 | # REGCFLAGS isn't important to you (it's for my use in some special contexts). 7 | CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS) 8 | 9 | # If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want 10 | # the Berkeley __P macro, put -b in. 11 | MKHFLAGS= 12 | 13 | # Flags for linking but not compiling, if any. 14 | LDFLAGS= 15 | 16 | # Extra libraries for linking, if any. 17 | LIBS= 18 | 19 | # Internal stuff, should not need changing. 20 | OBJPRODN=regcomp.o regexec.o regerror.o regfree.o 21 | OBJS=$(OBJPRODN) split.o debug.o main.o 22 | H=cclass.h cname.h regex2.h utils.h 23 | REGSRC=regcomp.c regerror.c regexec.c regfree.c 24 | ALLSRC=$(REGSRC) engine.c debug.c main.c split.c 25 | 26 | # Stuff that matters only if you're trying to lint the package. 27 | LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG 28 | LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c 29 | JUNKLINT=possible pointer alignment|null effect 30 | 31 | # arrangements to build forward-reference header files 32 | .SUFFIXES: .ih .h 33 | .c.ih: 34 | sh ./mkh $(MKHFLAGS) -p $< >$@ 35 | 36 | default: r 37 | 38 | lib: purge $(OBJPRODN) 39 | rm -f libregex.a 40 | ar crv libregex.a $(OBJPRODN) 41 | 42 | purge: 43 | rm -f *.o 44 | 45 | # stuff to build regex.h 46 | REGEXH=regex.h 47 | REGEXHSRC=regex2.h $(REGSRC) 48 | $(REGEXH): $(REGEXHSRC) mkh 49 | sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp 50 | cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h 51 | rm -f regex.tmp 52 | 53 | # dependencies 54 | $(OBJPRODN) debug.o: utils.h regex.h regex2.h 55 | regcomp.o: cclass.h cname.h regcomp.ih 56 | regexec.o: engine.c engine.ih 57 | regerror.o: regerror.ih 58 | debug.o: debug.ih 59 | main.o: main.ih 60 | 61 | # tester 62 | re: $(OBJS) 63 | $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ 64 | 65 | # regression test 66 | r: re tests 67 | ./re &1 | egrep -v '$(JUNKLINT)' | tee lint 90 | 91 | fullprint: 92 | ti README WHATSNEW notes todo | list 93 | ti *.h | list 94 | list *.c 95 | list regex.3 regex.7 96 | 97 | print: 98 | ti README WHATSNEW notes todo | list 99 | ti *.h | list 100 | list reg*.c engine.c 101 | 102 | 103 | mf.tmp: Makefile 104 | sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@ 105 | 106 | DTRH=cclass.h cname.h regex2.h utils.h 107 | PRE=COPYRIGHT README WHATSNEW 108 | POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch] 109 | FILES=$(PRE) Makefile $(POST) 110 | DTR=$(PRE) Makefile=mf.tmp $(POST) 111 | dtr: $(FILES) mf.tmp 112 | makedtr $(DTR) >$@ 113 | rm mf.tmp 114 | 115 | cio: $(FILES) 116 | cio $(FILES) 117 | 118 | rdf: $(FILES) 119 | rcsdiff -c $(FILES) 2>&1 | p 120 | 121 | # various forms of cleanup 122 | tidy: 123 | rm -f junk* core core.* *.core dtr *.tmp lint 124 | 125 | clean: tidy 126 | rm -f *.o *.s *.ih re libregex.a 127 | 128 | # don't do this one unless you know what you're doing 129 | spotless: clean 130 | rm -f mkh regex.h 131 | -------------------------------------------------------------------------------- /ext/re/README: -------------------------------------------------------------------------------- 1 | alpha3.8 release. 2 | Tue Aug 10 15:51:48 EDT 1999 3 | henry@spsystems.net (formerly henry@zoo.toronto.edu) 4 | 5 | See WHATSNEW for change listing. 6 | 7 | installation notes: 8 | -------- 9 | Read the comments at the beginning of Makefile before running. 10 | 11 | Utils.h contains some things that just might have to be modified on 12 | some systems, as well as a nested include (ugh) of . 13 | 14 | The "fake" directory contains quick-and-dirty fakes for some header 15 | files and routines that old systems may not have. Note also that 16 | -DUSEBCOPY will make utils.h substitute bcopy() for memmove(). 17 | 18 | After that, "make r" will build regcomp.o, regexec.o, regfree.o, 19 | and regerror.o (the actual routines), bundle them together into a test 20 | program, and run regression tests on them. No output is good output. 21 | 22 | "make lib" builds just the .o files for the actual routines (when 23 | you're happy with testing and have adjusted CFLAGS for production), 24 | and puts them together into libregex.a. You can pick up either the 25 | library or *.o ("make lib" makes sure there are no other .o files left 26 | around to confuse things). 27 | 28 | Main.c, debug.c, split.c are used for regression testing but are not part 29 | of the RE routines themselves. 30 | 31 | Regex.h goes in /usr/include. All other .h files are internal only. 32 | -------- 33 | -------------------------------------------------------------------------------- /ext/re/WHATSNEW: -------------------------------------------------------------------------------- 1 | New in alpha3.8p1: Bug fix for integer overflow in regcomp for excessively 2 | long pattern strings. CERT Vulnerability Note VU#695940. Found by Guido 3 | Vranken. - Gary Houston, 25 February 2015. 4 | 5 | New in alpha3.8: Bug fix for signed/unsigned mixup, found and fixed 6 | by the FreeBSD folks. 7 | 8 | New in alpha3.7: A bit of cleanup aimed at maximizing portability, 9 | possibly at slight cost in efficiency. "ul" suffixes and "unsigned long" 10 | no longer appear, in particular. 11 | 12 | New in alpha3.6: A couple more portability glitches fixed. 13 | 14 | New in alpha3.5: Active development of this code has been stopped -- 15 | I'm working on a complete reimplementation -- but folks have found some 16 | minor portability glitches and the like, hence this release to fix them. 17 | One penalty: slightly reduced compatibility with old compilers, because 18 | the ANSI C `unsigned long' type and `ul' constant suffix are used in a 19 | few places (I could avoid this but it would be considerably more work). 20 | 21 | New in alpha3.4: The complex bug alluded to below has been fixed (in a 22 | slightly kludgey temporary way that may hurt efficiency a bit; this is 23 | another "get it out the door for 4.4" release). The tests at the end of 24 | the tests file have accordingly been uncommented. The primary sign of 25 | the bug was that something like a?b matching ab matched b rather than ab. 26 | (The bug was essentially specific to this exact situation, else it would 27 | have shown up earlier.) 28 | 29 | New in alpha3.3: The definition of word boundaries has been altered 30 | slightly, to more closely match the usual programming notion that "_" 31 | is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir, 32 | and the makefile no longer alludes to it in mysterious ways. The 33 | makefile has generally been cleaned up some. Fixes have been made 34 | (again!) so that the regression test will run without -DREDEBUG, at 35 | the cost of weaker checking. A workaround for a bug in some folks' 36 | has been added. And some more things have been added to 37 | tests, including a couple right at the end which are commented out 38 | because the code currently flunks them (complex bug; fix coming). 39 | Plus the usual minor cleanup. 40 | 41 | New in alpha3.2: Assorted bits of cleanup and portability improvement 42 | (the development base is now a BSDI system using GCC instead of an ancient 43 | Sun system, and the newer compiler exposed some glitches). Fix for a 44 | serious bug that affected REs using many [] (including REG_ICASE REs 45 | because of the way they are implemented), *sometimes*, depending on 46 | memory-allocation patterns. The header-file prototypes no longer name 47 | the parameters, avoiding possible name conflicts. The possibility that 48 | some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is 49 | now handled gracefully. "uchar" is no longer used as an internal type 50 | name (too many people have the same idea). Still the same old lousy 51 | performance, alas. 52 | 53 | New in alpha3.1: Basically nothing, this release is just a bookkeeping 54 | convenience. Stay tuned. 55 | 56 | New in alpha3.0: Performance is no better, alas, but some fixes have been 57 | made and some functionality has been added. (This is basically the "get 58 | it out the door in time for 4.4" release.) One bug fix: regfree() didn't 59 | free the main internal structure (how embarrassing). It is now possible 60 | to put NULs in either the RE or the target string, using (resp.) a new 61 | REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to 62 | regcomp() makes all characters ordinary, so you can match a literal 63 | string easily (this will become more useful when performance improves!). 64 | There are now primitives to match beginnings and ends of words, although 65 | the syntax is disgusting and so is the implementation. The REG_ATOI 66 | debugging interface has changed a bit. And there has been considerable 67 | internal cleanup of various kinds. 68 | 69 | New in alpha2.3: Split change list out of README, and moved flags notes 70 | into Makefile. Macro-ized the name of regex(7) in regex(3), since it has 71 | to change for 4.4BSD. Cleanup work in engine.c, and some new regression 72 | tests to catch tricky cases thereof. 73 | 74 | New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two 75 | small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges 76 | in my own test program and might be useful to others for similar purposes. 77 | The regression test will now compile (and run) without REDEBUG. The 78 | BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now. 79 | Char/uchar parameters are now written int/unsigned, to avoid possible 80 | portability problems with unpromoted parameters. Some unsigned casts have 81 | been introduced to minimize portability problems with shifting into sign 82 | bits. 83 | 84 | New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big 85 | thing is that regex.h is now generated, using mkh, rather than being 86 | supplied in the distribution; due to circularities in dependencies, 87 | you have to build regex.h explicitly by "make h". The two known bugs 88 | have been fixed (and the regression test now checks for them), as has a 89 | problem with assertions not being suppressed in the absence of REDEBUG. 90 | No performance work yet. 91 | 92 | New in alpha2: Backslash-anything is an ordinary character, not an 93 | error (except, of course, for the handful of backslashed metacharacters 94 | in BREs), which should reduce script breakage. The regression test 95 | checks *where* null strings are supposed to match, and has generally 96 | been tightened up somewhat. Small bug fixes in parameter passing (not 97 | harmful, but technically errors) and some other areas. Debugging 98 | invoked by defining REDEBUG rather than not defining NDEBUG. 99 | 100 | New in alpha+3: full prototyping for internal routines, using a little 101 | helper program, mkh, which extracts prototypes given in stylized comments. 102 | More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple 103 | pre-screening of input when a literal string is known to be part of the 104 | RE; this does wonders for performance. 105 | 106 | New in alpha+2: minor bits of cleanup. Notably, the number "32" for the 107 | word width isn't hardwired into regexec.c any more, the public header 108 | file prototypes the functions if __STDC__ is defined, and some small typos 109 | in the manpages have been fixed. 110 | 111 | New in alpha+1: improvements to the manual pages, and an important 112 | extension, the REG_STARTEND option to regexec(). 113 | -------------------------------------------------------------------------------- /ext/re/cclass.h: -------------------------------------------------------------------------------- 1 | /* character-class table */ 2 | static struct cclass { 3 | char *name; 4 | char *chars; 5 | char *multis; 6 | } cclasses[] = { 7 | "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 8 | 0123456789", "", 9 | "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 10 | "", 11 | "blank", " \t", "", 12 | "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ 13 | \25\26\27\30\31\32\33\34\35\36\37\177", "", 14 | "digit", "0123456789", "", 15 | "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 16 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", 17 | "", 18 | "lower", "abcdefghijklmnopqrstuvwxyz", 19 | "", 20 | "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 21 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", 22 | "", 23 | "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", 24 | "", 25 | "space", "\t\n\v\f\r ", "", 26 | "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 27 | "", 28 | "xdigit", "0123456789ABCDEFabcdef", 29 | "", 30 | NULL, 0, "" 31 | }; 32 | -------------------------------------------------------------------------------- /ext/re/cname.h: -------------------------------------------------------------------------------- 1 | /* character-name table */ 2 | static struct cname { 3 | char *name; 4 | char code; 5 | } cnames[] = { 6 | "NUL", '\0', 7 | "SOH", '\001', 8 | "STX", '\002', 9 | "ETX", '\003', 10 | "EOT", '\004', 11 | "ENQ", '\005', 12 | "ACK", '\006', 13 | "BEL", '\007', 14 | "alert", '\007', 15 | "BS", '\010', 16 | "backspace", '\b', 17 | "HT", '\011', 18 | "tab", '\t', 19 | "LF", '\012', 20 | "newline", '\n', 21 | "VT", '\013', 22 | "vertical-tab", '\v', 23 | "FF", '\014', 24 | "form-feed", '\f', 25 | "CR", '\015', 26 | "carriage-return", '\r', 27 | "SO", '\016', 28 | "SI", '\017', 29 | "DLE", '\020', 30 | "DC1", '\021', 31 | "DC2", '\022', 32 | "DC3", '\023', 33 | "DC4", '\024', 34 | "NAK", '\025', 35 | "SYN", '\026', 36 | "ETB", '\027', 37 | "CAN", '\030', 38 | "EM", '\031', 39 | "SUB", '\032', 40 | "ESC", '\033', 41 | "IS4", '\034', 42 | "FS", '\034', 43 | "IS3", '\035', 44 | "GS", '\035', 45 | "IS2", '\036', 46 | "RS", '\036', 47 | "IS1", '\037', 48 | "US", '\037', 49 | "space", ' ', 50 | "exclamation-mark", '!', 51 | "quotation-mark", '"', 52 | "number-sign", '#', 53 | "dollar-sign", '$', 54 | "percent-sign", '%', 55 | "ampersand", '&', 56 | "apostrophe", '\'', 57 | "left-parenthesis", '(', 58 | "right-parenthesis", ')', 59 | "asterisk", '*', 60 | "plus-sign", '+', 61 | "comma", ',', 62 | "hyphen", '-', 63 | "hyphen-minus", '-', 64 | "period", '.', 65 | "full-stop", '.', 66 | "slash", '/', 67 | "solidus", '/', 68 | "zero", '0', 69 | "one", '1', 70 | "two", '2', 71 | "three", '3', 72 | "four", '4', 73 | "five", '5', 74 | "six", '6', 75 | "seven", '7', 76 | "eight", '8', 77 | "nine", '9', 78 | "colon", ':', 79 | "semicolon", ';', 80 | "less-than-sign", '<', 81 | "equals-sign", '=', 82 | "greater-than-sign", '>', 83 | "question-mark", '?', 84 | "commercial-at", '@', 85 | "left-square-bracket", '[', 86 | "backslash", '\\', 87 | "reverse-solidus", '\\', 88 | "right-square-bracket", ']', 89 | "circumflex", '^', 90 | "circumflex-accent", '^', 91 | "underscore", '_', 92 | "low-line", '_', 93 | "grave-accent", '`', 94 | "left-brace", '{', 95 | "left-curly-bracket", '{', 96 | "vertical-line", '|', 97 | "right-brace", '}', 98 | "right-curly-bracket", '}', 99 | "tilde", '~', 100 | "DEL", '\177', 101 | NULL, 0, 102 | }; 103 | -------------------------------------------------------------------------------- /ext/re/debug.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "utils.h" 10 | #include "regex2.h" 11 | #include "debug.ih" 12 | 13 | /* 14 | - regprint - print a regexp for debugging 15 | == void regprint(regex_t *r, FILE *d); 16 | */ 17 | void 18 | regprint(r, d) 19 | regex_t *r; 20 | FILE *d; 21 | { 22 | register struct re_guts *g = r->re_g; 23 | register int i; 24 | register int c; 25 | register int last; 26 | int nincat[NC]; 27 | 28 | fprintf(d, "%ld states, %d categories", (long)g->nstates, 29 | g->ncategories); 30 | fprintf(d, ", first %ld last %ld", (long)g->firststate, 31 | (long)g->laststate); 32 | if (g->iflags&USEBOL) 33 | fprintf(d, ", USEBOL"); 34 | if (g->iflags&USEEOL) 35 | fprintf(d, ", USEEOL"); 36 | if (g->iflags&BAD) 37 | fprintf(d, ", BAD"); 38 | if (g->nsub > 0) 39 | fprintf(d, ", nsub=%ld", (long)g->nsub); 40 | if (g->must != NULL) 41 | fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, 42 | g->must); 43 | if (g->backrefs) 44 | fprintf(d, ", backrefs"); 45 | if (g->nplus > 0) 46 | fprintf(d, ", nplus %ld", (long)g->nplus); 47 | fprintf(d, "\n"); 48 | s_print(g, d); 49 | for (i = 0; i < g->ncategories; i++) { 50 | nincat[i] = 0; 51 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) 52 | if (g->categories[c] == i) 53 | nincat[i]++; 54 | } 55 | fprintf(d, "cc0#%d", nincat[0]); 56 | for (i = 1; i < g->ncategories; i++) 57 | if (nincat[i] == 1) { 58 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) 59 | if (g->categories[c] == i) 60 | break; 61 | fprintf(d, ", %d=%s", i, regchar(c)); 62 | } 63 | fprintf(d, "\n"); 64 | for (i = 1; i < g->ncategories; i++) 65 | if (nincat[i] != 1) { 66 | fprintf(d, "cc%d\t", i); 67 | last = -1; 68 | for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ 69 | if (c <= CHAR_MAX && g->categories[c] == i) { 70 | if (last < 0) { 71 | fprintf(d, "%s", regchar(c)); 72 | last = c; 73 | } 74 | } else { 75 | if (last >= 0) { 76 | if (last != c-1) 77 | fprintf(d, "-%s", 78 | regchar(c-1)); 79 | last = -1; 80 | } 81 | } 82 | fprintf(d, "\n"); 83 | } 84 | } 85 | 86 | /* 87 | - s_print - print the strip for debugging 88 | == static void s_print(register struct re_guts *g, FILE *d); 89 | */ 90 | static void 91 | s_print(g, d) 92 | register struct re_guts *g; 93 | FILE *d; 94 | { 95 | register sop *s; 96 | register cset *cs; 97 | register int i; 98 | register int done = 0; 99 | register sop opnd; 100 | register int col = 0; 101 | register int last; 102 | register sopno offset = 2; 103 | # define GAP() { if (offset % 5 == 0) { \ 104 | if (col > 40) { \ 105 | fprintf(d, "\n\t"); \ 106 | col = 0; \ 107 | } else { \ 108 | fprintf(d, " "); \ 109 | col++; \ 110 | } \ 111 | } else \ 112 | col++; \ 113 | offset++; \ 114 | } 115 | 116 | if (OP(g->strip[0]) != OEND) 117 | fprintf(d, "missing initial OEND!\n"); 118 | for (s = &g->strip[1]; !done; s++) { 119 | opnd = OPND(*s); 120 | switch (OP(*s)) { 121 | case OEND: 122 | fprintf(d, "\n"); 123 | done = 1; 124 | break; 125 | case OCHAR: 126 | if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) 127 | fprintf(d, "\\%c", (char)opnd); 128 | else 129 | fprintf(d, "%s", regchar((char)opnd)); 130 | break; 131 | case OBOL: 132 | fprintf(d, "^"); 133 | break; 134 | case OEOL: 135 | fprintf(d, "$"); 136 | break; 137 | case OBOW: 138 | fprintf(d, "\\{"); 139 | break; 140 | case OEOW: 141 | fprintf(d, "\\}"); 142 | break; 143 | case OANY: 144 | fprintf(d, "."); 145 | break; 146 | case OANYOF: 147 | fprintf(d, "[(%ld)", (long)opnd); 148 | cs = &g->sets[opnd]; 149 | last = -1; 150 | for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ 151 | if (CHIN(cs, i) && i < g->csetsize) { 152 | if (last < 0) { 153 | fprintf(d, "%s", regchar(i)); 154 | last = i; 155 | } 156 | } else { 157 | if (last >= 0) { 158 | if (last != i-1) 159 | fprintf(d, "-%s", 160 | regchar(i-1)); 161 | last = -1; 162 | } 163 | } 164 | fprintf(d, "]"); 165 | break; 166 | case OBACK_: 167 | fprintf(d, "(\\<%ld>", (long)opnd); 168 | break; 169 | case O_BACK: 170 | fprintf(d, "<%ld>\\)", (long)opnd); 171 | break; 172 | case OPLUS_: 173 | fprintf(d, "(+"); 174 | if (OP(*(s+opnd)) != O_PLUS) 175 | fprintf(d, "<%ld>", (long)opnd); 176 | break; 177 | case O_PLUS: 178 | if (OP(*(s-opnd)) != OPLUS_) 179 | fprintf(d, "<%ld>", (long)opnd); 180 | fprintf(d, "+)"); 181 | break; 182 | case OQUEST_: 183 | fprintf(d, "(?"); 184 | if (OP(*(s+opnd)) != O_QUEST) 185 | fprintf(d, "<%ld>", (long)opnd); 186 | break; 187 | case O_QUEST: 188 | if (OP(*(s-opnd)) != OQUEST_) 189 | fprintf(d, "<%ld>", (long)opnd); 190 | fprintf(d, "?)"); 191 | break; 192 | case OLPAREN: 193 | fprintf(d, "((<%ld>", (long)opnd); 194 | break; 195 | case ORPAREN: 196 | fprintf(d, "<%ld>))", (long)opnd); 197 | break; 198 | case OCH_: 199 | fprintf(d, "<"); 200 | if (OP(*(s+opnd)) != OOR2) 201 | fprintf(d, "<%ld>", (long)opnd); 202 | break; 203 | case OOR1: 204 | if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) 205 | fprintf(d, "<%ld>", (long)opnd); 206 | fprintf(d, "|"); 207 | break; 208 | case OOR2: 209 | fprintf(d, "|"); 210 | if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) 211 | fprintf(d, "<%ld>", (long)opnd); 212 | break; 213 | case O_CH: 214 | if (OP(*(s-opnd)) != OOR1) 215 | fprintf(d, "<%ld>", (long)opnd); 216 | fprintf(d, ">"); 217 | break; 218 | default: 219 | fprintf(d, "!%d(%d)!", OP(*s), opnd); 220 | break; 221 | } 222 | if (!done) 223 | GAP(); 224 | } 225 | } 226 | 227 | /* 228 | - regchar - make a character printable 229 | == static char *regchar(int ch); 230 | */ 231 | static char * /* -> representation */ 232 | regchar(ch) 233 | int ch; 234 | { 235 | static char buf[10]; 236 | 237 | if (isprint(ch) || ch == ' ') 238 | sprintf(buf, "%c", ch); 239 | else 240 | sprintf(buf, "\\%o", ch); 241 | return(buf); 242 | } 243 | -------------------------------------------------------------------------------- /ext/re/engine.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === engine.c === */ 7 | static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); 8 | static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); 9 | static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); 10 | static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); 11 | static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); 12 | static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); 13 | #define BOL (OUT+1) 14 | #define EOL (BOL+1) 15 | #define BOLEOL (BOL+2) 16 | #define NOTHING (BOL+3) 17 | #define BOW (BOL+4) 18 | #define EOW (BOL+5) 19 | #define CODEMAX (BOL+5) /* highest code used */ 20 | #define NONCHAR(c) ((c) > CHAR_MAX) 21 | #define NNONCHAR (CODEMAX-CHAR_MAX) 22 | #ifdef REDEBUG 23 | static void print(struct match *m, char *caption, states st, int ch, FILE *d); 24 | #endif 25 | #ifdef REDEBUG 26 | static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); 27 | #endif 28 | #ifdef REDEBUG 29 | static char *pchar(int ch); 30 | #endif 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | /* ========= end header generated by ./mkh ========= */ 36 | -------------------------------------------------------------------------------- /ext/re/fake/limits.h: -------------------------------------------------------------------------------- 1 | #define _POSIX2_RE_DUP_MAX 255 2 | #define CHAR_MIN (-128) 3 | #define CHAR_MAX 127 4 | #define CHAR_BIT 8 5 | -------------------------------------------------------------------------------- /ext/re/fake/memmove.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | - memmove - fake ANSI C routine 7 | */ 8 | char * 9 | memmove(dst, src, count) 10 | char *dst; 11 | char *src; 12 | size_t count; 13 | { 14 | register char *s; 15 | register char *d; 16 | register size_t n; 17 | 18 | if (dst > src) 19 | for (d = dst+count, s = src+count, n = count; n > 0; n--) 20 | *--d = *--s; 21 | else 22 | for (d = dst, s = src, n = count; n > 0; n--) 23 | *d++ = *s++; 24 | 25 | return(dst); 26 | } 27 | -------------------------------------------------------------------------------- /ext/re/fake/stdlib.h: -------------------------------------------------------------------------------- 1 | extern char *malloc(); 2 | extern char *realloc(); 3 | -------------------------------------------------------------------------------- /ext/re/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "main.ih" 8 | 9 | char *progname; 10 | int debug = 0; 11 | int line = 0; 12 | int status = 0; 13 | 14 | int copts = REG_EXTENDED; 15 | int eopts = 0; 16 | regoff_t startoff = 0; 17 | regoff_t endoff = 0; 18 | 19 | 20 | extern int split(); 21 | extern void regprint(); 22 | 23 | /* 24 | - main - do the simple case, hand off to regress() for regression 25 | */ 26 | main(argc, argv) 27 | int argc; 28 | char *argv[]; 29 | { 30 | regex_t re; 31 | # define NS 10 32 | regmatch_t subs[NS]; 33 | char erbuf[100]; 34 | int err; 35 | size_t len; 36 | int c; 37 | int errflg = 0; 38 | register int i; 39 | extern int optind; 40 | extern char *optarg; 41 | 42 | progname = argv[0]; 43 | 44 | while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) 45 | switch (c) { 46 | case 'c': /* compile options */ 47 | copts = options('c', optarg); 48 | break; 49 | case 'e': /* execute options */ 50 | eopts = options('e', optarg); 51 | break; 52 | case 'S': /* start offset */ 53 | startoff = (regoff_t)atoi(optarg); 54 | break; 55 | case 'E': /* end offset */ 56 | endoff = (regoff_t)atoi(optarg); 57 | break; 58 | case 'x': /* Debugging. */ 59 | debug++; 60 | break; 61 | case '?': 62 | default: 63 | errflg++; 64 | break; 65 | } 66 | if (errflg) { 67 | fprintf(stderr, "usage: %s ", progname); 68 | fprintf(stderr, "[-c copt][-C][-d] [re]\n"); 69 | exit(2); 70 | } 71 | 72 | if (optind >= argc) { 73 | regress(stdin); 74 | exit(status); 75 | } 76 | 77 | err = regcomp(&re, argv[optind++], copts); 78 | if (err) { 79 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 80 | fprintf(stderr, "error %s, %d/%d `%s'\n", 81 | eprint(err), len, sizeof(erbuf), erbuf); 82 | exit(status); 83 | } 84 | regprint(&re, stdout); 85 | 86 | if (optind >= argc) { 87 | regfree(&re); 88 | exit(status); 89 | } 90 | 91 | if (eopts®_STARTEND) { 92 | subs[0].rm_so = startoff; 93 | subs[0].rm_eo = strlen(argv[optind]) - endoff; 94 | } 95 | err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); 96 | if (err) { 97 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 98 | fprintf(stderr, "error %s, %d/%d `%s'\n", 99 | eprint(err), len, sizeof(erbuf), erbuf); 100 | exit(status); 101 | } 102 | if (!(copts®_NOSUB)) { 103 | len = (int)(subs[0].rm_eo - subs[0].rm_so); 104 | if (subs[0].rm_so != -1) { 105 | if (len != 0) 106 | printf("match `%.*s'\n", len, 107 | argv[optind] + subs[0].rm_so); 108 | else 109 | printf("match `'@%.1s\n", 110 | argv[optind] + subs[0].rm_so); 111 | } 112 | for (i = 1; i < NS; i++) 113 | if (subs[i].rm_so != -1) 114 | printf("(%d) `%.*s'\n", i, 115 | (int)(subs[i].rm_eo - subs[i].rm_so), 116 | argv[optind] + subs[i].rm_so); 117 | } 118 | exit(status); 119 | } 120 | 121 | /* 122 | - regress - main loop of regression test 123 | == void regress(FILE *in); 124 | */ 125 | void 126 | regress(in) 127 | FILE *in; 128 | { 129 | char inbuf[1000]; 130 | # define MAXF 10 131 | char *f[MAXF]; 132 | int nf; 133 | int i; 134 | char erbuf[100]; 135 | size_t ne; 136 | char *badpat = "invalid regular expression"; 137 | # define SHORT 10 138 | char *bpname = "REG_BADPAT"; 139 | regex_t re; 140 | 141 | while (fgets(inbuf, sizeof(inbuf), in) != NULL) { 142 | line++; 143 | if (inbuf[0] == '#' || inbuf[0] == '\n') 144 | continue; /* NOTE CONTINUE */ 145 | inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ 146 | if (debug) 147 | fprintf(stdout, "%d:\n", line); 148 | nf = split(inbuf, f, MAXF, "\t\t"); 149 | if (nf < 3) { 150 | fprintf(stderr, "bad input, line %d\n", line); 151 | exit(1); 152 | } 153 | for (i = 0; i < nf; i++) 154 | if (strcmp(f[i], "\"\"") == 0) 155 | f[i] = ""; 156 | if (nf <= 3) 157 | f[3] = NULL; 158 | if (nf <= 4) 159 | f[4] = NULL; 160 | try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); 161 | if (opt('&', f[1])) /* try with either type of RE */ 162 | try(f[0], f[1], f[2], f[3], f[4], 163 | options('c', f[1]) &~ REG_EXTENDED); 164 | } 165 | 166 | ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 167 | if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { 168 | fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", 169 | erbuf, badpat); 170 | status = 1; 171 | } 172 | ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); 173 | if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || 174 | ne != strlen(badpat)+1) { 175 | fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", 176 | erbuf, SHORT-1, badpat); 177 | status = 1; 178 | } 179 | ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 180 | if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { 181 | fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", 182 | erbuf, bpname); 183 | status = 1; 184 | } 185 | re.re_endp = bpname; 186 | ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); 187 | if (atoi(erbuf) != (int)REG_BADPAT) { 188 | fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", 189 | erbuf, (long)REG_BADPAT); 190 | status = 1; 191 | } else if (ne != strlen(erbuf)+1) { 192 | fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", 193 | erbuf, (long)REG_BADPAT); 194 | status = 1; 195 | } 196 | } 197 | 198 | /* 199 | - try - try it, and report on problems 200 | == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); 201 | */ 202 | void 203 | try(f0, f1, f2, f3, f4, opts) 204 | char *f0; 205 | char *f1; 206 | char *f2; 207 | char *f3; 208 | char *f4; 209 | int opts; /* may not match f1 */ 210 | { 211 | regex_t re; 212 | # define NSUBS 10 213 | regmatch_t subs[NSUBS]; 214 | # define NSHOULD 15 215 | char *should[NSHOULD]; 216 | int nshould; 217 | char erbuf[100]; 218 | int err; 219 | int len; 220 | char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; 221 | register int i; 222 | char *grump; 223 | char f0copy[1000]; 224 | char f2copy[1000]; 225 | 226 | strcpy(f0copy, f0); 227 | re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; 228 | fixstr(f0copy); 229 | err = regcomp(&re, f0copy, opts); 230 | if (err != 0 && (!opt('C', f1) || err != efind(f2))) { 231 | /* unexpected error or wrong error */ 232 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 233 | fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", 234 | line, type, eprint(err), len, 235 | sizeof(erbuf), erbuf); 236 | status = 1; 237 | } else if (err == 0 && opt('C', f1)) { 238 | /* unexpected success */ 239 | fprintf(stderr, "%d: %s should have given REG_%s\n", 240 | line, type, f2); 241 | status = 1; 242 | err = 1; /* so we won't try regexec */ 243 | } 244 | 245 | if (err != 0) { 246 | regfree(&re); 247 | return; 248 | } 249 | 250 | strcpy(f2copy, f2); 251 | fixstr(f2copy); 252 | 253 | if (options('e', f1)®_STARTEND) { 254 | if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) 255 | fprintf(stderr, "%d: bad STARTEND syntax\n", line); 256 | subs[0].rm_so = strchr(f2, '(') - f2 + 1; 257 | subs[0].rm_eo = strchr(f2, ')') - f2; 258 | } 259 | err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); 260 | 261 | if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { 262 | /* unexpected error or wrong error */ 263 | len = regerror(err, &re, erbuf, sizeof(erbuf)); 264 | fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", 265 | line, type, eprint(err), len, 266 | sizeof(erbuf), erbuf); 267 | status = 1; 268 | } else if (err != 0) { 269 | /* nothing more to check */ 270 | } else if (f3 == NULL) { 271 | /* unexpected success */ 272 | fprintf(stderr, "%d: %s exec should have failed\n", 273 | line, type); 274 | status = 1; 275 | err = 1; /* just on principle */ 276 | } else if (opts®_NOSUB) { 277 | /* nothing more to check */ 278 | } else if ((grump = check(f2, subs[0], f3)) != NULL) { 279 | fprintf(stderr, "%d: %s %s\n", line, type, grump); 280 | status = 1; 281 | err = 1; 282 | } 283 | 284 | if (err != 0 || f4 == NULL) { 285 | regfree(&re); 286 | return; 287 | } 288 | 289 | for (i = 1; i < NSHOULD; i++) 290 | should[i] = NULL; 291 | nshould = split(f4, should+1, NSHOULD-1, ","); 292 | if (nshould == 0) { 293 | nshould = 1; 294 | should[1] = ""; 295 | } 296 | for (i = 1; i < NSUBS; i++) { 297 | grump = check(f2, subs[i], should[i]); 298 | if (grump != NULL) { 299 | fprintf(stderr, "%d: %s $%d %s\n", line, 300 | type, i, grump); 301 | status = 1; 302 | err = 1; 303 | } 304 | } 305 | 306 | regfree(&re); 307 | } 308 | 309 | /* 310 | - options - pick options out of a regression-test string 311 | == int options(int type, char *s); 312 | */ 313 | int 314 | options(type, s) 315 | int type; /* 'c' compile, 'e' exec */ 316 | char *s; 317 | { 318 | register char *p; 319 | register int o = (type == 'c') ? copts : eopts; 320 | register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; 321 | 322 | for (p = s; *p != '\0'; p++) 323 | if (strchr(legal, *p) != NULL) 324 | switch (*p) { 325 | case 'b': 326 | o &= ~REG_EXTENDED; 327 | break; 328 | case 'i': 329 | o |= REG_ICASE; 330 | break; 331 | case 's': 332 | o |= REG_NOSUB; 333 | break; 334 | case 'n': 335 | o |= REG_NEWLINE; 336 | break; 337 | case 'm': 338 | o &= ~REG_EXTENDED; 339 | o |= REG_NOSPEC; 340 | break; 341 | case 'p': 342 | o |= REG_PEND; 343 | break; 344 | case '^': 345 | o |= REG_NOTBOL; 346 | break; 347 | case '$': 348 | o |= REG_NOTEOL; 349 | break; 350 | case '#': 351 | o |= REG_STARTEND; 352 | break; 353 | case 't': /* trace */ 354 | o |= REG_TRACE; 355 | break; 356 | case 'l': /* force long representation */ 357 | o |= REG_LARGE; 358 | break; 359 | case 'r': /* force backref use */ 360 | o |= REG_BACKR; 361 | break; 362 | } 363 | return(o); 364 | } 365 | 366 | /* 367 | - opt - is a particular option in a regression string? 368 | == int opt(int c, char *s); 369 | */ 370 | int /* predicate */ 371 | opt(c, s) 372 | int c; 373 | char *s; 374 | { 375 | return(strchr(s, c) != NULL); 376 | } 377 | 378 | /* 379 | - fixstr - transform magic characters in strings 380 | == void fixstr(register char *p); 381 | */ 382 | void 383 | fixstr(p) 384 | register char *p; 385 | { 386 | if (p == NULL) 387 | return; 388 | 389 | for (; *p != '\0'; p++) 390 | if (*p == 'N') 391 | *p = '\n'; 392 | else if (*p == 'T') 393 | *p = '\t'; 394 | else if (*p == 'S') 395 | *p = ' '; 396 | else if (*p == 'Z') 397 | *p = '\0'; 398 | } 399 | 400 | /* 401 | - check - check a substring match 402 | == char *check(char *str, regmatch_t sub, char *should); 403 | */ 404 | char * /* NULL or complaint */ 405 | check(str, sub, should) 406 | char *str; 407 | regmatch_t sub; 408 | char *should; 409 | { 410 | register int len; 411 | register int shlen; 412 | register char *p; 413 | static char grump[500]; 414 | register char *at = NULL; 415 | 416 | if (should != NULL && strcmp(should, "-") == 0) 417 | should = NULL; 418 | if (should != NULL && should[0] == '@') { 419 | at = should + 1; 420 | should = ""; 421 | } 422 | 423 | /* check rm_so and rm_eo for consistency */ 424 | if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || 425 | (sub.rm_so != -1 && sub.rm_eo == -1) || 426 | (sub.rm_so != -1 && sub.rm_so < 0) || 427 | (sub.rm_eo != -1 && sub.rm_eo < 0) ) { 428 | sprintf(grump, "start %ld end %ld", (long)sub.rm_so, 429 | (long)sub.rm_eo); 430 | return(grump); 431 | } 432 | 433 | /* check for no match */ 434 | if (sub.rm_so == -1 && should == NULL) 435 | return(NULL); 436 | if (sub.rm_so == -1) 437 | return("did not match"); 438 | 439 | /* check for in range */ 440 | if (sub.rm_eo > strlen(str)) { 441 | sprintf(grump, "start %ld end %ld, past end of string", 442 | (long)sub.rm_so, (long)sub.rm_eo); 443 | return(grump); 444 | } 445 | 446 | len = (int)(sub.rm_eo - sub.rm_so); 447 | shlen = (int)strlen(should); 448 | p = str + sub.rm_so; 449 | 450 | /* check for not supposed to match */ 451 | if (should == NULL) { 452 | sprintf(grump, "matched `%.*s'", len, p); 453 | return(grump); 454 | } 455 | 456 | /* check for wrong match */ 457 | if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { 458 | sprintf(grump, "matched `%.*s' instead", len, p); 459 | return(grump); 460 | } 461 | if (shlen > 0) 462 | return(NULL); 463 | 464 | /* check null match in right place */ 465 | if (at == NULL) 466 | return(NULL); 467 | shlen = strlen(at); 468 | if (shlen == 0) 469 | shlen = 1; /* force check for end-of-string */ 470 | if (strncmp(p, at, shlen) != 0) { 471 | sprintf(grump, "matched null at `%.20s'", p); 472 | return(grump); 473 | } 474 | return(NULL); 475 | } 476 | 477 | /* 478 | - eprint - convert error number to name 479 | == static char *eprint(int err); 480 | */ 481 | static char * 482 | eprint(err) 483 | int err; 484 | { 485 | static char epbuf[100]; 486 | size_t len; 487 | 488 | len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); 489 | assert(len <= sizeof(epbuf)); 490 | return(epbuf); 491 | } 492 | 493 | /* 494 | - efind - convert error name to number 495 | == static int efind(char *name); 496 | */ 497 | static int 498 | efind(name) 499 | char *name; 500 | { 501 | static char efbuf[100]; 502 | size_t n; 503 | regex_t re; 504 | 505 | sprintf(efbuf, "REG_%s", name); 506 | assert(strlen(efbuf) < sizeof(efbuf)); 507 | re.re_endp = efbuf; 508 | (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); 509 | return(atoi(efbuf)); 510 | } 511 | -------------------------------------------------------------------------------- /ext/re/mkh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # mkh - pull headers out of C source 3 | PATH=/bin:/usr/bin ; export PATH 4 | 5 | # egrep pattern to pick out marked lines 6 | egrep='^ =([ ]|$)' 7 | 8 | # Sed program to process marked lines into lines for the header file. 9 | # The markers have already been removed. Two things are done here: removal 10 | # of backslashed newlines, and some fudging of comments. The first is done 11 | # because -o needs to have prototypes on one line to strip them down. 12 | # Getting comments into the output is tricky; we turn C++-style // comments 13 | # into /* */ comments, after altering any existing */'s to avoid trouble. 14 | peel=' /\\$/N 15 | /\\\n[ ]*/s///g 16 | /\/\//s;\*/;* /;g 17 | /\/\//s;//\(.*\);/*\1 */;' 18 | 19 | for a 20 | do 21 | case "$a" in 22 | -o) # old (pre-function-prototype) compiler 23 | # add code to comment out argument lists 24 | peel="$peel 25 | "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);' 26 | shift 27 | ;; 28 | -b) # funny Berkeley __P macro 29 | peel="$peel 30 | "'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));' 31 | shift 32 | ;; 33 | -s) # compiler doesn't like `static foo();' 34 | # add code to get rid of the `static' 35 | peel="$peel 36 | "'/^static[ ][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;' 37 | shift 38 | ;; 39 | -p) # private declarations 40 | egrep='^ ==([ ]|$)' 41 | shift 42 | ;; 43 | -i) # wrap in #ifndef, argument is name 44 | ifndef="$2" 45 | shift ; shift 46 | ;; 47 | *) break 48 | ;; 49 | esac 50 | done 51 | 52 | if test " $ifndef" != " " 53 | then 54 | echo "#ifndef $ifndef" 55 | echo "#define $ifndef /* never again */" 56 | fi 57 | echo "/* ========= begin header generated by $0 ========= */" 58 | echo '#ifdef __cplusplus' 59 | echo 'extern "C" {' 60 | echo '#endif' 61 | for f 62 | do 63 | echo 64 | echo "/* === $f === */" 65 | egrep "$egrep" $f | sed 's/^ ==*[ ]//;s/^ ==*$//' | sed "$peel" 66 | echo 67 | done 68 | echo '#ifdef __cplusplus' 69 | echo '}' 70 | echo '#endif' 71 | echo "/* ========= end header generated by $0 ========= */" 72 | if test " $ifndef" != " " 73 | then 74 | echo "#endif" 75 | fi 76 | exit 0 77 | -------------------------------------------------------------------------------- /ext/re/msvcbuild.bat: -------------------------------------------------------------------------------- 1 | @setlocal 2 | @set SCHEME_H_DIR=..\..\src 3 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_DEPRECATE /I%SCHEME_H_DIR% 4 | @set MSLIB=lib /nologo 5 | @set REGSRC=re.c regcomp.c regexec.c regerror.c regfree.c 6 | @set OBJPRODN=re.obj regcomp.obj regexec.obj regerror.obj regfree.obj 7 | 8 | @if not exist lib\ ( 9 | mkdir lib 10 | ) 11 | 12 | %MSCOMPILE% /MT %REGSRC% 13 | %MSLIB% /out:lib\re.lib %OBJPRODN% 14 | 15 | del %OBJPRODN% 16 | -------------------------------------------------------------------------------- /ext/re/re.c: -------------------------------------------------------------------------------- 1 | /* re.c */ 2 | /* Henry Spencer's implementation of Regular Expressions, 3 | used for MiniScheme */ 4 | /* Refurbished by Stephen Gildea */ 5 | #include 6 | #include 7 | #include "regex.h" 8 | #include "miniscm.h" 9 | 10 | pointer foreign_re_match(pointer args) 11 | { 12 | pointer retval = F; 13 | int retcode; 14 | regex_t rt; 15 | pointer first_arg, second_arg; 16 | pointer third_arg = NIL; 17 | char *string; 18 | char *pattern; 19 | int num = 0; 20 | 21 | if (!((args != NIL) && is_string((first_arg = car(args))) 22 | && (args=cdr(args)) 23 | && is_pair(args) && is_string((second_arg = car(args))))) { 24 | return F; 25 | } 26 | pattern = strvalue(first_arg); 27 | string = strvalue(second_arg); 28 | 29 | args = cdr(args); 30 | if (args != NIL) { 31 | if (!(is_pair(args) && is_vector((third_arg = car(args))))) { 32 | return F; 33 | } else { 34 | num = ivalue(third_arg); 35 | } 36 | } 37 | 38 | if (regcomp(&rt, pattern, REG_EXTENDED) != 0) { 39 | return F; 40 | } 41 | 42 | if (num == 0) { 43 | retcode = regexec(&rt, string, 0, 0, 0); 44 | } else { 45 | regmatch_t *pmatch = malloc((num + 1) * sizeof(regmatch_t)); 46 | if (pmatch != 0) { 47 | retcode = regexec(&rt, string, num + 1, pmatch, 0); 48 | if (retcode == 0) { 49 | int i; 50 | for (i = 0; i < num; i++) { 51 | mark_x = mk_integer(pmatch[i].rm_so); 52 | mark_y = mk_integer(pmatch[i].rm_eo); 53 | set_vector_elem(third_arg, i, cons(mark_x, mark_y)); 54 | } 55 | } 56 | free(pmatch); 57 | } else { 58 | retcode = -1; 59 | } 60 | } 61 | 62 | if (retcode == 0) { 63 | retval = T; 64 | } 65 | 66 | regfree(&rt); 67 | 68 | return retval; 69 | } 70 | 71 | void init_re(void) 72 | { 73 | scheme_register_foreign_func("re-match", foreign_re_match); 74 | } 75 | -------------------------------------------------------------------------------- /ext/re/regcomp.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === regcomp.c === */ 7 | static void p_ere(register struct parse *p, int stop); 8 | static void p_ere_exp(register struct parse *p); 9 | static void p_str(register struct parse *p); 10 | static void p_bre(register struct parse *p, register int end1, register int end2); 11 | static int p_simp_re(register struct parse *p, int starordinary); 12 | static int p_count(register struct parse *p); 13 | static void p_bracket(register struct parse *p); 14 | static void p_b_term(register struct parse *p, register cset *cs); 15 | static void p_b_cclass(register struct parse *p, register cset *cs); 16 | static void p_b_eclass(register struct parse *p, register cset *cs); 17 | static char p_b_symbol(register struct parse *p); 18 | static char p_b_coll_elem(register struct parse *p, int endc); 19 | static char othercase(int ch); 20 | static void bothcases(register struct parse *p, int ch); 21 | static void ordinary(register struct parse *p, register int ch); 22 | static void nonnewline(register struct parse *p); 23 | static void repeat(register struct parse *p, sopno start, int from, int to); 24 | static int seterr(register struct parse *p, int e); 25 | static cset *allocset(register struct parse *p); 26 | static void freeset(register struct parse *p, register cset *cs); 27 | static int freezeset(register struct parse *p, register cset *cs); 28 | static int firstch(register struct parse *p, register cset *cs); 29 | static int nch(register struct parse *p, register cset *cs); 30 | static void mcadd(register struct parse *p, register cset *cs, register char *cp); 31 | static void mcsub(register cset *cs, register char *cp); 32 | static int mcin(register cset *cs, register char *cp); 33 | static char *mcfind(register cset *cs, register char *cp); 34 | static void mcinvert(register struct parse *p, register cset *cs); 35 | static void mccase(register struct parse *p, register cset *cs); 36 | static int isinsets(register struct re_guts *g, int c); 37 | static int samesets(register struct re_guts *g, int c1, int c2); 38 | static void categorize(struct parse *p, register struct re_guts *g); 39 | static sopno dupl(register struct parse *p, sopno start, sopno finish); 40 | static void doemit(register struct parse *p, sop op, size_t opnd); 41 | static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 42 | static void dofwd(register struct parse *p, sopno pos, sop value); 43 | static void enlarge(register struct parse *p, sopno size); 44 | static void stripsnug(register struct parse *p, register struct re_guts *g); 45 | static void findmust(register struct parse *p, register struct re_guts *g); 46 | static sopno pluscount(register struct parse *p, register struct re_guts *g); 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | /* ========= end header generated by ./mkh ========= */ 52 | -------------------------------------------------------------------------------- /ext/re/regerror.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "regex.h" 8 | 9 | #include "utils.h" 10 | #include "regerror.ih" 11 | 12 | /* 13 | = #define REG_OKAY 0 14 | = #define REG_NOMATCH 1 15 | = #define REG_BADPAT 2 16 | = #define REG_ECOLLATE 3 17 | = #define REG_ECTYPE 4 18 | = #define REG_EESCAPE 5 19 | = #define REG_ESUBREG 6 20 | = #define REG_EBRACK 7 21 | = #define REG_EPAREN 8 22 | = #define REG_EBRACE 9 23 | = #define REG_BADBR 10 24 | = #define REG_ERANGE 11 25 | = #define REG_ESPACE 12 26 | = #define REG_BADRPT 13 27 | = #define REG_EMPTY 14 28 | = #define REG_ASSERT 15 29 | = #define REG_INVARG 16 30 | = #define REG_ATOI 255 // convert name to number (!) 31 | = #define REG_ITOA 0400 // convert number to name (!) 32 | */ 33 | static struct rerr { 34 | int code; 35 | char *name; 36 | char *explain; 37 | } rerrs[] = { 38 | REG_OKAY, "REG_OKAY", "no errors detected", 39 | REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", 40 | REG_BADPAT, "REG_BADPAT", "invalid regular expression", 41 | REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", 42 | REG_ECTYPE, "REG_ECTYPE", "invalid character class", 43 | REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", 44 | REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", 45 | REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", 46 | REG_EPAREN, "REG_EPAREN", "parentheses not balanced", 47 | REG_EBRACE, "REG_EBRACE", "braces not balanced", 48 | REG_BADBR, "REG_BADBR", "invalid repetition count(s)", 49 | REG_ERANGE, "REG_ERANGE", "invalid character range", 50 | REG_ESPACE, "REG_ESPACE", "out of memory", 51 | REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", 52 | REG_EMPTY, "REG_EMPTY", "empty (sub)expression", 53 | REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", 54 | REG_INVARG, "REG_INVARG", "invalid argument to regex routine", 55 | -1, "", "*** unknown regexp error code ***", 56 | }; 57 | 58 | /* 59 | - regerror - the interface to error numbers 60 | = extern size_t regerror(int, const regex_t *, char *, size_t); 61 | */ 62 | /* ARGSUSED */ 63 | size_t 64 | regerror(errcode, preg, errbuf, errbuf_size) 65 | int errcode; 66 | const regex_t *preg; 67 | char *errbuf; 68 | size_t errbuf_size; 69 | { 70 | register struct rerr *r; 71 | register size_t len; 72 | register int target = errcode &~ REG_ITOA; 73 | register char *s; 74 | char convbuf[50]; 75 | 76 | if (errcode == REG_ATOI) 77 | s = regatoi(preg, convbuf); 78 | else { 79 | for (r = rerrs; r->code >= 0; r++) 80 | if (r->code == target) 81 | break; 82 | 83 | if (errcode®_ITOA) { 84 | if (r->code >= 0) 85 | (void) strcpy(convbuf, r->name); 86 | else 87 | sprintf(convbuf, "REG_0x%x", target); 88 | assert(strlen(convbuf) < sizeof(convbuf)); 89 | s = convbuf; 90 | } else 91 | s = r->explain; 92 | } 93 | 94 | len = strlen(s) + 1; 95 | if (errbuf_size > 0) { 96 | if (errbuf_size > len) 97 | (void) strcpy(errbuf, s); 98 | else { 99 | (void) strncpy(errbuf, s, errbuf_size-1); 100 | errbuf[errbuf_size-1] = '\0'; 101 | } 102 | } 103 | 104 | return(len); 105 | } 106 | 107 | /* 108 | - regatoi - internal routine to implement REG_ATOI 109 | == static char *regatoi(const regex_t *preg, char *localbuf); 110 | */ 111 | static char * 112 | regatoi(preg, localbuf) 113 | const regex_t *preg; 114 | char *localbuf; 115 | { 116 | register struct rerr *r; 117 | 118 | for (r = rerrs; r->code >= 0; r++) 119 | if (strcmp(r->name, preg->re_endp) == 0) 120 | break; 121 | if (r->code < 0) 122 | return("0"); 123 | 124 | sprintf(localbuf, "%d", r->code); 125 | return(localbuf); 126 | } 127 | -------------------------------------------------------------------------------- /ext/re/regerror.ih: -------------------------------------------------------------------------------- 1 | /* ========= begin header generated by ./mkh ========= */ 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | /* === regerror.c === */ 7 | static char *regatoi(const regex_t *preg, char *localbuf); 8 | 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | /* ========= end header generated by ./mkh ========= */ 13 | -------------------------------------------------------------------------------- /ext/re/regex.3: -------------------------------------------------------------------------------- 1 | .TH REGEX 3 "25 Sept 1997" 2 | .BY "Henry Spencer" 3 | .de ZR 4 | .\" one other place knows this name: the SEE ALSO section 5 | .IR regex (7) \\$1 6 | .. 7 | .SH NAME 8 | regcomp, regexec, regerror, regfree \- regular-expression library 9 | .SH SYNOPSIS 10 | .ft B 11 | .\".na 12 | #include 13 | .br 14 | #include 15 | .HP 10 16 | int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags); 17 | .HP 18 | int\ regexec(const\ regex_t\ *preg, const\ char\ *string, 19 | size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags); 20 | .HP 21 | size_t\ regerror(int\ errcode, const\ regex_t\ *preg, 22 | char\ *errbuf, size_t\ errbuf_size); 23 | .HP 24 | void\ regfree(regex_t\ *preg); 25 | .\".ad 26 | .ft 27 | .SH DESCRIPTION 28 | These routines implement POSIX 1003.2 regular expressions (``RE''s); 29 | see 30 | .ZR . 31 | .I Regcomp 32 | compiles an RE written as a string into an internal form, 33 | .I regexec 34 | matches that internal form against a string and reports results, 35 | .I regerror 36 | transforms error codes from either into human-readable messages, 37 | and 38 | .I regfree 39 | frees any dynamically-allocated storage used by the internal form 40 | of an RE. 41 | .PP 42 | The header 43 | .I 44 | declares two structure types, 45 | .I regex_t 46 | and 47 | .IR regmatch_t , 48 | the former for compiled internal forms and the latter for match reporting. 49 | It also declares the four functions, 50 | a type 51 | .IR regoff_t , 52 | and a number of constants with names starting with ``REG_''. 53 | .PP 54 | .I Regcomp 55 | compiles the regular expression contained in the 56 | .I pattern 57 | string, 58 | subject to the flags in 59 | .IR cflags , 60 | and places the results in the 61 | .I regex_t 62 | structure pointed to by 63 | .IR preg . 64 | .I Cflags 65 | is the bitwise OR of zero or more of the following flags: 66 | .IP REG_EXTENDED \w'REG_EXTENDED'u+2n 67 | Compile modern (``extended'') REs, 68 | rather than the obsolete (``basic'') REs that 69 | are the default. 70 | .IP REG_BASIC 71 | This is a synonym for 0, 72 | provided as a counterpart to REG_EXTENDED to improve readability. 73 | This is an extension, 74 | compatible with but not specified by POSIX 1003.2, 75 | and should be used with 76 | caution in software intended to be portable to other systems. 77 | .IP REG_NOSPEC 78 | Compile with recognition of all special characters turned off. 79 | All characters are thus considered ordinary, 80 | so the ``RE'' is a literal string. 81 | This is an extension, 82 | compatible with but not specified by POSIX 1003.2, 83 | and should be used with 84 | caution in software intended to be portable to other systems. 85 | REG_EXTENDED and REG_NOSPEC may not be used 86 | in the same call to 87 | .IR regcomp . 88 | .IP REG_ICASE 89 | Compile for matching that ignores upper/lower case distinctions. 90 | See 91 | .ZR . 92 | .IP REG_NOSUB 93 | Compile for matching that need only report success or failure, 94 | not what was matched. 95 | .IP REG_NEWLINE 96 | Compile for newline-sensitive matching. 97 | By default, newline is a completely ordinary character with no special 98 | meaning in either REs or strings. 99 | With this flag, 100 | `[^' bracket expressions and `.' never match newline, 101 | a `^' anchor matches the null string after any newline in the string 102 | in addition to its normal function, 103 | and the `$' anchor matches the null string before any newline in the 104 | string in addition to its normal function. 105 | .IP REG_PEND 106 | The regular expression ends, 107 | not at the first NUL, 108 | but just before the character pointed to by the 109 | .I re_endp 110 | member of the structure pointed to by 111 | .IR preg . 112 | The 113 | .I re_endp 114 | member is of type 115 | .IR const\ char\ * . 116 | This flag permits inclusion of NULs in the RE; 117 | they are considered ordinary characters. 118 | This is an extension, 119 | compatible with but not specified by POSIX 1003.2, 120 | and should be used with 121 | caution in software intended to be portable to other systems. 122 | .PP 123 | When successful, 124 | .I regcomp 125 | returns 0 and fills in the structure pointed to by 126 | .IR preg . 127 | One member of that structure 128 | (other than 129 | .IR re_endp ) 130 | is publicized: 131 | .IR re_nsub , 132 | of type 133 | .IR size_t , 134 | contains the number of parenthesized subexpressions within the RE 135 | (except that the value of this member is undefined if the 136 | REG_NOSUB flag was used). 137 | If 138 | .I regcomp 139 | fails, it returns a non-zero error code; 140 | see DIAGNOSTICS. 141 | .PP 142 | .I Regexec 143 | matches the compiled RE pointed to by 144 | .I preg 145 | against the 146 | .IR string , 147 | subject to the flags in 148 | .IR eflags , 149 | and reports results using 150 | .IR nmatch , 151 | .IR pmatch , 152 | and the returned value. 153 | The RE must have been compiled by a previous invocation of 154 | .IR regcomp . 155 | The compiled form is not altered during execution of 156 | .IR regexec , 157 | so a single compiled RE can be used simultaneously by multiple threads. 158 | .PP 159 | By default, 160 | the NUL-terminated string pointed to by 161 | .I string 162 | is considered to be the text of an entire line, 163 | with the NUL indicating the end of the line. 164 | (That is, 165 | any other end-of-line marker is considered to have been removed 166 | and replaced by the NUL.) 167 | The 168 | .I eflags 169 | argument is the bitwise OR of zero or more of the following flags: 170 | .IP REG_NOTBOL \w'REG_STARTEND'u+2n 171 | The first character of 172 | the string 173 | is not the beginning of a line, so the `^' anchor should not match before it. 174 | This does not affect the behavior of newlines under REG_NEWLINE. 175 | .IP REG_NOTEOL 176 | The NUL terminating 177 | the string 178 | does not end a line, so the `$' anchor should not match before it. 179 | This does not affect the behavior of newlines under REG_NEWLINE. 180 | .IP REG_STARTEND 181 | The string is considered to start at 182 | \fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR 183 | and to have a terminating NUL located at 184 | \fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR 185 | (there need not actually be a NUL at that location), 186 | regardless of the value of 187 | .IR nmatch . 188 | See below for the definition of 189 | .IR pmatch 190 | and 191 | .IR nmatch . 192 | This is an extension, 193 | compatible with but not specified by POSIX 1003.2, 194 | and should be used with 195 | caution in software intended to be portable to other systems. 196 | Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL; 197 | REG_STARTEND affects only the location of the string, 198 | not how it is matched. 199 | .PP 200 | See 201 | .ZR 202 | for a discussion of what is matched in situations where an RE or a 203 | portion thereof could match any of several substrings of 204 | .IR string . 205 | .PP 206 | Normally, 207 | .I regexec 208 | returns 0 for success and the non-zero code REG_NOMATCH for failure. 209 | Other non-zero error codes may be returned in exceptional situations; 210 | see DIAGNOSTICS. 211 | .PP 212 | If REG_NOSUB was specified in the compilation of the RE, 213 | or if 214 | .I nmatch 215 | is 0, 216 | .I regexec 217 | ignores the 218 | .I pmatch 219 | argument (but see below for the case where REG_STARTEND is specified). 220 | Otherwise, 221 | .I pmatch 222 | points to an array of 223 | .I nmatch 224 | structures of type 225 | .IR regmatch_t . 226 | Such a structure has at least the members 227 | .I rm_so 228 | and 229 | .IR rm_eo , 230 | both of type 231 | .I regoff_t 232 | (a signed arithmetic type at least as large as an 233 | .I off_t 234 | and a 235 | .IR ssize_t ), 236 | containing respectively the offset of the first character of a substring 237 | and the offset of the first character after the end of the substring. 238 | Offsets are measured from the beginning of the 239 | .I string 240 | argument given to 241 | .IR regexec . 242 | An empty substring is denoted by equal offsets, 243 | both indicating the character following the empty substring. 244 | .PP 245 | The 0th member of the 246 | .I pmatch 247 | array is filled in to indicate what substring of 248 | .I string 249 | was matched by the entire RE. 250 | Remaining members report what substring was matched by parenthesized 251 | subexpressions within the RE; 252 | member 253 | .I i 254 | reports subexpression 255 | .IR i , 256 | with subexpressions counted (starting at 1) by the order of their opening 257 | parentheses in the RE, left to right. 258 | Unused entries in the array\(emcorresponding either to subexpressions that 259 | did not participate in the match at all, or to subexpressions that do not 260 | exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both 261 | .I rm_so 262 | and 263 | .I rm_eo 264 | set to \-1. 265 | If a subexpression participated in the match several times, 266 | the reported substring is the last one it matched. 267 | (Note, as an example in particular, that when the RE `(b*)+' matches `bbb', 268 | the parenthesized subexpression matches the three `b's and then 269 | an infinite number of empty strings following the last `b', 270 | so the reported substring is one of the empties.) 271 | .PP 272 | If REG_STARTEND is specified, 273 | .I pmatch 274 | must point to at least one 275 | .I regmatch_t 276 | (even if 277 | .I nmatch 278 | is 0 or REG_NOSUB was specified), 279 | to hold the input offsets for REG_STARTEND. 280 | Use for output is still entirely controlled by 281 | .IR nmatch ; 282 | if 283 | .I nmatch 284 | is 0 or REG_NOSUB was specified, 285 | the value of 286 | .IR pmatch [0] 287 | will not be changed by a successful 288 | .IR regexec . 289 | .PP 290 | .I Regerror 291 | maps a non-zero 292 | .I errcode 293 | from either 294 | .I regcomp 295 | or 296 | .I regexec 297 | to a human-readable, printable message. 298 | If 299 | .I preg 300 | is non-NULL, 301 | the error code should have arisen from use of 302 | the 303 | .I regex_t 304 | pointed to by 305 | .IR preg , 306 | and if the error code came from 307 | .IR regcomp , 308 | it should have been the result from the most recent 309 | .I regcomp 310 | using that 311 | .IR regex_t . 312 | .RI ( Regerror 313 | may be able to supply a more detailed message using information 314 | from the 315 | .IR regex_t .) 316 | .I Regerror 317 | places the NUL-terminated message into the buffer pointed to by 318 | .IR errbuf , 319 | limiting the length (including the NUL) to at most 320 | .I errbuf_size 321 | bytes. 322 | If the whole message won't fit, 323 | as much of it as will fit before the terminating NUL is supplied. 324 | In any case, 325 | the returned value is the size of buffer needed to hold the whole 326 | message (including terminating NUL). 327 | If 328 | .I errbuf_size 329 | is 0, 330 | .I errbuf 331 | is ignored but the return value is still correct. 332 | .PP 333 | If the 334 | .I errcode 335 | given to 336 | .I regerror 337 | is first ORed with REG_ITOA, 338 | the ``message'' that results is the printable name of the error code, 339 | e.g. ``REG_NOMATCH'', 340 | rather than an explanation thereof. 341 | If 342 | .I errcode 343 | is REG_ATOI, 344 | then 345 | .I preg 346 | shall be non-NULL and the 347 | .I re_endp 348 | member of the structure it points to 349 | must point to the printable name of an error code; 350 | in this case, the result in 351 | .I errbuf 352 | is the decimal digits of 353 | the numeric value of the error code 354 | (0 if the name is not recognized). 355 | REG_ITOA and REG_ATOI are intended primarily as debugging facilities; 356 | they are extensions, 357 | compatible with but not specified by POSIX 1003.2, 358 | and should be used with 359 | caution in software intended to be portable to other systems. 360 | Be warned also that they are considered experimental and changes are possible. 361 | .PP 362 | .I Regfree 363 | frees any dynamically-allocated storage associated with the compiled RE 364 | pointed to by 365 | .IR preg . 366 | The remaining 367 | .I regex_t 368 | is no longer a valid compiled RE 369 | and the effect of supplying it to 370 | .I regexec 371 | or 372 | .I regerror 373 | is undefined. 374 | .PP 375 | None of these functions references global variables except for tables 376 | of constants; 377 | all are safe for use from multiple threads if the arguments are safe. 378 | .SH IMPLEMENTATION CHOICES 379 | There are a number of decisions that 1003.2 leaves up to the implementor, 380 | either by explicitly saying ``undefined'' or by virtue of them being 381 | forbidden by the RE grammar. 382 | This implementation treats them as follows. 383 | .PP 384 | See 385 | .ZR 386 | for a discussion of the definition of case-independent matching. 387 | .PP 388 | There is no particular limit on the length of REs, 389 | except insofar as memory is limited. 390 | Memory usage is approximately linear in RE size, and largely insensitive 391 | to RE complexity, except for bounded repetitions. 392 | See BUGS for one short RE using them 393 | that will run almost any system out of memory. 394 | .PP 395 | A backslashed character other than one specifically given a magic meaning 396 | by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs) 397 | is taken as an ordinary character. 398 | .PP 399 | Any unmatched [ is a REG_EBRACK error. 400 | .PP 401 | Equivalence classes cannot begin or end bracket-expression ranges. 402 | The endpoint of one range cannot begin another. 403 | .PP 404 | RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255. 405 | .PP 406 | A repetition operator (?, *, +, or bounds) cannot follow another 407 | repetition operator. 408 | A repetition operator cannot begin an expression or subexpression 409 | or follow `^' or `|'. 410 | .PP 411 | `|' cannot appear first or last in a (sub)expression or after another `|', 412 | i.e. an operand of `|' cannot be an empty subexpression. 413 | An empty parenthesized subexpression, `()', is legal and matches an 414 | empty (sub)string. 415 | An empty string is not a legal RE. 416 | .PP 417 | A `{' followed by a digit is considered the beginning of bounds for a 418 | bounded repetition, which must then follow the syntax for bounds. 419 | A `{' \fInot\fR followed by a digit is considered an ordinary character. 420 | .PP 421 | `^' and `$' beginning and ending subexpressions in obsolete (``basic'') 422 | REs are anchors, not ordinary characters. 423 | .SH SEE ALSO 424 | grep(1), regex(7) 425 | .PP 426 | POSIX 1003.2, sections 2.8 (Regular Expression Notation) 427 | and 428 | B.5 (C Binding for Regular Expression Matching). 429 | .SH DIAGNOSTICS 430 | Non-zero error codes from 431 | .I regcomp 432 | and 433 | .I regexec 434 | include the following: 435 | .PP 436 | .nf 437 | .ta \w'REG_ECOLLATE'u+3n 438 | REG_NOMATCH regexec() failed to match 439 | REG_BADPAT invalid regular expression 440 | REG_ECOLLATE invalid collating element 441 | REG_ECTYPE invalid character class 442 | REG_EESCAPE \e applied to unescapable character 443 | REG_ESUBREG invalid backreference number 444 | REG_EBRACK brackets [ ] not balanced 445 | REG_EPAREN parentheses ( ) not balanced 446 | REG_EBRACE braces { } not balanced 447 | REG_BADBR invalid repetition count(s) in { } 448 | REG_ERANGE invalid character range in [ ] 449 | REG_ESPACE ran out of memory 450 | REG_BADRPT ?, *, or + operand invalid 451 | REG_EMPTY empty (sub)expression 452 | REG_ASSERT ``can't happen''\(emyou found a bug 453 | REG_INVARG invalid argument, e.g. negative-length string 454 | .fi 455 | .SH HISTORY 456 | Written by Henry Spencer, 457 | henry@zoo.toronto.edu. 458 | .SH BUGS 459 | This is an alpha release with known defects. 460 | Please report problems. 461 | .PP 462 | There is one known functionality bug. 463 | The implementation of internationalization is incomplete: 464 | the locale is always assumed to be the default one of 1003.2, 465 | and only the collating elements etc. of that locale are available. 466 | .PP 467 | The back-reference code is subtle and doubts linger about its correctness 468 | in complex cases. 469 | .PP 470 | .I Regexec 471 | performance is poor. 472 | This will improve with later releases. 473 | .I Nmatch 474 | exceeding 0 is expensive; 475 | .I nmatch 476 | exceeding 1 is worse. 477 | .I Regexec 478 | is largely insensitive to RE complexity \fIexcept\fR that back 479 | references are massively expensive. 480 | RE length does matter; in particular, there is a strong speed bonus 481 | for keeping RE length under about 30 characters, 482 | with most special characters counting roughly double. 483 | .PP 484 | .I Regcomp 485 | implements bounded repetitions by macro expansion, 486 | which is costly in time and space if counts are large 487 | or bounded repetitions are nested. 488 | An RE like, say, 489 | `((((a{1,100}){1,100}){1,100}){1,100}){1,100}' 490 | will (eventually) run almost any existing machine out of swap space. 491 | .PP 492 | There are suspected problems with response to obscure error conditions. 493 | Notably, 494 | certain kinds of internal overflow, 495 | produced only by truly enormous REs or by multiply nested bounded repetitions, 496 | are probably not handled well. 497 | .PP 498 | Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is 499 | a special character only in the presence of a previous unmatched `('. 500 | This can't be fixed until the spec is fixed. 501 | .PP 502 | The standard's definition of back references is vague. 503 | For example, does 504 | `a\e(\e(b\e)*\e2\e)*d' match `abbbd'? 505 | Until the standard is clarified, 506 | behavior in such cases should not be relied on. 507 | .PP 508 | The implementation of word-boundary matching is a bit of a kludge, 509 | and bugs may lurk in combinations of word-boundary matching and anchoring. 510 | -------------------------------------------------------------------------------- /ext/re/regex.7: -------------------------------------------------------------------------------- 1 | .TH REGEX 7 "25 Oct 1995" 2 | .BY "Henry Spencer" 3 | .SH NAME 4 | regex \- POSIX 1003.2 regular expressions 5 | .SH DESCRIPTION 6 | Regular expressions (``RE''s), 7 | as defined in POSIX 1003.2, come in two forms: 8 | modern REs (roughly those of 9 | .IR egrep ; 10 | 1003.2 calls these ``extended'' REs) 11 | and obsolete REs (roughly those of 12 | .IR ed ; 13 | 1003.2 ``basic'' REs). 14 | Obsolete REs mostly exist for backward compatibility in some old programs; 15 | they will be discussed at the end. 16 | 1003.2 leaves some aspects of RE syntax and semantics open; 17 | `\(dg' marks decisions on these aspects that 18 | may not be fully portable to other 1003.2 implementations. 19 | .PP 20 | A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, 21 | separated by `|'. 22 | It matches anything that matches one of the branches. 23 | .PP 24 | A branch is one\(dg or more \fIpieces\fR, concatenated. 25 | It matches a match for the first, followed by a match for the second, etc. 26 | .PP 27 | A piece is an \fIatom\fR possibly followed 28 | by a single\(dg `*', `+', `?', or \fIbound\fR. 29 | An atom followed by `*' matches a sequence of 0 or more matches of the atom. 30 | An atom followed by `+' matches a sequence of 1 or more matches of the atom. 31 | An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. 32 | .PP 33 | A \fIbound\fR is `{' followed by an unsigned decimal integer, 34 | possibly followed by `,' 35 | possibly followed by another unsigned decimal integer, 36 | always followed by `}'. 37 | The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, 38 | and if there are two of them, the first may not exceed the second. 39 | An atom followed by a bound containing one integer \fIi\fR 40 | and no comma matches 41 | a sequence of exactly \fIi\fR matches of the atom. 42 | An atom followed by a bound 43 | containing one integer \fIi\fR and a comma matches 44 | a sequence of \fIi\fR or more matches of the atom. 45 | An atom followed by a bound 46 | containing two integers \fIi\fR and \fIj\fR matches 47 | a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. 48 | .PP 49 | An atom is a regular expression enclosed in `()' (matching a match for the 50 | regular expression), 51 | an empty set of `()' (matching the null string)\(dg, 52 | a \fIbracket expression\fR (see below), `.' 53 | (matching any single character), `^' (matching the null string at the 54 | beginning of a line), `$' (matching the null string at the 55 | end of a line), a `\e' followed by one of the characters 56 | `^.[$()|*+?{\e' 57 | (matching that character taken as an ordinary character), 58 | a `\e' followed by any other character\(dg 59 | (matching that character taken as an ordinary character, 60 | as if the `\e' had not been present\(dg), 61 | or a single character with no other significance (matching that character). 62 | A `{' followed by a character other than a digit is an ordinary 63 | character, not the beginning of a bound\(dg. 64 | It is illegal to end an RE with `\e'. 65 | .PP 66 | A \fIbracket expression\fR is a list of characters enclosed in `[]'. 67 | It normally matches any single character from the list (but see below). 68 | If the list begins with `^', 69 | it matches any single character 70 | (but see below) \fInot\fR from the rest of the list. 71 | If two characters in the list are separated by `\-', this is shorthand 72 | for the full \fIrange\fR of characters between those two (inclusive) in the 73 | collating sequence, 74 | e.g. `[0\-9]' in ASCII matches any decimal digit. 75 | It is illegal\(dg for two ranges to share an 76 | endpoint, e.g. `a\-c\-e'. 77 | Ranges are very collating-sequence-dependent, 78 | and portable programs should avoid relying on them. 79 | .PP 80 | To include a literal `]' in the list, make it the first character 81 | (following a possible `^'). 82 | To include a literal `\-', make it the first or last character, 83 | or the second endpoint of a range. 84 | To use a literal `\-' as the first endpoint of a range, 85 | enclose it in `[.' and `.]' to make it a collating element (see below). 86 | With the exception of these and some combinations using `[' (see next 87 | paragraphs), all other special characters, including `\e', lose their 88 | special significance within a bracket expression. 89 | .PP 90 | Within a bracket expression, a collating element (a character, 91 | a multi-character sequence that collates as if it were a single character, 92 | or a collating-sequence name for either) 93 | enclosed in `[.' and `.]' stands for the 94 | sequence of characters of that collating element. 95 | The sequence is a single element of the bracket expression's list. 96 | A bracket expression containing a multi-character collating element 97 | can thus match more than one character, 98 | e.g. if the collating sequence includes a `ch' collating element, 99 | then the RE `[[.ch.]]*c' matches the first five characters 100 | of `chchcc'. 101 | .PP 102 | Within a bracket expression, a collating element enclosed in `[=' and 103 | `=]' is an equivalence class, standing for the sequences of characters 104 | of all collating elements equivalent to that one, including itself. 105 | (If there are no other equivalent collating elements, 106 | the treatment is as if the enclosing delimiters were `[.' and `.]'.) 107 | For example, if o and \o'o^' are the members of an equivalence class, 108 | then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. 109 | An equivalence class may not\(dg be an endpoint 110 | of a range. 111 | .PP 112 | Within a bracket expression, the name of a \fIcharacter class\fR enclosed 113 | in `[:' and `:]' stands for the list of all characters belonging to that 114 | class. 115 | Standard character class names are: 116 | .PP 117 | .RS 118 | .nf 119 | .ta 3c 6c 9c 120 | alnum digit punct 121 | alpha graph space 122 | blank lower upper 123 | cntrl print xdigit 124 | .fi 125 | .RE 126 | .PP 127 | These stand for the character classes defined in 128 | .IR ctype (3). 129 | A locale may provide others. 130 | A character class may not be used as an endpoint of a range. 131 | .PP 132 | There are two special cases\(dg of bracket expressions: 133 | the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at 134 | the beginning and end of a word respectively. 135 | A word is defined as a sequence of 136 | word characters 137 | which is neither preceded nor followed by 138 | word characters. 139 | A word character is an 140 | .I alnum 141 | character (as defined by 142 | .IR ctype (3)) 143 | or an underscore. 144 | This is an extension, 145 | compatible with but not specified by POSIX 1003.2, 146 | and should be used with 147 | caution in software intended to be portable to other systems. 148 | .PP 149 | In the event that an RE could match more than one substring of a given 150 | string, 151 | the RE matches the one starting earliest in the string. 152 | If the RE could match more than one substring starting at that point, 153 | it matches the longest. 154 | Subexpressions also match the longest possible substrings, subject to 155 | the constraint that the whole match be as long as possible, 156 | with subexpressions starting earlier in the RE taking priority over 157 | ones starting later. 158 | Note that higher-level subexpressions thus take priority over 159 | their lower-level component subexpressions. 160 | .PP 161 | Match lengths are measured in characters, not collating elements. 162 | A null string is considered longer than no match at all. 163 | For example, 164 | `bb*' matches the three middle characters of `abbbc', 165 | `(wee|week)(knights|nights)' matches all ten characters of `weeknights', 166 | when `(.*).*' is matched against `abc' the parenthesized subexpression 167 | matches all three characters, and 168 | when `(a*)*' is matched against `bc' both the whole RE and the parenthesized 169 | subexpression match the null string. 170 | .PP 171 | If case-independent matching is specified, 172 | the effect is much as if all case distinctions had vanished from the 173 | alphabet. 174 | When an alphabetic that exists in multiple cases appears as an 175 | ordinary character outside a bracket expression, it is effectively 176 | transformed into a bracket expression containing both cases, 177 | e.g. `x' becomes `[xX]'. 178 | When it appears inside a bracket expression, all case counterparts 179 | of it are added to the bracket expression, so that (e.g.) `[x]' 180 | becomes `[xX]' and `[^x]' becomes `[^xX]'. 181 | .PP 182 | No particular limit is imposed on the length of REs\(dg. 183 | Programs intended to be portable should not employ REs longer 184 | than 256 bytes, 185 | as an implementation can refuse to accept such REs and remain 186 | POSIX-compliant. 187 | .PP 188 | Obsolete (``basic'') regular expressions differ in several respects. 189 | `|', `+', and `?' are ordinary characters and there is no equivalent 190 | for their functionality. 191 | The delimiters for bounds are `\e{' and `\e}', 192 | with `{' and `}' by themselves ordinary characters. 193 | The parentheses for nested subexpressions are `\e(' and `\e)', 194 | with `(' and `)' by themselves ordinary characters. 195 | `^' is an ordinary character except at the beginning of the 196 | RE or\(dg the beginning of a parenthesized subexpression, 197 | `$' is an ordinary character except at the end of the 198 | RE or\(dg the end of a parenthesized subexpression, 199 | and `*' is an ordinary character if it appears at the beginning of the 200 | RE or the beginning of a parenthesized subexpression 201 | (after a possible leading `^'). 202 | Finally, there is one new type of atom, a \fIback reference\fR: 203 | `\e' followed by a non-zero decimal digit \fId\fR 204 | matches the same sequence of characters 205 | matched by the \fId\fRth parenthesized subexpression 206 | (numbering subexpressions by the positions of their opening parentheses, 207 | left to right), 208 | so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. 209 | .SH SEE ALSO 210 | regex(3) 211 | .PP 212 | POSIX 1003.2, section 2.8 (Regular Expression Notation). 213 | .SH HISTORY 214 | Written by Henry Spencer, based on the 1003.2 spec. 215 | .SH BUGS 216 | Having two kinds of REs is a botch. 217 | .PP 218 | The current 1003.2 spec says that `)' is an ordinary character in 219 | the absence of an unmatched `('; 220 | this was an unintentional result of a wording error, 221 | and change is likely. 222 | Avoid relying on it. 223 | .PP 224 | Back references are a dreadful botch, 225 | posing major problems for efficient implementations. 226 | They are also somewhat vaguely defined 227 | (does 228 | `a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). 229 | Avoid using them. 230 | .PP 231 | 1003.2's specification of case-independent matching is vague. 232 | The ``one case implies all cases'' definition given above 233 | is current consensus among implementors as to the right interpretation. 234 | .PP 235 | The syntax for word boundaries is incredibly ugly. 236 | -------------------------------------------------------------------------------- /ext/re/regex.h: -------------------------------------------------------------------------------- 1 | #ifndef _REGEX_H_ 2 | #define _REGEX_H_ /* never again */ 3 | /* ========= begin header generated by ././mkh ========= */ 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /* === regex2.h === */ 9 | typedef off_t regoff_t; 10 | typedef struct { 11 | int re_magic; 12 | size_t re_nsub; /* number of parenthesized subexpressions */ 13 | const char *re_endp; /* end pointer for REG_PEND */ 14 | struct re_guts *re_g; /* none of your business :-) */ 15 | } regex_t; 16 | typedef struct { 17 | regoff_t rm_so; /* start of match */ 18 | regoff_t rm_eo; /* end of match */ 19 | } regmatch_t; 20 | 21 | 22 | /* === regcomp.c === */ 23 | extern int regcomp(regex_t *, const char *, int); 24 | #define REG_BASIC 0000 25 | #define REG_EXTENDED 0001 26 | #define REG_ICASE 0002 27 | #define REG_NOSUB 0004 28 | #define REG_NEWLINE 0010 29 | #define REG_NOSPEC 0020 30 | #define REG_PEND 0040 31 | #define REG_DUMP 0200 32 | 33 | 34 | /* === regerror.c === */ 35 | #define REG_OKAY 0 36 | #define REG_NOMATCH 1 37 | #define REG_BADPAT 2 38 | #define REG_ECOLLATE 3 39 | #define REG_ECTYPE 4 40 | #define REG_EESCAPE 5 41 | #define REG_ESUBREG 6 42 | #define REG_EBRACK 7 43 | #define REG_EPAREN 8 44 | #define REG_EBRACE 9 45 | #define REG_BADBR 10 46 | #define REG_ERANGE 11 47 | #define REG_ESPACE 12 48 | #define REG_BADRPT 13 49 | #define REG_EMPTY 14 50 | #define REG_ASSERT 15 51 | #define REG_INVARG 16 52 | #define REG_ATOI 255 /* convert name to number (!) */ 53 | #define REG_ITOA 0400 /* convert number to name (!) */ 54 | extern size_t regerror(int, const regex_t *, char *, size_t); 55 | 56 | 57 | /* === regexec.c === */ 58 | extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); 59 | #define REG_NOTBOL 00001 60 | #define REG_NOTEOL 00002 61 | #define REG_STARTEND 00004 62 | #define REG_TRACE 00400 /* tracing of execution */ 63 | #define REG_LARGE 01000 /* force large representation */ 64 | #define REG_BACKR 02000 /* force use of backref code */ 65 | 66 | 67 | /* === regfree.c === */ 68 | extern void regfree(regex_t *); 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | /* ========= end header generated by ././mkh ========= */ 74 | #endif 75 | -------------------------------------------------------------------------------- /ext/re/regex2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * First, the stuff that ends up in the outside-world include file 3 | = typedef off_t regoff_t; 4 | = typedef struct { 5 | = int re_magic; 6 | = size_t re_nsub; // number of parenthesized subexpressions 7 | = const char *re_endp; // end pointer for REG_PEND 8 | = struct re_guts *re_g; // none of your business :-) 9 | = } regex_t; 10 | = typedef struct { 11 | = regoff_t rm_so; // start of match 12 | = regoff_t rm_eo; // end of match 13 | = } regmatch_t; 14 | */ 15 | /* 16 | * internals of regex_t 17 | */ 18 | #define MAGIC1 ((('r'^0200)<<8) | 'e') 19 | 20 | /* 21 | * The internal representation is a *strip*, a sequence of 22 | * operators ending with an endmarker. (Some terminology etc. is a 23 | * historical relic of earlier versions which used multiple strips.) 24 | * Certain oddities in the representation are there to permit running 25 | * the machinery backwards; in particular, any deviation from sequential 26 | * flow must be marked at both its source and its destination. Some 27 | * fine points: 28 | * 29 | * - OPLUS_ and O_PLUS are *inside* the loop they create. 30 | * - OQUEST_ and O_QUEST are *outside* the bypass they create. 31 | * - OCH_ and O_CH are *outside* the multi-way branch they create, while 32 | * OOR1 and OOR2 are respectively the end and the beginning of one of 33 | * the branches. Note that there is an implicit OOR2 following OCH_ 34 | * and an implicit OOR1 preceding O_CH. 35 | * 36 | * In state representations, an operator's bit is on to signify a state 37 | * immediately *preceding* "execution" of that operator. 38 | */ 39 | typedef long sop; /* strip operator */ 40 | typedef long sopno; 41 | #define OPRMASK 0x7c000000 42 | #define OPDMASK 0x03ffffff 43 | #define OPSHIFT (26) 44 | #define OP(n) ((n)&OPRMASK) 45 | #define OPND(n) ((n)&OPDMASK) 46 | #define SOP(op, opnd) ((op)|(opnd)) 47 | /* operators meaning operand */ 48 | /* (back, fwd are offsets) */ 49 | #define OEND (1< uch [csetsize] */ 84 | uch mask; /* bit within array */ 85 | uch hash; /* hash code */ 86 | size_t smultis; 87 | char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ 88 | } cset; 89 | /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ 90 | #define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) 91 | #define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) 92 | #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) 93 | #define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ 94 | #define MCsub(p, cs, cp) mcsub(p, cs, cp) 95 | #define MCin(p, cs, cp) mcin(p, cs, cp) 96 | 97 | /* stuff for character categories */ 98 | typedef unsigned char cat_t; 99 | 100 | /* 101 | * main compiled-expression structure 102 | */ 103 | struct re_guts { 104 | int magic; 105 | # define MAGIC2 ((('R'^0200)<<8)|'E') 106 | sop *strip; /* malloced area for strip */ 107 | int csetsize; /* number of bits in a cset vector */ 108 | int ncsets; /* number of csets in use */ 109 | cset *sets; /* -> cset [ncsets] */ 110 | uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ 111 | int cflags; /* copy of regcomp() cflags argument */ 112 | sopno nstates; /* = number of sops */ 113 | sopno firststate; /* the initial OEND (normally 0) */ 114 | sopno laststate; /* the final OEND */ 115 | int iflags; /* internal flags */ 116 | # define USEBOL 01 /* used ^ */ 117 | # define USEEOL 02 /* used $ */ 118 | # define BAD 04 /* something wrong */ 119 | int nbol; /* number of ^ used */ 120 | int neol; /* number of $ used */ 121 | int ncategories; /* how many character categories */ 122 | cat_t *categories; /* ->catspace[-CHAR_MIN] */ 123 | char *must; /* match must contain this string */ 124 | int mlen; /* length of must */ 125 | size_t nsub; /* copy of re_nsub */ 126 | int backrefs; /* does it use back references? */ 127 | sopno nplus; /* how deep does it nest +s? */ 128 | /* catspace must be last */ 129 | cat_t catspace[1]; /* actually [NC] */ 130 | }; 131 | 132 | /* misc utilities */ 133 | #define OUT (CHAR_MAX+1) /* a non-character value */ 134 | #define ISWORD(c) (isalnum(c) || (c) == '_') 135 | -------------------------------------------------------------------------------- /ext/re/regexec.c: -------------------------------------------------------------------------------- 1 | /* 2 | * the outer shell of regexec() 3 | * 4 | * This file includes engine.c *twice*, after muchos fiddling with the 5 | * macros that code uses. This lets the same code operate on two different 6 | * representations for state sets. 7 | */ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "regex.h" 15 | 16 | #include "utils.h" 17 | #include "regex2.h" 18 | 19 | static int nope = 0; /* for use in asserts; shuts lint up */ 20 | 21 | /* macros for manipulating states, small version */ 22 | #define states unsigned 23 | #define states1 unsigned /* for later use in regexec() decision */ 24 | #define CLEAR(v) ((v) = 0) 25 | #define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) 26 | #define SET1(v, n) ((v) |= (unsigned)1 << (n)) 27 | #define ISSET(v, n) ((v) & ((unsigned)1 << (n))) 28 | #define ASSIGN(d, s) ((d) = (s)) 29 | #define EQ(a, b) ((a) == (b)) 30 | #define STATEVARS int dummy /* dummy version */ 31 | #define STATESETUP(m, n) /* nothing */ 32 | #define STATETEARDOWN(m) /* nothing */ 33 | #define SETUP(v) ((v) = 0) 34 | #define onestate unsigned 35 | #define INIT(o, n) ((o) = (unsigned)1 << (n)) 36 | #define INC(o) ((o) <<= 1) 37 | #define ISSTATEIN(v, o) ((v) & (o)) 38 | /* some abbreviations; note that some of these know variable names! */ 39 | /* do "if I'm here, I can also be there" etc without branches */ 40 | #define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) 41 | #define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) 42 | #define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) 43 | /* function names */ 44 | #define SNAMES /* engine.c looks after details */ 45 | 46 | #include "engine.c" 47 | 48 | /* now undo things */ 49 | #undef states 50 | #undef CLEAR 51 | #undef SET0 52 | #undef SET1 53 | #undef ISSET 54 | #undef ASSIGN 55 | #undef EQ 56 | #undef STATEVARS 57 | #undef STATESETUP 58 | #undef STATETEARDOWN 59 | #undef SETUP 60 | #undef onestate 61 | #undef INIT 62 | #undef INC 63 | #undef ISSTATEIN 64 | #undef FWD 65 | #undef BACK 66 | #undef ISSETBACK 67 | #undef SNAMES 68 | 69 | /* macros for manipulating states, large version */ 70 | #define states char * 71 | #define CLEAR(v) memset(v, 0, m->g->nstates) 72 | #define SET0(v, n) ((v)[n] = 0) 73 | #define SET1(v, n) ((v)[n] = 1) 74 | #define ISSET(v, n) ((v)[n]) 75 | #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) 76 | #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 77 | #define STATEVARS int vn; char *space 78 | #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 79 | if ((m)->space == NULL) return(REG_ESPACE); \ 80 | (m)->vn = 0; } 81 | #define STATETEARDOWN(m) { free((m)->space); } 82 | #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 83 | #define onestate int 84 | #define INIT(o, n) ((o) = (n)) 85 | #define INC(o) ((o)++) 86 | #define ISSTATEIN(v, o) ((v)[o]) 87 | /* some abbreviations; note that some of these know variable names! */ 88 | /* do "if I'm here, I can also be there" etc without branches */ 89 | #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 90 | #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 91 | #define ISSETBACK(v, n) ((v)[here - (n)]) 92 | /* function names */ 93 | #define LNAMES /* flag */ 94 | 95 | #include "engine.c" 96 | 97 | /* 98 | - regexec - interface for matching 99 | = extern int regexec(const regex_t *, const char *, size_t, \ 100 | = regmatch_t [], int); 101 | = #define REG_NOTBOL 00001 102 | = #define REG_NOTEOL 00002 103 | = #define REG_STARTEND 00004 104 | = #define REG_TRACE 00400 // tracing of execution 105 | = #define REG_LARGE 01000 // force large representation 106 | = #define REG_BACKR 02000 // force use of backref code 107 | * 108 | * We put this here so we can exploit knowledge of the state representation 109 | * when choosing which matcher to call. Also, by this point the matchers 110 | * have been prototyped. 111 | */ 112 | int /* 0 success, REG_NOMATCH failure */ 113 | regexec(preg, string, nmatch, pmatch, eflags) 114 | const regex_t *preg; 115 | const char *string; 116 | size_t nmatch; 117 | regmatch_t pmatch[]; 118 | int eflags; 119 | { 120 | register struct re_guts *g = preg->re_g; 121 | #ifdef REDEBUG 122 | # define GOODFLAGS(f) (f) 123 | #else 124 | # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 125 | #endif 126 | 127 | if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 128 | return(REG_BADPAT); 129 | assert(!(g->iflags&BAD)); 130 | if (g->iflags&BAD) /* backstop for no-debug case */ 131 | return(REG_BADPAT); 132 | eflags = GOODFLAGS(eflags); 133 | 134 | if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) 135 | return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); 136 | else 137 | return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); 138 | } 139 | -------------------------------------------------------------------------------- /ext/re/regfree.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "regex.h" 5 | 6 | #include "utils.h" 7 | #include "regex2.h" 8 | 9 | /* 10 | - regfree - free everything 11 | = extern void regfree(regex_t *); 12 | */ 13 | void 14 | regfree(preg) 15 | regex_t *preg; 16 | { 17 | register struct re_guts *g; 18 | 19 | if (preg->re_magic != MAGIC1) /* oops */ 20 | return; /* nice to complain, but hard */ 21 | 22 | g = preg->re_g; 23 | if (g == NULL || g->magic != MAGIC2) /* oops again */ 24 | return; 25 | preg->re_magic = 0; /* mark it invalid */ 26 | g->magic = 0; /* mark it invalid */ 27 | 28 | if (g->strip != NULL) 29 | free((char *)g->strip); 30 | if (g->sets != NULL) 31 | free((char *)g->sets); 32 | if (g->setbits != NULL) 33 | free((char *)g->setbits); 34 | if (g->must != NULL) 35 | free(g->must); 36 | free((char *)g); 37 | } 38 | -------------------------------------------------------------------------------- /ext/re/split.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | - split - divide a string into fields, like awk split() 6 | = int split(char *string, char *fields[], int nfields, char *sep); 7 | */ 8 | int /* number of fields, including overflow */ 9 | split(string, fields, nfields, sep) 10 | char *string; 11 | char *fields[]; /* list is not NULL-terminated */ 12 | int nfields; /* number of entries available in fields[] */ 13 | char *sep; /* "" white, "c" single char, "ab" [ab]+ */ 14 | { 15 | register char *p = string; 16 | register char c; /* latest character */ 17 | register char sepc = sep[0]; 18 | register char sepc2; 19 | register int fn; 20 | register char **fp = fields; 21 | register char *sepp; 22 | register int trimtrail; 23 | 24 | /* white space */ 25 | if (sepc == '\0') { 26 | while ((c = *p++) == ' ' || c == '\t') 27 | continue; 28 | p--; 29 | trimtrail = 1; 30 | sep = " \t"; /* note, code below knows this is 2 long */ 31 | sepc = ' '; 32 | } else 33 | trimtrail = 0; 34 | sepc2 = sep[1]; /* now we can safely pick this up */ 35 | 36 | /* catch empties */ 37 | if (*p == '\0') 38 | return(0); 39 | 40 | /* single separator */ 41 | if (sepc2 == '\0') { 42 | fn = nfields; 43 | for (;;) { 44 | *fp++ = p; 45 | fn--; 46 | if (fn == 0) 47 | break; 48 | while ((c = *p++) != sepc) 49 | if (c == '\0') 50 | return(nfields - fn); 51 | *(p-1) = '\0'; 52 | } 53 | /* we have overflowed the fields vector -- just count them */ 54 | fn = nfields; 55 | for (;;) { 56 | while ((c = *p++) != sepc) 57 | if (c == '\0') 58 | return(fn); 59 | fn++; 60 | } 61 | /* not reached */ 62 | } 63 | 64 | /* two separators */ 65 | if (sep[2] == '\0') { 66 | fn = nfields; 67 | for (;;) { 68 | *fp++ = p; 69 | fn--; 70 | while ((c = *p++) != sepc && c != sepc2) 71 | if (c == '\0') { 72 | if (trimtrail && **(fp-1) == '\0') 73 | fn++; 74 | return(nfields - fn); 75 | } 76 | if (fn == 0) 77 | break; 78 | *(p-1) = '\0'; 79 | while ((c = *p++) == sepc || c == sepc2) 80 | continue; 81 | p--; 82 | } 83 | /* we have overflowed the fields vector -- just count them */ 84 | fn = nfields; 85 | while (c != '\0') { 86 | while ((c = *p++) == sepc || c == sepc2) 87 | continue; 88 | p--; 89 | fn++; 90 | while ((c = *p++) != '\0' && c != sepc && c != sepc2) 91 | continue; 92 | } 93 | /* might have to trim trailing white space */ 94 | if (trimtrail) { 95 | p--; 96 | while ((c = *--p) == sepc || c == sepc2) 97 | continue; 98 | p++; 99 | if (*p != '\0') { 100 | if (fn == nfields+1) 101 | *p = '\0'; 102 | fn--; 103 | } 104 | } 105 | return(fn); 106 | } 107 | 108 | /* n separators */ 109 | fn = 0; 110 | for (;;) { 111 | if (fn < nfields) 112 | *fp++ = p; 113 | fn++; 114 | for (;;) { 115 | c = *p++; 116 | if (c == '\0') 117 | return(fn); 118 | sepp = sep; 119 | while ((sepc = *sepp++) != '\0' && sepc != c) 120 | continue; 121 | if (sepc != '\0') /* it was a separator */ 122 | break; 123 | } 124 | if (fn < nfields) 125 | *(p-1) = '\0'; 126 | for (;;) { 127 | c = *p++; 128 | sepp = sep; 129 | while ((sepc = *sepp++) != '\0' && sepc != c) 130 | continue; 131 | if (sepc == '\0') /* it wasn't a separator */ 132 | break; 133 | } 134 | p--; 135 | } 136 | 137 | /* not reached */ 138 | } 139 | 140 | #ifdef TEST_SPLIT 141 | 142 | 143 | /* 144 | * test program 145 | * pgm runs regression 146 | * pgm sep splits stdin lines by sep 147 | * pgm str sep splits str by sep 148 | * pgm str sep n splits str by sep n times 149 | */ 150 | int 151 | main(argc, argv) 152 | int argc; 153 | char *argv[]; 154 | { 155 | char buf[512]; 156 | register int n; 157 | # define MNF 10 158 | char *fields[MNF]; 159 | 160 | if (argc > 4) 161 | for (n = atoi(argv[3]); n > 0; n--) { 162 | (void) strcpy(buf, argv[1]); 163 | } 164 | else if (argc > 3) 165 | for (n = atoi(argv[3]); n > 0; n--) { 166 | (void) strcpy(buf, argv[1]); 167 | (void) split(buf, fields, MNF, argv[2]); 168 | } 169 | else if (argc > 2) 170 | dosplit(argv[1], argv[2]); 171 | else if (argc > 1) 172 | while (fgets(buf, sizeof(buf), stdin) != NULL) { 173 | buf[strlen(buf)-1] = '\0'; /* stomp newline */ 174 | dosplit(buf, argv[1]); 175 | } 176 | else 177 | regress(); 178 | 179 | exit(0); 180 | } 181 | 182 | dosplit(string, seps) 183 | char *string; 184 | char *seps; 185 | { 186 | # define NF 5 187 | char *fields[NF]; 188 | register int nf; 189 | 190 | nf = split(string, fields, NF, seps); 191 | print(nf, NF, fields); 192 | } 193 | 194 | print(nf, nfp, fields) 195 | int nf; 196 | int nfp; 197 | char *fields[]; 198 | { 199 | register int fn; 200 | register int bound; 201 | 202 | bound = (nf > nfp) ? nfp : nf; 203 | printf("%d:\t", nf); 204 | for (fn = 0; fn < bound; fn++) 205 | printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); 206 | } 207 | 208 | #define RNF 5 /* some table entries know this */ 209 | struct { 210 | char *str; 211 | char *seps; 212 | int nf; 213 | char *fi[RNF]; 214 | } tests[] = { 215 | "", " ", 0, { "" }, 216 | " ", " ", 2, { "", "" }, 217 | "x", " ", 1, { "x" }, 218 | "xy", " ", 1, { "xy" }, 219 | "x y", " ", 2, { "x", "y" }, 220 | "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, 221 | " a bcd", " ", 4, { "", "", "a", "bcd" }, 222 | "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 223 | " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 224 | 225 | "", " _", 0, { "" }, 226 | " ", " _", 2, { "", "" }, 227 | "x", " _", 1, { "x" }, 228 | "x y", " _", 2, { "x", "y" }, 229 | "ab _ cd", " _", 2, { "ab", "cd" }, 230 | " a_b c ", " _", 5, { "", "a", "b", "c", "" }, 231 | "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, 232 | " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, 233 | 234 | "", " _~", 0, { "" }, 235 | " ", " _~", 2, { "", "" }, 236 | "x", " _~", 1, { "x" }, 237 | "x y", " _~", 2, { "x", "y" }, 238 | "ab _~ cd", " _~", 2, { "ab", "cd" }, 239 | " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, 240 | "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, 241 | "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, 242 | 243 | "", " _~-", 0, { "" }, 244 | " ", " _~-", 2, { "", "" }, 245 | "x", " _~-", 1, { "x" }, 246 | "x y", " _~-", 2, { "x", "y" }, 247 | "ab _~- cd", " _~-", 2, { "ab", "cd" }, 248 | " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, 249 | "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, 250 | "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, 251 | 252 | "", " ", 0, { "" }, 253 | " ", " ", 2, { "", "" }, 254 | "x", " ", 1, { "x" }, 255 | "xy", " ", 1, { "xy" }, 256 | "x y", " ", 2, { "x", "y" }, 257 | "abc def g ", " ", 4, { "abc", "def", "g", "" }, 258 | " a bcd", " ", 3, { "", "a", "bcd" }, 259 | "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, 260 | " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, 261 | 262 | "", "", 0, { "" }, 263 | " ", "", 0, { "" }, 264 | "x", "", 1, { "x" }, 265 | "xy", "", 1, { "xy" }, 266 | "x y", "", 2, { "x", "y" }, 267 | "abc def g ", "", 3, { "abc", "def", "g" }, 268 | "\t a bcd", "", 2, { "a", "bcd" }, 269 | " a \tb\t c ", "", 3, { "a", "b", "c" }, 270 | "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, 271 | "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, 272 | " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, 273 | 274 | NULL, NULL, 0, { NULL }, 275 | }; 276 | 277 | regress() 278 | { 279 | char buf[512]; 280 | register int n; 281 | char *fields[RNF+1]; 282 | register int nf; 283 | register int i; 284 | register int printit; 285 | register char *f; 286 | 287 | for (n = 0; tests[n].str != NULL; n++) { 288 | (void) strcpy(buf, tests[n].str); 289 | fields[RNF] = NULL; 290 | nf = split(buf, fields, RNF, tests[n].seps); 291 | printit = 0; 292 | if (nf != tests[n].nf) { 293 | printf("split `%s' by `%s' gave %d fields, not %d\n", 294 | tests[n].str, tests[n].seps, nf, tests[n].nf); 295 | printit = 1; 296 | } else if (fields[RNF] != NULL) { 297 | printf("split() went beyond array end\n"); 298 | printit = 1; 299 | } else { 300 | for (i = 0; i < nf && i < RNF; i++) { 301 | f = fields[i]; 302 | if (f == NULL) 303 | f = "(NULL)"; 304 | if (strcmp(f, tests[n].fi[i]) != 0) { 305 | printf("split `%s' by `%s', field %d is `%s', not `%s'\n", 306 | tests[n].str, tests[n].seps, 307 | i, fields[i], tests[n].fi[i]); 308 | printit = 1; 309 | } 310 | } 311 | } 312 | if (printit) 313 | print(nf, RNF, fields); 314 | } 315 | } 316 | #endif 317 | -------------------------------------------------------------------------------- /ext/re/tests: -------------------------------------------------------------------------------- 1 | # regular expression test set 2 | # Lines are at least three fields, separated by one or more tabs. "" stands 3 | # for an empty field. First field is an RE. Second field is flags. If 4 | # C flag given, regcomp() is expected to fail, and the third field is the 5 | # error name (minus the leading REG_). 6 | # 7 | # Otherwise it is expected to succeed, and the third field is the string to 8 | # try matching it against. If there is no fourth field, the match is 9 | # expected to fail. If there is a fourth field, it is the substring that 10 | # the RE is expected to match. If there is a fifth field, it is a comma- 11 | # separated list of what the subexpressions should match, with - indicating 12 | # no match for that one. In both the fourth and fifth fields, a (sub)field 13 | # starting with @ indicates that the (sub)expression is expected to match 14 | # a null string followed by the stuff after the @; this provides a way to 15 | # test where null strings match. The character `N' in REs and strings 16 | # is newline, `S' is space, `T' is tab, `Z' is NUL. 17 | # 18 | # The full list of flags: 19 | # - placeholder, does nothing 20 | # b RE is a BRE, not an ERE 21 | # & try it as both an ERE and a BRE 22 | # C regcomp() error expected, third field is error name 23 | # i REG_ICASE 24 | # m ("mundane") REG_NOSPEC 25 | # s REG_NOSUB (not really testable) 26 | # n REG_NEWLINE 27 | # ^ REG_NOTBOL 28 | # $ REG_NOTEOL 29 | # # REG_STARTEND (see below) 30 | # p REG_PEND 31 | # 32 | # For REG_STARTEND, the start/end offsets are those of the substring 33 | # enclosed in (). 34 | 35 | # basics 36 | a & a a 37 | abc & abc abc 38 | abc|de - abc abc 39 | a|b|c - abc a 40 | 41 | # parentheses and perversions thereof 42 | a(b)c - abc abc 43 | a\(b\)c b abc abc 44 | a( C EPAREN 45 | a( b a( a( 46 | a\( - a( a( 47 | a\( bC EPAREN 48 | a\(b bC EPAREN 49 | a(b C EPAREN 50 | a(b b a(b a(b 51 | # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly) 52 | a) - a) a) 53 | ) - ) ) 54 | # end gagging (in a just world, those *should* give EPAREN) 55 | a) b a) a) 56 | a\) bC EPAREN 57 | \) bC EPAREN 58 | a()b - ab ab 59 | a\(\)b b ab ab 60 | 61 | # anchoring and REG_NEWLINE 62 | ^abc$ & abc abc 63 | a^b - a^b 64 | a^b b a^b a^b 65 | a$b - a$b 66 | a$b b a$b a$b 67 | ^ & abc @abc 68 | $ & abc @ 69 | ^$ & "" @ 70 | $^ - "" @ 71 | \($\)\(^\) b "" @ 72 | # stop retching, those are legitimate (although disgusting) 73 | ^^ - "" @ 74 | $$ - "" @ 75 | b$ & abNc 76 | b$ &n abNc b 77 | ^b$ & aNbNc 78 | ^b$ &n aNbNc b 79 | ^$ &n aNNb @Nb 80 | ^$ n abc 81 | ^$ n abcN @ 82 | $^ n aNNb @Nb 83 | \($\)\(^\) bn aNNb @Nb 84 | ^^ n^ aNNb @Nb 85 | $$ n aNNb @NN 86 | ^a ^ a 87 | a$ $ a 88 | ^a ^n aNb 89 | ^b ^n aNb b 90 | a$ $n bNa 91 | b$ $n bNa b 92 | a*(^b$)c* - b b 93 | a*\(^b$\)c* b b b 94 | 95 | # certain syntax errors and non-errors 96 | | C EMPTY 97 | | b | | 98 | * C BADRPT 99 | * b * * 100 | + C BADRPT 101 | ? C BADRPT 102 | "" &C EMPTY 103 | () - abc @abc 104 | \(\) b abc @abc 105 | a||b C EMPTY 106 | |ab C EMPTY 107 | ab| C EMPTY 108 | (|a)b C EMPTY 109 | (a|)b C EMPTY 110 | (*a) C BADRPT 111 | (+a) C BADRPT 112 | (?a) C BADRPT 113 | ({1}a) C BADRPT 114 | \(\{1\}a\) bC BADRPT 115 | (a|*b) C BADRPT 116 | (a|+b) C BADRPT 117 | (a|?b) C BADRPT 118 | (a|{1}b) C BADRPT 119 | ^* C BADRPT 120 | ^* b * * 121 | ^+ C BADRPT 122 | ^? C BADRPT 123 | ^{1} C BADRPT 124 | ^\{1\} bC BADRPT 125 | 126 | # metacharacters, backslashes 127 | a.c & abc abc 128 | a[bc]d & abd abd 129 | a\*c & a*c a*c 130 | a\\b & a\b a\b 131 | a\\\*b & a\*b a\*b 132 | a\bc & abc abc 133 | a\ &C EESCAPE 134 | a\\bc & a\bc a\bc 135 | \{ bC BADRPT 136 | a\[b & a[b a[b 137 | a[b &C EBRACK 138 | # trailing $ is a peculiar special case for the BRE code 139 | a$ & a a 140 | a$ & a$ 141 | a\$ & a 142 | a\$ & a$ a$ 143 | a\\$ & a 144 | a\\$ & a$ 145 | a\\$ & a\$ 146 | a\\$ & a\ a\ 147 | 148 | # back references, ugh 149 | a\(b\)\2c bC ESUBREG 150 | a\(b\1\)c bC ESUBREG 151 | a\(b*\)c\1d b abbcbbd abbcbbd bb 152 | a\(b*\)c\1d b abbcbd 153 | a\(b*\)c\1d b abbcbbbd 154 | ^\(.\)\1 b abc 155 | a\([bc]\)\1d b abcdabbd abbd b 156 | a\(\([bc]\)\2\)*d b abbccd abbccd 157 | a\(\([bc]\)\2\)*d b abbcbd 158 | # actually, this next one probably ought to fail, but the spec is unclear 159 | a\(\(b\)*\2\)*d b abbbd abbbd 160 | # here is a case that no NFA implementation does right 161 | \(ab*\)[ab]*\1 b ababaaa ababaaa a 162 | # check out normal matching in the presence of back refs 163 | \(a\)\1bcd b aabcd aabcd 164 | \(a\)\1bc*d b aabcd aabcd 165 | \(a\)\1bc*d b aabd aabd 166 | \(a\)\1bc*d b aabcccd aabcccd 167 | \(a\)\1bc*[ce]d b aabcccd aabcccd 168 | ^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd 169 | 170 | # ordinary repetitions 171 | ab*c & abc abc 172 | ab+c - abc abc 173 | ab?c - abc abc 174 | a\(*\)b b a*b a*b 175 | a\(**\)b b ab ab 176 | a\(***\)b bC BADRPT 177 | *a b *a *a 178 | **a b a a 179 | ***a bC BADRPT 180 | 181 | # the dreaded bounded repetitions 182 | { & { { 183 | {abc & {abc {abc 184 | {1 C BADRPT 185 | {1} C BADRPT 186 | a{b & a{b a{b 187 | a{1}b - ab ab 188 | a\{1\}b b ab ab 189 | a{1,}b - ab ab 190 | a\{1,\}b b ab ab 191 | a{1,2}b - aab aab 192 | a\{1,2\}b b aab aab 193 | a{1 C EBRACE 194 | a\{1 bC EBRACE 195 | a{1a C EBRACE 196 | a\{1a bC EBRACE 197 | a{1a} C BADBR 198 | a\{1a\} bC BADBR 199 | a{,2} - a{,2} a{,2} 200 | a\{,2\} bC BADBR 201 | a{,} - a{,} a{,} 202 | a\{,\} bC BADBR 203 | a{1,x} C BADBR 204 | a\{1,x\} bC BADBR 205 | a{1,x C EBRACE 206 | a\{1,x bC EBRACE 207 | a{300} C BADBR 208 | a\{300\} bC BADBR 209 | a{1,0} C BADBR 210 | a\{1,0\} bC BADBR 211 | ab{0,0}c - abcac ac 212 | ab\{0,0\}c b abcac ac 213 | ab{0,1}c - abcac abc 214 | ab\{0,1\}c b abcac abc 215 | ab{0,3}c - abbcac abbc 216 | ab\{0,3\}c b abbcac abbc 217 | ab{1,1}c - acabc abc 218 | ab\{1,1\}c b acabc abc 219 | ab{1,3}c - acabc abc 220 | ab\{1,3\}c b acabc abc 221 | ab{2,2}c - abcabbc abbc 222 | ab\{2,2\}c b abcabbc abbc 223 | ab{2,4}c - abcabbc abbc 224 | ab\{2,4\}c b abcabbc abbc 225 | ((a{1,10}){1,10}){1,10} - a a a,a 226 | 227 | # multiple repetitions 228 | a** &C BADRPT 229 | a++ C BADRPT 230 | a?? C BADRPT 231 | a*+ C BADRPT 232 | a*? C BADRPT 233 | a+* C BADRPT 234 | a+? C BADRPT 235 | a?* C BADRPT 236 | a?+ C BADRPT 237 | a{1}{1} C BADRPT 238 | a*{1} C BADRPT 239 | a+{1} C BADRPT 240 | a?{1} C BADRPT 241 | a{1}* C BADRPT 242 | a{1}+ C BADRPT 243 | a{1}? C BADRPT 244 | a*{b} - a{b} a{b} 245 | a\{1\}\{1\} bC BADRPT 246 | a*\{1\} bC BADRPT 247 | a\{1\}* bC BADRPT 248 | 249 | # brackets, and numerous perversions thereof 250 | a[b]c & abc abc 251 | a[ab]c & abc abc 252 | a[^ab]c & adc adc 253 | a[]b]c & a]c a]c 254 | a[[b]c & a[c a[c 255 | a[-b]c & a-c a-c 256 | a[^]b]c & adc adc 257 | a[^-b]c & adc adc 258 | a[b-]c & a-c a-c 259 | a[b &C EBRACK 260 | a[] &C EBRACK 261 | a[1-3]c & a2c a2c 262 | a[3-1]c &C ERANGE 263 | a[1-3-5]c &C ERANGE 264 | a[[.-.]--]c & a-c a-c 265 | a[1- &C ERANGE 266 | a[[. &C EBRACK 267 | a[[.x &C EBRACK 268 | a[[.x. &C EBRACK 269 | a[[.x.] &C EBRACK 270 | a[[.x.]] & ax ax 271 | a[[.x,.]] &C ECOLLATE 272 | a[[.one.]]b & a1b a1b 273 | a[[.notdef.]]b &C ECOLLATE 274 | a[[.].]]b & a]b a]b 275 | a[[:alpha:]]c & abc abc 276 | a[[:notdef:]]c &C ECTYPE 277 | a[[: &C EBRACK 278 | a[[:alpha &C EBRACK 279 | a[[:alpha:] &C EBRACK 280 | a[[:alpha,:] &C ECTYPE 281 | a[[:]:]]b &C ECTYPE 282 | a[[:-:]]b &C ECTYPE 283 | a[[:alph:]] &C ECTYPE 284 | a[[:alphabet:]] &C ECTYPE 285 | [[:alnum:]]+ - -%@a0X- a0X 286 | [[:alpha:]]+ - -%@aX0- aX 287 | [[:blank:]]+ - aSSTb SST 288 | [[:cntrl:]]+ - aNTb NT 289 | [[:digit:]]+ - a019b 019 290 | [[:graph:]]+ - Sa%bS a%b 291 | [[:lower:]]+ - AabC ab 292 | [[:print:]]+ - NaSbN aSb 293 | [[:punct:]]+ - S%-&T %-& 294 | [[:space:]]+ - aSNTb SNT 295 | [[:upper:]]+ - aBCd BC 296 | [[:xdigit:]]+ - p0f3Cq 0f3C 297 | a[[=b=]]c & abc abc 298 | a[[= &C EBRACK 299 | a[[=b &C EBRACK 300 | a[[=b= &C EBRACK 301 | a[[=b=] &C EBRACK 302 | a[[=b,=]] &C ECOLLATE 303 | a[[=one=]]b & a1b a1b 304 | 305 | # complexities 306 | a(((b)))c - abc abc 307 | a(b|(c))d - abd abd 308 | a(b*|c)d - abbd abbd 309 | # just gotta have one DFA-buster, of course 310 | a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab 311 | # and an inline expansion in case somebody gets tricky 312 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab 313 | # and in case somebody just slips in an NFA... 314 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights 315 | # fish for anomalies as the number of states passes 32 316 | 12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789 317 | 123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890 318 | 1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901 319 | 12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012 320 | 123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123 321 | # and one really big one, beyond any plausible word width 322 | 1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890 323 | # fish for problems as brackets go past 8 324 | [ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm 325 | [ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo 326 | [ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq 327 | [ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq 328 | 329 | # subtleties of matching 330 | abc & xabcy abc 331 | a\(b\)?c\1d b acd 332 | aBc i Abc Abc 333 | a[Bc]*d i abBCcd abBCcd 334 | 0[[:upper:]]1 &i 0a1 0a1 335 | 0[[:lower:]]1 &i 0A1 0A1 336 | a[^b]c &i abc 337 | a[^b]c &i aBc 338 | a[^b]c &i adc adc 339 | [a]b[c] - abc abc 340 | [a]b[a] - aba aba 341 | [abc]b[abc] - abc abc 342 | [abc]b[abd] - abd abd 343 | a(b?c)+d - accd accd 344 | (wee|week)(knights|night) - weeknights weeknights 345 | (we|wee|week|frob)(knights|night|day) - weeknights weeknights 346 | a[bc]d - xyzaaabcaababdacd abd 347 | a[ab]c - aaabc abc 348 | abc s abc abc 349 | a* & b @b 350 | 351 | # Let's have some fun -- try to match a C comment. 352 | # first the obvious, which looks okay at first glance... 353 | /\*.*\*/ - /*x*/ /*x*/ 354 | # but... 355 | /\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/ 356 | # okay, we must not match */ inside; try to do that... 357 | /\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/ 358 | /\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/ 359 | # but... 360 | /\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/ 361 | # and a still fancier version, which does it right (I think)... 362 | /\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/ 363 | /\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/ 364 | /\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/ 365 | /\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/ 366 | /\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/ 367 | /\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/ 368 | 369 | # subexpressions 370 | .* - abc abc - 371 | a(b)(c)d - abcd abcd b,c 372 | a(((b)))c - abc abc b,b,b 373 | a(b|(c))d - abd abd b,- 374 | a(b*|c|e)d - abbd abbd bb 375 | a(b*|c|e)d - acd acd c 376 | a(b*|c|e)d - ad ad @d 377 | a(b?)c - abc abc b 378 | a(b?)c - ac ac @c 379 | a(b+)c - abc abc b 380 | a(b+)c - abbbc abbbc bbb 381 | a(b*)c - ac ac @c 382 | (a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de 383 | # the regression tester only asks for 9 subexpressions 384 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j 385 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k 386 | a([bc]?)c - abc abc b 387 | a([bc]?)c - ac ac @c 388 | a([bc]+)c - abc abc b 389 | a([bc]+)c - abcc abcc bc 390 | a([bc]+)bc - abcbc abcbc bc 391 | a(bb+|b)b - abb abb b 392 | a(bbb+|bb+|b)b - abb abb b 393 | a(bbb+|bb+|b)b - abbb abbb bb 394 | a(bbb+|bb+|b)bb - abbb abbb b 395 | (.*).* - abcdef abcdef abcdef 396 | (a*)* - bc @b @b 397 | 398 | # do we get the right subexpression when it is used more than once? 399 | a(b|c)*d - ad ad - 400 | a(b|c)*d - abcd abcd c 401 | a(b|c)+d - abd abd b 402 | a(b|c)+d - abcd abcd c 403 | a(b|c?)+d - ad ad @d 404 | a(b|c?)+d - abcd abcd @d 405 | a(b|c){0,0}d - ad ad - 406 | a(b|c){0,1}d - ad ad - 407 | a(b|c){0,1}d - abd abd b 408 | a(b|c){0,2}d - ad ad - 409 | a(b|c){0,2}d - abcd abcd c 410 | a(b|c){0,}d - ad ad - 411 | a(b|c){0,}d - abcd abcd c 412 | a(b|c){1,1}d - abd abd b 413 | a(b|c){1,1}d - acd acd c 414 | a(b|c){1,2}d - abd abd b 415 | a(b|c){1,2}d - abcd abcd c 416 | a(b|c){1,}d - abd abd b 417 | a(b|c){1,}d - abcd abcd c 418 | a(b|c){2,2}d - acbd acbd b 419 | a(b|c){2,2}d - abcd abcd c 420 | a(b|c){2,4}d - abcd abcd c 421 | a(b|c){2,4}d - abcbd abcbd b 422 | a(b|c){2,4}d - abcbcd abcbcd c 423 | a(b|c){2,}d - abcd abcd c 424 | a(b|c){2,}d - abcbd abcbd b 425 | a(b+|((c)*))+d - abd abd @d,@d,- 426 | a(b+|((c)*))+d - abcd abcd @d,@d,- 427 | 428 | # check out the STARTEND option 429 | [abc] &# a(b)c b 430 | [abc] &# a(d)c 431 | [abc] &# a(bc)d b 432 | [abc] &# a(dc)d c 433 | . &# a()c 434 | b.*c &# b(bc)c bc 435 | b.* &# b(bc)c bc 436 | .*c &# b(bc)c bc 437 | 438 | # plain strings, with the NOSPEC flag 439 | abc m abc abc 440 | abc m xabcy abc 441 | abc m xyz 442 | a*b m aba*b a*b 443 | a*b m ab 444 | "" mC EMPTY 445 | 446 | # cases involving NULs 447 | aZb & a a 448 | aZb &p a 449 | aZb &p# (aZb) aZb 450 | aZ*b &p# (ab) ab 451 | a.b &# (aZb) aZb 452 | a.* &# (aZb)c aZb 453 | 454 | # word boundaries (ick) 455 | [[:<:]]a & a a 456 | [[:<:]]a & ba 457 | [[:<:]]a & -a a 458 | a[[:>:]] & a a 459 | a[[:>:]] & ab 460 | a[[:>:]] & a- a 461 | [[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc 462 | [[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc 463 | [[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc 464 | [[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc 465 | [[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_ 466 | [[:<:]]a_b[[:>:]] & x_a_b 467 | 468 | # past problems, and suspected problems 469 | (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1 470 | abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop 471 | abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv 472 | (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11 473 | CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11 474 | Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz 475 | a?b - ab ab 476 | -\{0,1\}[0-9]*$ b -5 -5 477 | a*a*a*a*a*a*a* & aaaaaa aaaaaa 478 | -------------------------------------------------------------------------------- /ext/re/utils.h: -------------------------------------------------------------------------------- 1 | /* utility definitions */ 2 | #ifdef _POSIX2_RE_DUP_MAX 3 | #define DUPMAX _POSIX2_RE_DUP_MAX 4 | #else 5 | #define DUPMAX 255 6 | #endif 7 | #define INFINITY (DUPMAX + 1) 8 | #define NC (CHAR_MAX - CHAR_MIN + 1) 9 | typedef unsigned char uch; 10 | 11 | /* switch off assertions (if not already off) if no REDEBUG */ 12 | #ifndef REDEBUG 13 | #ifndef NDEBUG 14 | #define NDEBUG /* no assertions please */ 15 | #endif 16 | #endif 17 | #include 18 | 19 | /* for old systems with bcopy() but no memmove() */ 20 | #ifdef USEBCOPY 21 | #define memmove(d, s, c) bcopy(s, d, c) 22 | #endif 23 | -------------------------------------------------------------------------------- /ext/tsx/LICENSE: -------------------------------------------------------------------------------- 1 | LICENSE TERMS 2 | 3 | (c) 2002 Manuel Heras-Gilsanz (manuel@heras-gilsanz.com) 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | Neither the name of Manuel Heras-Gilsanz nor the names of the 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /ext/tsx/Makefile: -------------------------------------------------------------------------------- 1 | #DEBUG=-g 2 | DEBUG= 3 | SCHEME_H_DIR=.. 4 | CC=gcc 5 | CFLAGS=-DUSE_DL=1 -I $(SCHEME_H_DIR) 6 | 7 | tsx.so : tsx.c tsx.h Makefile 8 | $(CC) -shared -Wall -fPIC $(CFLAGS) -o tsx.so $(DEBUG) tsx.c 9 | strip tsx.so 10 | ls -l tsx.so 11 | 12 | .PHONY : clean 13 | clean: 14 | rm -f *.o 15 | rm -f tsx.so 16 | rm -f *~ -------------------------------------------------------------------------------- /ext/tsx/README: -------------------------------------------------------------------------------- 1 | TinyScheme Extensions (TSX) 1.1 [September, 2002] 2 | (c) 2002 Manuel Heras-Gilsanz (manuel@heras-gilsanz.com) 3 | 4 | This software is subject to the license terms contained in the 5 | LICENSE file. 6 | 7 | Changelog: 8 | 1.1 (Sept. 2002) Updated to tinyscheme 1.31 9 | 1.0 (April 2002) First released version 10 | 11 | 12 | WHAT IS TSX? 13 | 14 | TinyScheme Extensions is a set of dynamic libraries incorporating 15 | additional funcionality to TinyScheme, a lightweight 16 | implementation of the Scheme programming language. TinyScheme 17 | (http://tinyscheme.sourceforge.net) is maintained by D. Souflis 18 | (dsouflis@acm.org), and is based on MiniSCHEME version 0.85k4. 19 | 20 | Scheme is a very nice and powerful programming language, but the 21 | basic language is very minimalistic in terms of library functions; 22 | only basic file input / output functionality is specified. 23 | Different implementations of the language (MIT Scheme, GUILE, 24 | Bigloo...) provide their own extension libraries. TSX attempts to 25 | provide commonly needed functions at a small cost in terms of 26 | additional program footprint. The library is modularized, so that 27 | it is possible (and easy!) to select desired functionality via 28 | #defines in tsx.h. 29 | 30 | 31 | INSTALLATION 32 | 33 | TSX has been tested on GNU/Linux 2.4.2 with gcc 2.96 and 34 | libc-2.2.2, with TinyScheme 1.31. 35 | 36 | To install, copy the distribution file to the directory 37 | where TinyScheme is installed (and where scheme.h lies), 38 | and run make. If building succeeds, a file called tsx.so 39 | should be created. This file can be loaded as a TinyScheme 40 | extension with 41 | 42 | (load-extension "tsx-1.0/tsx") 43 | 44 | After loading TSX, you can make use of its functions. 45 | To reduce footprint, you can choose the functionality which 46 | will be included. To do so, have a look at tsx.h and 47 | comment the #defines for unneeded modules. 48 | 49 | If you get compiler errors, make sure you have enabled 50 | dynamic modules in your tinyscheme runtime (define USE_DL 51 | somewhere near the top in scheme.h). 52 | 53 | 54 | SAMPLE APPLICATIONS 55 | 56 | Three sample applications are distributed with TSX 1.0. 57 | The code is not particularly elegant, nor written in proper 58 | functional style, but is provided for illustration of the 59 | implemented calls. 60 | 61 | -smtp.scm 62 | Sends an email to the user getting the username from 63 | the USER shell variable, connecting to the SMTP port 64 | on the local machine. 65 | 66 | -listhome.scm 67 | Provides a list of all the files on the user's home 68 | directory (obtained with the HOME environment variable). 69 | 70 | -srepl.scm 71 | Provides a socket-based read-eval-print-loop. It listens 72 | for connections on the 9000 port of the local machines, 73 | and executes the commands received. To test it, run 74 | 75 | telnet localhost 9000 76 | 77 | after starting the sample application, and type Scheme 78 | expressions. You will get the evaluations. To exit the 79 | session, type "quit" and TinyScheme will exit, closing 80 | the socket. The output of some functions will not 81 | be the same as you would obtain on TinyScheme's 82 | "command line", because standard output is not 83 | redirected to the socket, but most commands work ok. 84 | 85 | You should copy these applications to the directory where 86 | TinyScheme is installed (i.e., where the "scheme" binary 87 | file resides), and can be runned with: 88 | 89 | ./scheme listhome.scm 90 | ./scheme smtp.scm 91 | ./scheme srepl.scm 92 | 93 | 94 | TSX FUNCTIONS 95 | 96 | The extension functions implemented by TinyScheme Extensions are 97 | documented in the file "tsx-functions.txt". 98 | 99 | END 100 | -------------------------------------------------------------------------------- /ext/tsx/listhome.scm: -------------------------------------------------------------------------------- 1 | ; listhome.scm 2 | ; Sample usage of TinyScheme Extension 3 | ; This simple program lists the directory entries on the 4 | ; user's home directory. 5 | 6 | ; It uses the following TinyScheme Extension functions: 7 | ; getenv 8 | ; Used to get HOME environment variable. 9 | ; open-dir-stream 10 | ; Used to open directory stream. 11 | ; read-dir-entry 12 | ; Used to read directory entries. 13 | ; close-dir-entry 14 | ; Used at the end, to close directory stream when done. 15 | 16 | ; check that extensions are enabled 17 | (if (not (defined? 'load-extension)) 18 | (begin 19 | (display "TinyScheme has extensions disabled. Enable them!!") 20 | (newline) 21 | (quit))) 22 | 23 | ; load TinyScheme extension 24 | (load-extension "tsx-1.0/tsx") 25 | 26 | ; check that the necessary functions are available (the user 27 | ; might have removed some functionality...) 28 | (if (or 29 | (not (defined? 'getenv)) 30 | (not (defined? 'open-dir-stream)) 31 | (not (defined? 'read-dir-entry)) 32 | (not (defined? 'close-dir-stream))) 33 | (begin 34 | (display "Some necessary functions are not available. Exiting!") 35 | (newline) 36 | (quit))) 37 | 38 | ; get user's home dir from HOME environment var 39 | (define homedir (getenv "HOME")) 40 | (display "Listing contents of ") (display homedir) (newline) 41 | 42 | ; create directory stream to read dir entries 43 | (define dirstream (open-dir-stream homedir)) 44 | (if (not dirstream) 45 | (begin 46 | (display "Unable to open home directory!! Check value of HOME environment var.") 47 | (quit))) 48 | 49 | (let listentry ((entry (read-dir-entry dirstream))) 50 | (if (eof-object? entry) 51 | #t 52 | (begin 53 | (display entry) 54 | (newline) 55 | (listentry (read-dir-entry dirstream))))) 56 | 57 | (close-dir-stream dirstream) 58 | 59 | -------------------------------------------------------------------------------- /ext/tsx/msvcbuild.bat: -------------------------------------------------------------------------------- 1 | @setlocal 2 | @set SCHEME_H_DIR=..\..\src 3 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_DEPRECATE /D_WINSOCK_DEPRECATED_NO_WARNINGS /I%SCHEME_H_DIR% 4 | @set MSLIB=lib /nologo 5 | @set MSLINK=link /nologo 6 | 7 | @if not exist lib\ ( 8 | mkdir lib 9 | ) 10 | 11 | %MSCOMPILE% /MT tsx.c 12 | %MSLIB% /out:lib\tsx.lib tsx.obj ws2_32.lib 13 | 14 | del tsx.obj 15 | -------------------------------------------------------------------------------- /ext/tsx/smtp.scm: -------------------------------------------------------------------------------- 1 | ; smtp.scm 2 | ; Sample usage of TinyScheme Extensions 3 | ; This very simple program sends a message using SMTP to the local machine. 4 | 5 | ; It uses the following TinyScheme Extension functions: 6 | ; getenv 7 | ; used to get name of current user, wich is the recipient 8 | ; of the message. 9 | ; make-client-socket 10 | ; used to connect to SMTP port on local machine 11 | ; send 12 | ; used to send commands and email message 13 | ; recv-new-string 14 | ; used to read responses from SMTP server 15 | ; close-socket 16 | ; used to free socket at the end 17 | 18 | ; check that string ports are available... 19 | (if (not (defined? 'open-output-string)) 20 | (begin 21 | (display "We need string ports!! Recompile TinyScheme with string ports, if you want to run this sample...") 22 | (quit))) 23 | 24 | ; check that extensions are enabled 25 | (if (not (defined? 'load-extension)) 26 | (begin 27 | (display "TinyScheme has extensions disabled. Enable them!!") 28 | (newline) 29 | (quit))) 30 | 31 | ; load TinyScheme Extensions 32 | (load-extension "tsx-1.0/tsx") 33 | 34 | ; check that the necessary functions are available (the user 35 | ; might have removed some functionality...) 36 | (if (or 37 | (not (defined? 'getenv)) 38 | (not (defined? 'make-client-socket)) 39 | (not (defined? 'send)) 40 | (not (defined? 'close-socket)) 41 | (not (defined? 'recv-new-string))) 42 | (begin 43 | (display "Some necessary functions are not available. Exiting!") 44 | (newline) 45 | (quit))) 46 | 47 | ; get current user name 48 | (define user-name (getenv "USER")) 49 | 50 | ; if unable to get user name, use "nobody" 51 | (if (not user-name) 52 | (set! user-name "nobody")) 53 | 54 | ; create client socket to SMTP port (25) 55 | (define sock (make-client-socket "localhost" 25)) 56 | (display "Socket: ") (display sock) (newline) 57 | 58 | ; if unable to open socket, exit TinyScheme 59 | (if (not sock) 60 | (begin 61 | (display "Unable to open socket! Is SMTP enabled on this machine?") 62 | (quit))) 63 | 64 | ; define string buffers to send and receive 65 | (define recv-buf '()) 66 | 67 | ; receive SMTP welcome message onto recv-buf var 68 | (set! recv-buf (recv-new-string sock)) 69 | 70 | (display "Received:") (display recv-buf) (newline) 71 | 72 | (define helo "HELO localhost\n") 73 | (display "Sending HELO...") (newline) 74 | (send sock helo) 75 | 76 | ; receive response from server 77 | (set! recv-buf (recv-new-string sock)) 78 | (display "Received:") (display recv-buf) (newline) 79 | 80 | (define mailfrom (make-string (+ 20 (string-length user-name)))) 81 | (define mailfromport (open-output-string mailfrom)) 82 | (display "MAIL FROM: " mailfromport) 83 | (display user-name mailfromport) 84 | (display "\n" mailfromport) 85 | (close-output-port mailfromport) 86 | 87 | ; send MAIL FROM: command and receive response 88 | (display "Sending MAIL FROM:...") (newline) 89 | (send sock mailfrom) 90 | (set! recv-buf (recv-new-string sock)) 91 | (display "Received:") (display recv-buf) (newline) 92 | 93 | ; send RCPT TO: command and receive response 94 | (display "Sending RCPT TO:...") (newline) 95 | (define rcptto (make-string (+ 20 (string-length user-name)))) 96 | (define rcpttoport (open-output-string rcptto)) 97 | (display "RCPT TO: " rcpttoport) 98 | (display user-name rcpttoport) 99 | (display "\n" rcpttoport) 100 | (close-output-port rcpttoport) 101 | (send sock rcptto) 102 | (set! recv-buf (recv-new-string sock)) 103 | (display "Received:") (display recv-buf) (newline) 104 | 105 | 106 | ; send DATA command 107 | (display "Sending DATA...") (newline) 108 | (define data "DATA\n") 109 | (send sock data) 110 | (set! recv-buf (recv-new-string sock)) 111 | (display "Received:") (display recv-buf) (newline) 112 | 113 | ; send message 114 | (display "Sending message...") (newline) 115 | (define message "Hello!\nThis is a sample message sent from TinyScheme!\n\n.\n") 116 | (send sock message) 117 | (set! recv-buf (recv-new-string sock)) 118 | (display "Received:") (display recv-buf) (newline) 119 | 120 | ; send QUIT command 121 | (display "Sending QUIT command...") (newline) 122 | (define quit "QUIT\n") 123 | (send sock quit) 124 | (set! recv-buf (recv-new-string sock)) 125 | (display "Received:") (display recv-buf) (newline) 126 | 127 | ; close socket 128 | (close-socket sock) 129 | 130 | -------------------------------------------------------------------------------- /ext/tsx/srepl.scm: -------------------------------------------------------------------------------- 1 | ; srepl.scm 2 | ; Sample usage of TinyScheme Extensions 3 | ; This program provides a socket-based read-eval-print-loop. 4 | 5 | ; It uses the following TinyScheme Extension functions: 6 | ; make-server-socket 7 | ; used to create server socket on port 9000 8 | ; accept 9 | ; used to accept client requests for connection 10 | ; recv-new-string 11 | ; used to receive user's requests 12 | ; send 13 | ; used to send evaluation results 14 | ; close-socket 15 | ; used to free socket at the end 16 | 17 | ; check that string ports are available... 18 | (if (not (defined? 'open-output-string)) 19 | (begin 20 | (display "We need string ports!! Recompile TinyScheme with string ports,") 21 | (display " if you want to run this sample...") 22 | (quit))) 23 | 24 | ; check that extensions are enabled 25 | (if (not (defined? 'load-extension)) 26 | (begin 27 | (display "TinyScheme has extensions disabled. Enable them!!") 28 | (newline) 29 | (quit))) 30 | 31 | ; load TinyScheme Extensions 32 | (load-extension "tsx-1.0/tsx") 33 | 34 | ; check that the necessary functions are available (the user 35 | ; might have removed some functionality...) 36 | (if (or 37 | (not (defined? 'make-server-socket)) 38 | (not (defined? 'accept)) 39 | (not (defined? 'send)) 40 | (not (defined? 'close-socket)) 41 | (not (defined? 'recv-new-string))) 42 | (begin 43 | (display "Some necessary functions are not available. Exiting!") 44 | (newline) 45 | (quit))) 46 | 47 | ; create server socket on port 9000 48 | (define server-socket (make-server-socket 9000)) 49 | 50 | ; wait for client requests 51 | (define connected-socket (accept server-socket)) 52 | 53 | ; send welcome message 54 | (send connected-socket "Welcome to TinyScheme Extensions socket-REPL!\n") 55 | 56 | ; define auxiliary variables 57 | (define command '()) 58 | (define command-port '()) 59 | (define result '()) 60 | (define result-port '()) 61 | (define to-eval '()) 62 | 63 | (define extenv (current-environment)) 64 | (let repl () 65 | (send connected-socket "> ") 66 | (set! command (recv-new-string connected-socket)) 67 | (set! command-port (open-input-string command)) 68 | (set! to-eval (read command-port)) 69 | (set! result (make-string 250)) 70 | (set! result-port (open-output-string result)) 71 | (display (eval to-eval) result-port) 72 | (send connected-socket result) 73 | (send connected-socket "\n") 74 | (close-input-port command-port) 75 | (close-output-port result-port) 76 | (repl) 77 | ) 78 | -------------------------------------------------------------------------------- /ext/tsx/tsx-functions.txt: -------------------------------------------------------------------------------- 1 | TinyScheme Extensions (TSX) 1.1 [September, 2002] 2 | (c) 2002 Manuel Heras-Gilsanz (manuel@heras-gilsanz.com) 3 | 4 | This software is subject to the license terms contained in the 5 | LICENSE file. 6 | 7 | 8 | TSX FUNCTIONS 9 | 10 | TSX incorporates the following functions: 11 | 12 | *Sockets (included if HAVE_SOCKETS is defined in tsx.h) 13 | 14 | (make-client-socket host port) 15 | host: string (IP address or host name) 16 | port: integer number 17 | 18 | Returns a socket which is already connected to the 19 | specified host and port, or #f if the connection could 20 | not be performed. 21 | 22 | (make-server-socket port) 23 | port: integer number 24 | 25 | Returns a socket which is bound to the specified port on 26 | the local machine, and ready to accept connections. If the 27 | socket could not be created (e.g., because the port is 28 | already in use, or it is a privileged port and the user has 29 | no permissions on it), #f is returned. 30 | 31 | (recv! sock buff) 32 | sock: socket obtained with make-client-socket or accept 33 | buff: string 34 | 35 | Waits for received data through the specified socket, and 36 | stores it on the buffer. The return value indicates the 37 | number of received bytes. This call blocks until some data 38 | is received, but does not guarantee that buff gets 39 | completely filled. If an error occurs (e.g., the other 40 | peer disconnects) then #f is returned. 41 | 42 | (recv-new-string sock) 43 | sock: socket obtained with make-client-socket or accept 44 | 45 | Waits for received data through the specified socket, and 46 | returns it in a new string. This call blocks until some 47 | data is received. If an error occurs, then #f is returned. 48 | 49 | (send sock buff) 50 | sock: socket obtained with make-client-socket or accept 51 | buff: string 52 | 53 | Sends the data contained in the string through the socket. 54 | It returns the number of transmitted bytes (could be 55 | different than the size of the string!), or #f if an error 56 | occured (e.g., the other peer disconnected). 57 | 58 | (accept server-sock) 59 | server-sock: socket obtained with make-server-socket 60 | 61 | Waits until a connection is received on the specified 62 | server socket, and returns the connected socket. If an 63 | error occurs (e.g., the network interface shuts down), it 64 | returns #f instead. 65 | 66 | (close-socket sock) 67 | sock: socket obtained with make-server-socket, 68 | make-client-socket or accept 69 | 70 | The socket is closed. No further calls should be performed 71 | on this socket. 72 | 73 | (sock-is-data-ready? sock) 74 | sock: socket obtained with make-server-socket, 75 | make-client-socket or accept 76 | 77 | This function allows non-blocking operation with sockets. 78 | It returns #t if data is available for reception on this 79 | socket, and #f if no data has been received. 80 | 81 | (sock-peek sock) 82 | sock: socket obtained with make-server-socket, 83 | make-client-socket or accept 84 | 85 | This function returns (as a newly created string) the 86 | data received in this socket. The information is not 87 | removed from the input queue. 88 | 89 | *File system (included if HAVE_FILESYSTEM is defined in tsx.h) 90 | 91 | Scheme already defines functions to read and write files. These 92 | functions allow access to the filesystem to check if a certain 93 | file exists, to get its size, etc. 94 | 95 | (file-size filename) 96 | filename: string 97 | 98 | This function returns the size (in bytes) of the 99 | indicated file, or #f if the file does not exists or 100 | is not accessible to the requesting user. 101 | 102 | (file-exists? filename) 103 | filename: string 104 | 105 | This function returns #t if the indicated file exists, and 106 | #f if it does not exists or it is not accessible to the 107 | requesting user. 108 | 109 | (delete-file filename) 110 | filename: string 111 | 112 | Removes the specified file. It returns #t if the operation 113 | succeeds, or #f otherwise (e.g., because the file is 114 | read-only, or because the file does not exist). 115 | 116 | (open-dir-stream path) 117 | path: string 118 | 119 | Opens a "directory stream" on the provided directory path. 120 | This stream will provide all the files within the directory, 121 | using the function read-dir-entry. The stream should be closed 122 | at the end with close-dir-stream. 123 | 124 | (read-dir-entry dirstream) 125 | dirstream: directory stream, obtained with open-dir-stream. 126 | 127 | It returns the name of the following directory entry, or eof 128 | if all the entries were provided. Check the return value with 129 | with eof-object?. 130 | 131 | (close-dir-stream dirstream) 132 | dirstream: directory stream, obtained with open-dir-stream. 133 | 134 | Close directory stream. No further calls to read-dir-entry should 135 | be performed. 136 | 137 | 138 | *Time (available if HAVE_TIME is defined in tsx.h) 139 | 140 | (time) 141 | Returns the current local time, as a list of integer 142 | containing: 143 | (year month day-of-month hour min sec millisec) 144 | The year is expressed as an offsett from 1900. 145 | 146 | (gettimeofday) 147 | Returns a list containing the number of seconds from 148 | the beginning of the day, and microseconds within the 149 | current second. 150 | 151 | (usleep microsec) 152 | microsec: integer 153 | 154 | Suspends execution of the calling thread during the 155 | specified number of microseconds. 156 | 157 | 158 | *Miscellaneous functions (available if HAVE_MISC is defined) 159 | 160 | (getenv varname) 161 | varname: string 162 | 163 | Returns a string with the content of the specified 164 | environment variable, or #f if the variable is not 165 | defined. 166 | 167 | (system command) 168 | command: string 169 | 170 | Executes a command on the /bin/sh shell. Returns #f if 171 | it is unable to run /bin/sh or another error occurs, 172 | or an integer return code which is the value returned 173 | by the command to the shell. 174 | 175 | END 176 | 177 | -------------------------------------------------------------------------------- /ext/tsx/tsx.c: -------------------------------------------------------------------------------- 1 | /* TinyScheme Extensions 2 | * (c) 2002 Visual Tools, S.A. 3 | * Manuel Heras-Gilsanz (manuel@heras-gilsanz.com) 4 | * 5 | * This software is subject to the terms stated in the 6 | * LICENSE file. 7 | */ 8 | 9 | #include "miniscm.h" 10 | #include 11 | #ifdef _WIN32 12 | #include 13 | #include 14 | #include 15 | #include 16 | #else 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #endif 29 | #include "tsx.h" 30 | 31 | #ifdef HAVE_MISC 32 | pointer foreign_getenv(pointer args) 33 | { 34 | pointer first_arg; 35 | pointer ret; 36 | char * varname; 37 | char * value; 38 | 39 | if(args == NIL) 40 | { 41 | return F; 42 | } 43 | 44 | first_arg = car(args); 45 | 46 | if(!is_string(first_arg)) 47 | { 48 | return F; 49 | } 50 | 51 | varname = strvalue(first_arg); 52 | value = getenv(varname); 53 | if (0 == value) 54 | { 55 | ret = F; 56 | } 57 | else 58 | { 59 | ret = mk_string(value); 60 | } 61 | return ret; 62 | } 63 | 64 | pointer foreign_system(pointer args) 65 | { 66 | pointer first_arg; 67 | char * command; 68 | int retcode; 69 | 70 | if(args == NIL) 71 | return F; 72 | 73 | first_arg = car(args); 74 | if(!is_string(first_arg)) 75 | return F; 76 | 77 | command = strvalue(first_arg); 78 | if(0 == command) 79 | return F; 80 | 81 | retcode = system(command); 82 | if( (127 == retcode) || (-1 == retcode) ) 83 | return F; 84 | 85 | return (mk_integer(retcode)); 86 | } 87 | #endif /* defined (HAVE_MISC) */ 88 | 89 | #ifdef HAVE_FILESYSTEM 90 | pointer foreign_filesize(pointer args) 91 | { 92 | pointer first_arg; 93 | pointer ret; 94 | struct stat buf; 95 | char * filename; 96 | int retcode; 97 | 98 | if(args == NIL) 99 | return F; 100 | 101 | first_arg = car(args); 102 | if(!is_string(first_arg)) { 103 | return F; 104 | } 105 | 106 | filename = strvalue(first_arg); 107 | retcode = stat(filename, &buf); 108 | if (0 == retcode) 109 | { 110 | ret = mk_integer(buf.st_size); 111 | } 112 | else 113 | { 114 | ret = F; 115 | } 116 | return ret; 117 | } 118 | 119 | pointer foreign_fileexists(pointer args) 120 | { 121 | pointer first_arg; 122 | pointer ret; 123 | struct stat buf; 124 | char * filename; 125 | int retcode; 126 | 127 | if(args == NIL) 128 | return F; 129 | 130 | first_arg = car(args); 131 | if(!is_string(first_arg)) { 132 | return F; 133 | } 134 | 135 | filename = strvalue(first_arg); 136 | retcode = stat(filename, &buf); 137 | if (0 == retcode) 138 | { 139 | ret = T; 140 | } 141 | else 142 | { 143 | ret = F; 144 | } 145 | return ret; 146 | } 147 | 148 | pointer foreign_deletefile(pointer args) 149 | { 150 | pointer first_arg; 151 | pointer ret; 152 | char * filename; 153 | int retcode; 154 | 155 | if(args == NIL) 156 | return F; 157 | 158 | first_arg = car(args); 159 | if(!is_string(first_arg)) { 160 | return F; 161 | } 162 | 163 | filename = strvalue(first_arg); 164 | #ifdef _WIN32 165 | retcode = _unlink(filename); 166 | #else 167 | retcode = unlink(filename); 168 | #endif 169 | if (0 == retcode) { 170 | ret = T; 171 | } 172 | else { 173 | ret = F; 174 | } 175 | return ret; 176 | } 177 | 178 | pointer foreign_opendirstream(pointer args) 179 | { 180 | pointer first_arg; 181 | char * dirpath; 182 | #ifdef _WIN32 183 | long dir; 184 | struct _finddata_t entry; 185 | #else 186 | DIR * dir; 187 | #endif 188 | 189 | if(args == NIL) 190 | return F; 191 | 192 | first_arg = car(args); 193 | if(!is_string(first_arg)) 194 | return F; 195 | 196 | dirpath = strvalue(first_arg); 197 | 198 | #ifdef _WIN32 199 | dir = _findfirst(dirpath, &entry); 200 | if(-1L == dir) 201 | #else 202 | dir = opendir(dirpath); 203 | if(0 == dir) 204 | #endif 205 | return F; 206 | 207 | return (mk_integer((int) dir)); 208 | } 209 | 210 | pointer foreign_readdirentry(pointer args) 211 | { 212 | pointer first_arg; 213 | #ifdef _WIN32 214 | long dir; 215 | struct _finddata_t entry; 216 | #else 217 | DIR * dir; 218 | struct dirent * entry; 219 | #endif 220 | 221 | if(args == NIL) 222 | return F; 223 | 224 | first_arg = car(args); 225 | if(!is_number(first_arg)) 226 | return F; 227 | 228 | #ifdef _WIN32 229 | dir = (long) ivalue(first_arg); 230 | if(-1L == dir) 231 | #else 232 | dir = (DIR *) ivalue(first_arg); 233 | if(0 == dir) 234 | #endif 235 | return F; 236 | 237 | #ifdef _WIN32 238 | dir = _findnext(dir, &entry); 239 | if(-1L == dir) 240 | #else 241 | entry = readdir(dir); 242 | if(0 == entry) 243 | #endif 244 | return EOF_OBJ; 245 | 246 | #ifdef _WIN32 247 | return (mk_string(entry.name)); 248 | #else 249 | return (mk_string(entry->d_name)); 250 | #endif 251 | } 252 | 253 | pointer foreign_closedirstream(pointer args) 254 | { 255 | pointer first_arg; 256 | #ifdef _WIN32 257 | long dir; 258 | #else 259 | DIR * dir; 260 | #endif 261 | 262 | if(args == NIL) 263 | return F; 264 | 265 | first_arg = car(args); 266 | if(!is_number(first_arg)) 267 | return F; 268 | 269 | #ifdef _WIN32 270 | dir = (long) ivalue(first_arg); 271 | if(-1L == dir) 272 | #else 273 | dir = (DIR *) ivalue(first_arg); 274 | if(0 == dir) 275 | #endif 276 | return F; 277 | 278 | #ifdef _WIN32 279 | _findclose(dir); 280 | #else 281 | closedir(dir); 282 | #endif 283 | return T; 284 | } 285 | #endif /* defined (HAVE_FILESYSTEM) */ 286 | 287 | #ifdef HAVE_TIME 288 | pointer foreign_time(pointer args) 289 | { 290 | time_t now; 291 | struct tm * now_tm; 292 | pointer ret; 293 | 294 | if(args != NIL) 295 | { 296 | return F; 297 | } 298 | 299 | time(&now); 300 | now_tm = localtime(&now); 301 | 302 | ret = cons(mk_integer((long) now_tm->tm_year), 303 | cons(mk_integer((long) now_tm->tm_mon), 304 | cons(mk_integer((long) now_tm->tm_mday), 305 | cons(mk_integer((long) now_tm->tm_hour), 306 | cons(mk_integer((long) now_tm->tm_min), 307 | cons(mk_integer((long) now_tm->tm_sec),NIL)))))); 308 | 309 | return ret; 310 | } 311 | 312 | pointer foreign_gettimeofday(pointer args) 313 | { 314 | struct timeval tv; 315 | pointer ret; 316 | 317 | #ifdef _WIN32 318 | struct _timeb tb; 319 | 320 | _ftime(&tb); 321 | tv.tv_sec = (long) tb.time; 322 | tv.tv_usec = tb.millitm * 1000; 323 | #else 324 | gettimeofday(&tv, 0); 325 | #endif 326 | 327 | ret = cons(mk_integer((long) tv.tv_sec), 328 | cons(mk_integer((long) tv.tv_usec), 329 | NIL)); 330 | 331 | return ret; 332 | } 333 | 334 | pointer foreign_usleep(pointer args) 335 | { 336 | pointer first_arg; 337 | long usec; 338 | 339 | if(args == NIL) 340 | return F; 341 | 342 | first_arg = car(args); 343 | if(!is_integer(first_arg)) { 344 | return F; 345 | } 346 | 347 | usec = ivalue(first_arg); 348 | #ifdef _WIN32 349 | if (usec > 0) { 350 | HANDLE hTimer; 351 | LARGE_INTEGER dueTime; 352 | 353 | dueTime.QuadPart = -10LL * usec; 354 | 355 | hTimer = CreateWaitableTimer(NULL, TRUE, NULL); 356 | SetWaitableTimer(hTimer, &dueTime, 0, NULL, NULL, 0); 357 | WaitForSingleObject(hTimer, INFINITE); 358 | CloseHandle(hTimer); 359 | } 360 | #else 361 | usleep(usec); 362 | #endif 363 | 364 | return T; 365 | } 366 | #endif /* defined (HAVE_TIME) */ 367 | 368 | #ifdef HAVE_SOCKETS 369 | pointer foreign_makeclientsocket(pointer args) 370 | { 371 | pointer first_arg; 372 | pointer second_arg; 373 | pointer ret; 374 | struct sockaddr_in address; 375 | struct in_addr inaddr; 376 | struct hostent * host; 377 | char * hostname; 378 | int retcode; 379 | long port; 380 | int sock; 381 | #ifdef _WIN32 382 | int size = sizeof(address); 383 | #endif 384 | 385 | if(args == NIL) 386 | return F; 387 | 388 | first_arg = car(args); 389 | if(!is_string(first_arg)) { 390 | return F; 391 | } 392 | args = cdr(args); 393 | second_arg = car(args); 394 | if(!is_number(second_arg)) { 395 | return F; 396 | } 397 | 398 | hostname = strvalue(first_arg); 399 | port = ivalue(second_arg); 400 | 401 | #ifdef _WIN32 402 | /* inet_pton() is not implemented in Windows XP, 2003 */ 403 | if (WSAStringToAddress(hostname, AF_INET, NULL, (struct sockaddr *)&address, &size) == 0) { 404 | inaddr = address.sin_addr; 405 | #else 406 | if(inet_aton(hostname, &inaddr)) { 407 | #endif 408 | host = gethostbyaddr((char *) &inaddr, sizeof(inaddr), AF_INET); 409 | } else 410 | host = gethostbyname(hostname); 411 | 412 | if(0 == host) { 413 | return F; 414 | } 415 | 416 | sock = socket(PF_INET, SOCK_STREAM, 0); 417 | if(-1==sock) { 418 | return F; 419 | } 420 | 421 | address.sin_family = AF_INET; 422 | address.sin_port = htons((u_short) port); 423 | memcpy(&address.sin_addr, host->h_addr_list[0], sizeof(address.sin_addr)); 424 | 425 | retcode = connect(sock, (struct sockaddr *)&address, sizeof(address)); 426 | if (0 == retcode) { 427 | ret = mk_integer(sock); 428 | } 429 | else { 430 | ret = F; 431 | } 432 | return ret; 433 | } 434 | 435 | pointer foreign_makeserversocket(pointer args) 436 | { 437 | pointer first_arg; 438 | struct sockaddr_in address; 439 | long port; 440 | #ifdef _WIN32 441 | const char one = 1; 442 | #else 443 | int one = 1; 444 | #endif 445 | int sock; 446 | 447 | if(args == NIL) 448 | return F; 449 | 450 | first_arg = car(args); 451 | if(!is_number(first_arg)) { 452 | return F; 453 | } 454 | 455 | port = ivalue(first_arg); 456 | 457 | sock = socket(PF_INET, SOCK_STREAM, 0); 458 | if(-1==sock) { 459 | return F; 460 | } 461 | 462 | setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); 463 | 464 | address.sin_family = AF_INET; 465 | address.sin_port = htons((u_short) port); 466 | memset(&address.sin_addr, 0, sizeof(address.sin_addr)); 467 | 468 | if(bind(sock, (struct sockaddr *) &address, sizeof(address))) { 469 | return F; 470 | } 471 | 472 | if(listen(sock, 1)) { 473 | return F; 474 | } 475 | 476 | return (mk_integer(sock)); 477 | } 478 | 479 | pointer foreign_recv(pointer args) 480 | { 481 | pointer first_arg; 482 | pointer second_arg; 483 | int sock; 484 | char * buf; 485 | pointer ret; 486 | int retcode; 487 | 488 | if(args == NIL) 489 | return F; 490 | 491 | first_arg = car(args); 492 | if(!is_number(first_arg)) { 493 | return F; 494 | } 495 | args = cdr(args); 496 | second_arg = car(args); 497 | if(!is_string(second_arg)) { 498 | return F; 499 | } 500 | 501 | sock = ivalue(first_arg); 502 | buf = strvalue(second_arg); 503 | 504 | retcode = recv(sock, buf, strlen(buf), 0); 505 | if (-1 == retcode) { 506 | ret = F; 507 | } 508 | else { 509 | ret = mk_integer(retcode); 510 | } 511 | 512 | return ret; 513 | } 514 | 515 | pointer foreign_recvnewbuf(pointer args) 516 | { 517 | pointer first_arg; 518 | int sock; 519 | pointer ret; 520 | int lenreceived; 521 | char buf[2500]; 522 | 523 | if(args == NIL) return F; 524 | 525 | first_arg = car(args); 526 | if(!is_number(first_arg)) return F; 527 | 528 | sock = ivalue(first_arg); 529 | 530 | lenreceived = recv(sock, buf, sizeof(buf) - 1, 0); 531 | if (-1 == lenreceived) return F; 532 | 533 | buf[lenreceived] = 0; 534 | ret = mk_string(buf); 535 | 536 | return ret; 537 | } 538 | 539 | pointer foreign_isdataready(pointer args) 540 | { 541 | pointer first_arg; 542 | int sock; 543 | struct timeval tv; 544 | fd_set fds; 545 | fd_set fdsin; 546 | 547 | if(args == NIL) return F; 548 | 549 | first_arg = car(args); 550 | if(!is_number(first_arg)) return F; 551 | 552 | sock = ivalue(first_arg); 553 | 554 | tv.tv_sec = 0; 555 | tv.tv_usec = 0; 556 | 557 | FD_ZERO(&fds); 558 | FD_SET(sock, &fds); 559 | fdsin = fds; 560 | if (select(1+sock, &fdsin, NULL, NULL, &tv) < 0) 561 | { 562 | return F; 563 | } 564 | if (FD_ISSET(sock, &fdsin)) 565 | return T; 566 | return F; 567 | } 568 | 569 | pointer foreign_sockpeek(pointer args) 570 | { 571 | pointer first_arg; 572 | int sock; 573 | pointer ret; 574 | int lenreceived; 575 | char buf[2500]; 576 | 577 | if(args == NIL) return F; 578 | 579 | first_arg = car(args); 580 | if(!is_number(first_arg)) return F; 581 | 582 | sock = ivalue(first_arg); 583 | 584 | lenreceived = recv(sock, buf, sizeof(buf) - 1, MSG_PEEK); 585 | if (-1 == lenreceived) return F; 586 | 587 | buf[lenreceived] = 0; 588 | ret = mk_string(buf); 589 | 590 | return ret; 591 | } 592 | 593 | pointer foreign_send(pointer args) 594 | { 595 | pointer first_arg; 596 | pointer second_arg; 597 | int sock; 598 | char * buf; 599 | pointer ret; 600 | int retcode; 601 | 602 | if(args == NIL) 603 | return F; 604 | 605 | first_arg = car(args); 606 | if(!is_number(first_arg)) { 607 | return F; 608 | } 609 | args = cdr(args); 610 | second_arg = car(args); 611 | if(!is_string(second_arg)) { 612 | return F; 613 | } 614 | 615 | sock = ivalue(first_arg); 616 | buf = strvalue(second_arg); 617 | 618 | retcode = send(sock, buf, strlen(buf), 0); 619 | if (-1 == retcode) { 620 | ret = F; 621 | } 622 | else { 623 | ret = mk_integer(retcode); 624 | } 625 | 626 | return ret; 627 | } 628 | 629 | pointer foreign_accept(pointer args) 630 | { 631 | pointer first_arg; 632 | int sock; 633 | struct sockaddr_in addr; 634 | pointer ret; 635 | socklen_t addr_len = sizeof(struct sockaddr_in); 636 | int retcode; 637 | 638 | if(args == NIL) 639 | return F; 640 | 641 | first_arg = car(args); 642 | if(!is_number(first_arg)) { 643 | return F; 644 | } 645 | 646 | sock = ivalue(first_arg); 647 | 648 | retcode = accept(sock, (struct sockaddr *)&addr, &addr_len); 649 | if (-1 == retcode) { 650 | ret = F; 651 | } 652 | else { 653 | ret = mk_integer(retcode); 654 | } 655 | 656 | return ret; 657 | } 658 | 659 | pointer foreign_closesocket(pointer args) 660 | { 661 | pointer first_arg; 662 | int sock; 663 | int retcode; 664 | 665 | if(args == NIL) 666 | return F; 667 | 668 | first_arg = car(args); 669 | if(!is_number(first_arg)) 670 | return F; 671 | 672 | sock = ivalue(first_arg); 673 | 674 | #ifdef _WIN32 675 | retcode = _close(sock); 676 | #else 677 | retcode = close(sock); 678 | #endif 679 | if (-1 == retcode) 680 | return F; 681 | 682 | return T; 683 | } 684 | #endif /* defined (HAVE_SOCKETS) */ 685 | 686 | 687 | /* This function gets called when MiniScheme is loading the extension */ 688 | void init_tsx(void) 689 | { 690 | #ifdef HAVE_MISC 691 | scheme_register_foreign_func("getenv", foreign_getenv); 692 | scheme_register_foreign_func("system", foreign_system); 693 | #endif /* defined (HAVE_MISC) */ 694 | #ifdef HAVE_TIME 695 | scheme_register_foreign_func("time", foreign_time); 696 | scheme_register_foreign_func("gettimeofday", foreign_gettimeofday); 697 | scheme_register_foreign_func("usleep", foreign_usleep); 698 | #endif /* defined (HAVE_TIME) */ 699 | #ifdef HAVE_FILESYSTEM 700 | scheme_register_foreign_func("file-size", foreign_filesize); 701 | scheme_register_foreign_func("file-exists?", foreign_fileexists); 702 | scheme_register_foreign_func("delete-file", foreign_deletefile); 703 | scheme_register_foreign_func("open-dir-stream", foreign_opendirstream); 704 | scheme_register_foreign_func("read-dir-entry", foreign_readdirentry); 705 | scheme_register_foreign_func("close-dir-stream", foreign_closedirstream); 706 | #endif /* defined (HAVE_FILESYSTEM) */ 707 | #ifdef HAVE_SOCKETS 708 | scheme_register_foreign_func("make-client-socket", foreign_makeclientsocket); 709 | scheme_register_foreign_func("make-server-socket", foreign_makeserversocket); 710 | scheme_register_foreign_func("recv!", foreign_recv); 711 | scheme_register_foreign_func("recv-new-string", foreign_recvnewbuf); 712 | scheme_register_foreign_func("sock-peek", foreign_sockpeek); 713 | scheme_register_foreign_func("sock-is-data-ready?", foreign_isdataready); 714 | scheme_register_foreign_func("send", foreign_send); 715 | scheme_register_foreign_func("accept", foreign_accept); 716 | scheme_register_foreign_func("close-socket", foreign_closesocket); 717 | #endif /* defined (HAVE_SOCKETS) */ 718 | } 719 | -------------------------------------------------------------------------------- /ext/tsx/tsx.h: -------------------------------------------------------------------------------- 1 | /* TinyScheme Extensions 2 | * (c) 2001 Manuel Heras-Gilsanz 3 | * 4 | * This software is subject to the terms stated in the 5 | * LICENSE file. 6 | */ 7 | 8 | /* Comment those #defines whose functionality you don't 9 | * want to include. 10 | */ 11 | 12 | /* Comment the following line if you don't need sockets */ 13 | #define HAVE_SOCKETS 14 | 15 | /* Comment the following line if you don't need filesystem 16 | * functionality (file-size, file-exists?, etc). 17 | */ 18 | #define HAVE_FILESYSTEM 19 | 20 | /* Comment the following line if you don't need time functions */ 21 | #define HAVE_TIME 22 | 23 | /* Comment the following line if you don't need getenv and system */ 24 | #define HAVE_MISC 25 | -------------------------------------------------------------------------------- /src/README: -------------------------------------------------------------------------------- 1 | ===================================================================== 2 | 3 | ---------- Mini-Scheme Interpreter Version 0.85 ---------- 4 | 5 | coded by Atsushi Moriwaki (11/5/1989) 6 | 7 | E-MAIL : moriwaki@kurims.kurims.kyoto-u.ac.jp 8 | MIX : riemann 9 | NIFTY : PBB01074 10 | (Note that these addresses are now obsolete, see below) 11 | 12 | ===================================================================== 13 | 14 | Revised by Akira KIDA 15 | 16 | Version 0.85k4 (17 May 1994) 17 | Version 0.85k3 (30 Nov 1989) 18 | Version 0.85k2 (28 Nov 1989) 19 | Version 0.85k1 (14 Nov 1989) 20 | 21 | Mini-Scheme is now maintained by Akira KIDA. 22 | 23 | E-Mail : SDI00379@niftyserve.or.jp 24 | 25 | Most part of this document is written by Akira KIDA. 26 | Send comments/requests/bug reports to Akira KIDA at the above 27 | email address. 28 | 29 | ===================================================================== 30 | 31 | This Mini-Scheme Interpreter is based on "SCHEME Interpreter in 32 | Common Lisp" in Appendix of T.Matsuda & K.Saigo, Programming of LISP, 33 | archive No5 (1987) p6 - p42 (published in Japan). 34 | 35 | 36 | Copyright Notice: 37 | THIS SOFTWARE IS PLACED IN THE PUBLIC DOMAIN BY THE AUTHOR. 38 | 39 | This software is completely free to copy, modify and/or re-distribute. 40 | But I (Atsushi Moriwaki) would appreciate it if you left my name on the 41 | code as the author. 42 | 43 | DISCLAIMER: 44 | THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 45 | IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 46 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 47 | PURPOSE. 48 | 49 | 50 | Supported features (or, NOT supported features :-) 51 | 1) Lists, symbols, strings. 52 | However, strings have very limited capability. 53 | For instance, there is *NO* string-ref, string-set!, ... etc. 54 | 2) Numbers are limited to FIXNUM only. 55 | There is *NO* complex, real, rational and even bignum. 56 | 3) Macro feature is supported, though not the one defined in R4RS. 57 | 58 | Known problems: 59 | 1) Poor error recovery from illegal use of syntax and procedure. 60 | 2) Certain procedures do not check its argument type. 61 | 62 | Installation: 63 | 1) Select system declaration and configuration options by editing 64 | source file. 65 | 66 | You may choose one of the following systems by #define'ing 67 | the preprocessor symbol. 68 | 69 | Supported systems are: 70 | Macintosh: 71 | LSC -- LightSpeed C (3.0) for Macintosh 72 | LSC4 -- LightSpeed C (4.0) for Macintosh 73 | They are different in #include header only. 74 | I (kida) think THINK C 5.0, 6.0, 7.0 may be OK 75 | with LSC4 configuration, though not tested. 76 | MPW2 -- Macintosh Programmer's Workshop v2.0x 77 | I don't tested v3.x or later. 78 | DOS: 79 | MSC4 -- Microsoft C v4.0 (use /AL) 80 | MSC v5.1, v6.0, v7.0 are all OK. 81 | TURBO2 -- Bolarnd's Turbo C v2.0 (use -ml) 82 | Turbo C++ 1.0 is OK. 83 | UNIX: 84 | BSD -- UNIX of BSD flavor, ex. SuOS 4.x 85 | SYSV -- UNIX of System-V flavor, ex. Sun/Solaris 2.x 86 | 87 | VAX/VMS: 88 | VAXC -- VAX-C v3.x (this symbol may be defined by the 89 | compiler automatically). 90 | 91 | 2) Configure some preprocessor symbols by editing source files. 92 | 93 | Configurable #define's are: 94 | 95 | #define VERBOSE 96 | -- if defined, GC messages is verbose on default. 97 | 98 | #define AVOID_HACK_LOOP 99 | -- if defined, do _NOT_ use loop construction in the 100 | form 101 | do { ... } while (0) 102 | This form is used in macro expansion, since this is 103 | the best "safety" blocking construct when used in 104 | macro definition. 105 | However, some compiler (including SunPRO CC 2.0.1) 106 | is silly enough to warning this construct, like 107 | "warning: end-of-loop code not reached", etc. 108 | If you dislike such warning, please define this symbol. 109 | NOTE: You may get some "statement not reached" warning 110 | even if you have define this symbol. Please be patient, 111 | or, use more smart compiler. 112 | In short if you use GCC, undefine this and forget it 113 | at all. 114 | 115 | #define USE_SETJMP 116 | -- if defined, use setjmp to global jump on error. 117 | if not defined, avoid to use it. Compiled with 118 | this symbol defined, the interpreter issue fatal 119 | error and return to the operating system immediately 120 | when we run out of free cells. By default, i.e., 121 | compiled with this symbol is not defined, the 122 | interpreter will just return to the top level in 123 | such a case. 124 | May not be used except for compiling as Mac DA. 125 | 126 | #define USE_MACRO 127 | -- if defined, macro features are enabled. 128 | 129 | #define USE_QQUOTE 130 | -- if defined, you can use quasi-quote "`" in macro. 131 | You can use macro even if this symbol is undefined. 132 | 133 | 3) Compile! 134 | 135 | I think there is virtually no problem about how to compile. 136 | Since there is exactly one C source file. :-) 137 | If you are on UNIX boxes with some BSD flavors, instead of 138 | using make command, it's enough to type: 139 | 140 | cc -DBSD -O -o miniscm miniscm.c 141 | 142 | You may have additional warnings like 'function should 143 | return value'. This is due to omitting 'void' keyword 144 | from the source in order to get pre-ANSI compatibility. 145 | 146 | 147 | Usage : miniscm 148 | 149 | Sorry, no command line argnumet is allowed. 150 | 151 | 152 | Special procedures of this system: 153 | 154 | gc : (gc) -- force garbage collection 155 | 156 | gc-verbose : (gc-verbose bool) -- GC verbose on/off 157 | Argument #f turnes off the GC message. 158 | Enything else turn on the GC message. 159 | 160 | quit : (quit) -- quit to the operating system 161 | 162 | put : (put sym prop expr) 163 | -- set the value of a property of a symbol. 164 | get : (get sym prop) 165 | -- get the value of a property of a symbol. 166 | 167 | new-segment : (new-segment n) 168 | -- allocate n new cell segments. 169 | 170 | print-width : (print-width list) 171 | -- returns 'printed' width of list. 172 | 173 | closure? : (closure? obj) 174 | -- test if obj is a closure or not. 175 | 176 | macro? : (macro? obj) 177 | -- test if obj is a macro or not. 178 | note that a macro is also a closure. 179 | 180 | get-closure-code 181 | : (get-closure-code closure-obj) 182 | -- extract S-expr from closure-obj. 183 | 184 | Scheme files: 185 | init.scm -- Automatically loaded at invocation. 186 | Default setting assumes that this file is in the current 187 | working directory. 188 | Change #define InitFile if you don't like it. 189 | 190 | tools.scm -- This is a sample file. Contains very tiny pretty-print 191 | procedure. 192 | After invoking miniscm, type: 193 | (load "tools.scm") 194 | and try 195 | (pp getd) 196 | (pp do) 197 | 198 | Documents?: 199 | 200 | Sorry, there is no other documents. 201 | Do not ask one for me, please see the source instead. :-) 202 | 203 | But if you _absolutely_ need help, please email to me at: 204 | 205 | 206 | Enjoy! 207 | 208 | -- Akira KIDA 209 | Sysop for FPL in NIFTY-Serve in JAPAN. 210 | (FPL stands for 'Forum for Program-Language') 211 | 212 | -------------------------------------------------------------------------------- /src/bignum.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This software is released under the MIT License, see the LICENSE file. 3 | */ 4 | 5 | #include "bignum.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | /* greatest common divisor */ 12 | int64_t gcd(int32_t x, int32_t y) 13 | { 14 | uint32_t ux = (uint32_t)llabs(x), uy = (uint32_t)llabs(y), uz; 15 | while (ux != 0) { 16 | uz = ux; 17 | ux = uy % ux; 18 | uy = uz; 19 | } 20 | return uy; 21 | } 22 | 23 | /* least common multiple */ 24 | int64_t lcm(int32_t x, int32_t y) 25 | { 26 | if (x == 0 || y == 0) { 27 | return 0; 28 | } 29 | return llabs(x / gcd(x, y) * y); 30 | } 31 | 32 | /* the first bit1 position */ 33 | int32_t find1_32(uint32_t val) 34 | { 35 | static const int32_t clz_table_4bit[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; 36 | int32_t n = 32; 37 | 38 | if ((val & 0xFFFF0000) == 0) { n -= 16; val <<= 16; } 39 | if ((val & 0xFF000000) == 0) { n -= 8; val <<= 8; } 40 | if ((val & 0xF0000000) == 0) { n -= 4; val <<= 4; } 41 | 42 | return n - clz_table_4bit[val >> (32 - 4)]; 43 | } 44 | 45 | /* if x == y */ 46 | int32_t bn_eq(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 47 | { 48 | int32_t i; 49 | if (colx != coly) { 50 | return 0; 51 | } 52 | for (i = colx - 1; i >= 0; i--) { 53 | if (x[i] != y[i]) { 54 | return 0; 55 | } 56 | } 57 | return 1; 58 | } 59 | 60 | /* if x > y */ 61 | int32_t bn_gt(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 62 | { 63 | int32_t i; 64 | if (colx > coly) { 65 | return 1; 66 | } else if (colx < coly) { 67 | return 0; 68 | } 69 | for (i = colx - 1; i >= 0; i--) { 70 | if (x[i] > y[i]) { 71 | return 1; 72 | } else if (x[i] < y[i]) { 73 | return 0; 74 | } 75 | } 76 | return 0; 77 | } 78 | 79 | /* if x >= y */ 80 | int32_t bn_ge(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 81 | { 82 | int32_t i; 83 | if (colx > coly) { 84 | return 1; 85 | } else if (colx < coly) { 86 | return 0; 87 | } 88 | for (i = colx - 1; i >= 0; i--) { 89 | if (x[i] > y[i]) { 90 | return 1; 91 | } else if (x[i] < y[i]) { 92 | return 0; 93 | } 94 | } 95 | return 1; 96 | } 97 | 98 | /* z = x + y */ 99 | void bn_add(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 100 | { 101 | int32_t i, col = (colx < coly) ? colx : coly; 102 | uint64_t t = 0; 103 | for (i = 0; i < col; i++) { 104 | t = (uint64_t)x[i] + y[i] + (t >> 32); 105 | z[i] = (uint32_t)t; 106 | } 107 | if (colx > coly) { 108 | *colz = colx; 109 | for (; i < colx; i++) { 110 | t = x[i] + (t >> 32); 111 | z[i] = (uint32_t)t; 112 | } 113 | } else { 114 | *colz = coly; 115 | for (; i < coly; i++) { 116 | t = y[i] + (t >> 32); 117 | z[i] = (uint32_t)t; 118 | } 119 | } 120 | if (t >>= 32) { 121 | z[(*colz)++] = (uint32_t)t; 122 | } 123 | } 124 | 125 | /* z = x - y */ 126 | void bn_sub(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 127 | { 128 | int32_t i; 129 | uint32_t carry = 0; 130 | for (i = 0; i < coly; i++) { 131 | uint64_t t = (uint64_t)y[i] + carry; 132 | if (x[i] >= t) { 133 | z[i] = (uint32_t)(x[i] - t); 134 | carry = 0; 135 | } else { 136 | z[i] = (uint32_t)(((uint64_t)1 << 32) + x[i] - t); 137 | carry = 1; 138 | } 139 | } 140 | *colz = colx; 141 | for (; i < colx; i++) { 142 | if (x[i] >= carry) { 143 | z[i] = (uint32_t)(x[i] - carry); 144 | carry = 0; 145 | } else { 146 | z[i] = (uint32_t)(((uint64_t)1 << 32) + x[i] - carry); 147 | carry = 1; 148 | } 149 | } 150 | while (*colz > 0) { 151 | if (z[*colz - 1] > 0) { 152 | break; 153 | } 154 | (*colz)--; 155 | } 156 | } 157 | 158 | /* z = x * y */ 159 | void bn_mul(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 160 | { 161 | int32_t i, j; 162 | memset(z, 0, sizeof(uint32_t) * (colx + coly)); 163 | for (i = 0; i < colx; i++) { 164 | uint64_t t = 0; 165 | for (j = 0; j < coly; j++) { 166 | t = z[i + j] + (uint64_t)x[i] * y[j] + (t >> 32); 167 | z[i + j] = (uint32_t)t; 168 | } 169 | if (t >>= 32) { 170 | z[i + j] = (uint32_t)t; 171 | } 172 | } 173 | *colz = colx + coly; 174 | while (*colz > 0) { 175 | if (z[*colz - 1] > 0) { 176 | break; 177 | } 178 | (*colz)--; 179 | } 180 | } 181 | 182 | /* z = x^2 */ 183 | void bn_sqr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx) 184 | { 185 | int32_t i, j = 0; 186 | uint64_t t; 187 | *colz = 2 * colx; 188 | memset(z, 0, sizeof(uint32_t) * *colz); 189 | for (i = 0; i < colx - 1; i++) { 190 | t = 0; 191 | for (j = i + 1; j < colx; j++) { 192 | t = z[i + j] + (uint64_t)x[i] * x[j] + (t >> 32); 193 | z[i + j] = (uint32_t)t; 194 | } 195 | if (t >>= 32) { 196 | z[i + j] = (uint32_t)t; 197 | } 198 | } 199 | for (i = i + j - 1; i >= 0; i--) { 200 | z[i + 1] |= z[i] >> 31; 201 | z[i] = z[i] << 1; 202 | } 203 | t = 0; 204 | for (i = 0; i < colx * 2; i++) { 205 | t = z[i] + (uint64_t)x[i >> 1] * x[i >> 1] + (t >> 32); 206 | z[i++] = (uint32_t)t; 207 | t = z[i] + (t >> 32); 208 | z[i] = (uint32_t)t; 209 | } 210 | while (*colz > 0) { 211 | if (z[*colz - 1] > 0) { 212 | break; 213 | } 214 | (*colz)--; 215 | } 216 | } 217 | 218 | /* z = x << n */ 219 | void bn_sftl(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n) 220 | { 221 | int32_t i, q = n / 32, r = n & 0x1F; 222 | 223 | if (r == 0) { 224 | for (i = colx - 1; i >= 0; --i) { 225 | z[i + q] = x[i]; 226 | } 227 | memset(z, 0, sizeof(int32_t) * q); 228 | *colz = colx + q; 229 | } else { 230 | int32_t col = 0; 231 | if (x[colx - 1] >> (32 - r)) { 232 | col++; 233 | z[colx + q] = 0; 234 | } 235 | for (i = colx - 1; i >= 0; --i) { 236 | z[i + q + 1] |= x[i] >> (32 - r); 237 | z[i + q] = x[i] << r; 238 | } 239 | memset(z, 0, sizeof(int32_t) * q); 240 | *colz = colx + q + col; 241 | } 242 | while (*colz > 0) { 243 | if (z[*colz - 1] > 0) { 244 | break; 245 | } 246 | (*colz)--; 247 | } 248 | } 249 | 250 | /* z = x >> n */ 251 | void bn_sftr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n) 252 | { 253 | int32_t i, q = n / 32, r = n & 0x1F; 254 | 255 | if (r == 0) { 256 | if (colx - q <= 0) { 257 | *colz = 0; 258 | } else { 259 | for (i = q; i < colx; i++) { 260 | z[i - q] = x[i]; 261 | } 262 | *colz = colx - q; 263 | } 264 | } else { 265 | if (colx - q <= 0) { 266 | *colz = 0; 267 | } else { 268 | int32_t col = 0; 269 | if ((x[colx - 1] >> r) == 0) { 270 | col--; 271 | } 272 | z[0] = x[q] >> r; 273 | for (i = q + 1; i < colx; i++) { 274 | z[i - q - 1] |= x[i] << (32 - r); 275 | z[i - q] = x[i] >> r; 276 | } 277 | *colz = colx - q + col; 278 | } 279 | } 280 | while (*colz > 0) { 281 | if (z[*colz - 1] > 0) { 282 | break; 283 | } 284 | (*colz)--; 285 | } 286 | } 287 | 288 | /* q = x / y + r */ 289 | void bn_div(uint32_t q[], int32_t *colq, uint32_t r[], int32_t *colr, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly) 290 | { 291 | int32_t i; 292 | if (coly == 1) { 293 | uint64_t t = 0; 294 | for (i = colx - 1; i >= 0; i--) { 295 | t = t << 32 | x[i]; 296 | q[i] = (uint32_t)(t / y[0]); 297 | t = t % y[0]; 298 | } 299 | *colq = colx; 300 | while (*colq > 0) { 301 | if (q[*colq - 1] > 0) { 302 | break; 303 | } 304 | (*colq)--; 305 | } 306 | r[0] = (int32_t)t; 307 | *colr = 1; 308 | } else { 309 | uint32_t *t_a = r + *colr, *t_b = t_a + 1 + colx, *t_x = t_b + 1 + coly; 310 | int32_t cola, colb, d = 32 - find1_32(y[coly - 1]); 311 | if (d == 0) { 312 | memcpy(t_a, x, sizeof(uint32_t) * colx); 313 | memcpy(t_b, y, sizeof(uint32_t) * coly); 314 | cola = colx; 315 | colb = coly; 316 | } else { 317 | bn_sftl(t_a, &cola, x, colx, d); 318 | bn_sftl(t_b, &colb, y, coly, d); 319 | } 320 | i = cola - colb; 321 | if (i > 0) { 322 | *colq = i; 323 | } else if (i == 0) { 324 | if (bn_ge(t_a, cola, t_b, colb)) { 325 | q[0] = 1; 326 | *colq = 1; 327 | bn_sub(t_a, &cola, t_a, cola, t_b, colb); 328 | } else { 329 | *colq = 0; 330 | } 331 | } else { 332 | *colq = 0; 333 | } 334 | while (--i >= 0) { 335 | if (cola > 1 && bn_gt(t_a, cola, t_b, colb)) { 336 | uint64_t aa = (uint64_t)t_a[cola - 1] << 32 | t_a[cola - 2]; 337 | uint64_t qq = aa / t_b[colb - 1]; 338 | uint32_t q2[2]; 339 | if (cola > 2 && colb > 1) { 340 | uint64_t rr = aa % t_b[colb - 1]; 341 | while ((rr << 32 | t_a[cola - 3]) < qq * t_b[colb - 2]) { 342 | qq--; 343 | rr += t_b[colb - 2]; 344 | if (rr > UINT32_MAX) break; 345 | } 346 | } 347 | do { 348 | q2[0] = (uint32_t)qq; 349 | q2[1] = (uint32_t)(qq >> 32); 350 | bn_mul(t_x, &colx, t_b, colb, q2, q2[1] > 0 ? 2 : 1); 351 | --qq; 352 | bn_sftl(t_x, &colx, t_x, colx, i * 32); 353 | } while (bn_gt(t_x, colx, t_a, cola)); 354 | q[i] = (uint32_t)(qq + 1); 355 | if (qq >= UINT32_MAX) { 356 | q2[0] = 1; 357 | bn_add(&q[i + 1], colq, &q[i + 1], *colq - i - 1, q2, 1); 358 | *colq += i + 1; 359 | } 360 | bn_sub(t_a, &cola, t_a, cola, t_x, colx); 361 | } else if (bn_eq(t_a, cola, t_b, colb)) { 362 | q[i] = 1; 363 | while (i > 0) q[--i] = 0; 364 | while (cola > 0) t_a[--cola] = 0; 365 | } else { 366 | q[i] = 0; 367 | } 368 | } 369 | if (d > 0) { 370 | bn_sftr(r, colr, t_a, cola, d); 371 | } else { 372 | memcpy(r, t_a, cola * sizeof(uint32_t)); 373 | *colr = cola; 374 | while (*colr > 0) { 375 | if (r[*colr - 1] > 0) { 376 | break; 377 | } 378 | (*colr)--; 379 | } 380 | } 381 | } 382 | } 383 | 384 | int32_t bn_str2num_base2(const char s[], int32_t len, uint32_t x[], int32_t col) 385 | { 386 | int32_t i, j; 387 | memset(x, 0, col * sizeof(uint32_t)); 388 | for (i = 0; s[i]; i++) { 389 | if (s[i] < '0' || '1' < s[i]) { 390 | return -1; 391 | } 392 | j = len - i - 1; 393 | x[j / 32] |= (s[i] - '0') << j % 32; 394 | } 395 | while (col > 0) { 396 | if (x[col - 1] > 0) { 397 | break; 398 | } 399 | col--; 400 | } 401 | return col; 402 | } 403 | 404 | int32_t bn_str2num_base8(const char s[], int32_t len, uint32_t x[], int32_t col) 405 | { 406 | int32_t i, j, k; 407 | memset(x, 0, col * sizeof(uint32_t)); 408 | for (i = 0; s[i]; i++) { 409 | if (s[i] < '0' || '7' < s[i]) { 410 | return -1; 411 | } 412 | k = (len - i - 1) / 32; 413 | j = (len - i - 1) % 32; 414 | if (j == 10) { 415 | x[k * 3 + 1] |= (uint32_t)(s[i] - '0') >> 2; 416 | } else if (j == 21) { 417 | x[k * 3 + 2] |= (uint32_t)(s[i] - '0') >> 1; 418 | } 419 | x[k * 3 + j / 11] |= (uint32_t)(s[i] - '0') << j % 11 * 3 << j / 11 % 3; 420 | } 421 | while (col > 0) { 422 | if (x[col - 1] > 0) { 423 | break; 424 | } 425 | col--; 426 | } 427 | return col; 428 | } 429 | 430 | int32_t bn_str2num_base10(const char s[], int32_t len, uint32_t x[], int32_t col) 431 | { 432 | uint64_t t = 0; 433 | int32_t i, j, k = (len - 1) % 9 + 1; 434 | memset(x, 0, col * sizeof(uint32_t)); 435 | for (col = 1, i = 0; s[i]; i++) { 436 | if (s[i] < '0' || '9' < s[i]) { 437 | return -1; 438 | } 439 | t = t * 10 + (uint64_t)(s[i] - '0'); 440 | if (--k > 0) continue; 441 | t <<= 32; 442 | for (j = 0; j < col; j++) { 443 | t = (uint64_t)x[j] * 1000000000 + (t >> 32); 444 | x[j] = (uint32_t)t; 445 | } 446 | if (t >> 32) x[col++] = (uint32_t)(t >> 32); 447 | t = 0; 448 | k = 9; 449 | } 450 | while (col > 0) { 451 | if (x[col - 1] > 0) { 452 | break; 453 | } 454 | col--; 455 | } 456 | return col; 457 | } 458 | 459 | int32_t bn_str2num_base16(const char s[], int32_t len, uint32_t x[], int32_t col) 460 | { 461 | int32_t i, j; 462 | memset(x, 0, col * sizeof(uint32_t)); 463 | for (i = 0; s[i]; i++) { 464 | int c = toupper(s[i]); 465 | if ('0' <= c && c <= '9') { 466 | c -= '0'; 467 | } else if ('A' <= c && c <= 'F') { 468 | c += 10 - 'A'; 469 | } else { 470 | return -1; 471 | } 472 | j = len - i - 1; 473 | x[j / 8] |= c << j % 8 * 4; 474 | } 475 | while (col > 0) { 476 | if (x[col - 1] > 0) { 477 | break; 478 | } 479 | col--; 480 | } 481 | return col; 482 | } 483 | 484 | char *bn_num2str_base2(char *p, uint32_t x[], int32_t col) 485 | { 486 | int32_t i, j; 487 | *p = 0; 488 | for (i = 0; i < col; i++) { 489 | for (j = 0; j < 32; j++) { 490 | uint32_t n = x[i] >> j; 491 | if (i < col - 1 || n != 0) { 492 | *--p = (n & 0x1) + '0'; 493 | } 494 | } 495 | } 496 | return p; 497 | } 498 | 499 | char *bn_num2str_base8(char *p, uint32_t x[], int32_t col) 500 | { 501 | int32_t i; 502 | *p = 0; 503 | for (i = 0; i < col; i++) { 504 | uint32_t j, k, m = x[i]; 505 | if (i % 3 == 0) { 506 | k = 11; 507 | } else if (i % 3 == 1) { 508 | *p += (m & 0x1) << 2; 509 | k = 11; 510 | } else { 511 | *p += (m & 0x3) << 1; 512 | k = 10; 513 | } 514 | for (j = 0; j < k; j++) { 515 | uint32_t n = m >> (3 * j + i % 3); 516 | if (i < col - 1 || n != 0) { 517 | *--p = (n & 0x7) + '0'; 518 | } 519 | } 520 | } 521 | return p; 522 | } 523 | 524 | char *bn_num2str_base10(char *p, uint32_t x[], int32_t col) 525 | { 526 | *p = 0; 527 | while (col > 0) { 528 | int32_t i; 529 | uint64_t t = 0; 530 | for (i = col - 1; i >= 0; i--) { 531 | t = t << 32 | x[i]; 532 | x[i] = (uint32_t)(t / 1000000000); 533 | t = t % 1000000000; 534 | } 535 | while (col > 0) { 536 | if (x[col - 1] > 0) { 537 | break; 538 | } 539 | col--; 540 | } 541 | for (i = 0; i < 9 && (col > 0 || t > 0); i++, t /= 10) { 542 | *--p = (char)(t % 10) + '0'; 543 | } 544 | } 545 | return p; 546 | } 547 | 548 | char *bn_num2str_base16(char *p, uint32_t x[], int32_t col) 549 | { 550 | int32_t i, j; 551 | *p = 0; 552 | for (i = 0; i < col; i++) { 553 | for (j = 0; j < 8; j++) { 554 | uint32_t n = x[i] >> (4 * j); 555 | if (i < col - 1 || n != 0) { 556 | char c = n & 0xf; 557 | *--p = (c < 10) ? c + '0' : c - 10 + 'a'; 558 | } 559 | } 560 | } 561 | return p; 562 | } 563 | -------------------------------------------------------------------------------- /src/bignum.h: -------------------------------------------------------------------------------- 1 | #ifndef BIGNUM_H 2 | #define BIGNUM_H 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | int64_t gcd(int32_t x, int32_t y); 11 | int64_t lcm(int32_t x, int32_t y); 12 | int32_t find1_32(uint32_t val); 13 | int32_t bn_eq(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 14 | int32_t bn_gt(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 15 | int32_t bn_ge(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 16 | void bn_add(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 17 | void bn_sub(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 18 | void bn_mul(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 19 | void bn_sqr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx); 20 | void bn_sftl(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n); 21 | void bn_sftr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n); 22 | void bn_div(uint32_t q[], int32_t *colq, uint32_t r[], int32_t *colr, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly); 23 | int32_t bn_str2num_base2(const char s[], int32_t len, uint32_t x[], int32_t col); 24 | int32_t bn_str2num_base8(const char s[], int32_t len, uint32_t x[], int32_t col); 25 | int32_t bn_str2num_base10(const char s[], int32_t len, uint32_t x[], int32_t col); 26 | int32_t bn_str2num_base16(const char s[], int32_t len, uint32_t x[], int32_t col); 27 | char *bn_num2str_base2(char *p, uint32_t x[], int32_t col); 28 | char *bn_num2str_base8(char *p, uint32_t x[], int32_t col); 29 | char *bn_num2str_base10(char *p, uint32_t x[], int32_t col); 30 | char *bn_num2str_base16(char *p, uint32_t x[], int32_t col); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif /* BIGNUM_H */ 37 | -------------------------------------------------------------------------------- /src/init.scm: -------------------------------------------------------------------------------- 1 | ; This is a init file for Mini-Scheme. 2 | 3 | (define call/cc call-with-current-continuation) 4 | -------------------------------------------------------------------------------- /src/makefile: -------------------------------------------------------------------------------- 1 | # Makefile for System-V flavoured UNIX 2 | # 3 | #CC = gcc # you may use both ANSI and pre-ANSI 4 | 5 | # 6 | # Please see source and/or README for system defition 7 | # 8 | #CFLAGS = -g -DSYSV -traditional -traditional-cpp -Wid-clash-8 9 | CFLAGS = -O 10 | 11 | all : miniscm 12 | 13 | miniscm : miniscm.o bignum.o 14 | $(CC) $(CFLAGS) -o miniscm miniscm.o bignum.o -lm 15 | 16 | miniscm.o : miniscm.c miniscm.h 17 | $(CC) $(CFLAGS) -c miniscm.c 18 | 19 | bignum.o : bignum.c bignum.h 20 | $(CC) $(CFLAGS) -c bignum.c 21 | 22 | clean : 23 | rm -f core *.o miniscm *~ 24 | 25 | -------------------------------------------------------------------------------- /src/miniscm.h: -------------------------------------------------------------------------------- 1 | #ifndef MINISCHEME_H 2 | #define MINISCHEME_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | /* 13 | * Define or undefine following symbols as you need. 14 | */ 15 | #ifndef STANDALONE 16 | #define STANDALONE 1 /* define 0 if you want to build as a Library */ 17 | #endif 18 | /* #define USE_SCHEME_STACK */ /* define this if you want original-Stack */ 19 | #define USE_COPYING_GC /* undef this if you do not want to use Copying GC */ 20 | 21 | /* 22 | * Basic memory allocation units 23 | */ 24 | #define CELL_SEGSIZE 500000 /* # of cells in one segment */ 25 | 26 | #define MAXFIL 64 /* stack size of loading files */ 27 | 28 | typedef struct cell *pointer; 29 | typedef pointer (*foreign_func)(pointer); 30 | 31 | /* cell structure */ 32 | struct cell { 33 | unsigned short _flag; 34 | unsigned char _extflag; 35 | unsigned char _isfixnum; 36 | union { 37 | struct { 38 | char *_svalue; 39 | size_t _length; 40 | } _string; 41 | union { 42 | struct { 43 | int32_t _ivalue; 44 | struct cell *_bignum; 45 | } _integer; 46 | double _rvalue; 47 | } _number; 48 | struct { 49 | FILE *_file; 50 | char *_curr; 51 | } _port; 52 | foreign_func _ff; 53 | struct { 54 | struct cell *_car; 55 | struct cell *_cdr; 56 | } _cons; 57 | #ifdef USE_COPYING_GC 58 | struct cell *_forwarded; 59 | #endif 60 | } _object; 61 | }; 62 | 63 | #define T_STRING 1 /* 0000000000000001 */ 64 | #define T_NUMBER 2 /* 0000000000000010 */ 65 | #define T_SYMBOL 4 /* 0000000000000100 */ 66 | #define T_SYNTAX 8 /* 0000000000001000 */ 67 | #define T_PROC 16 /* 0000000000010000 */ 68 | #define T_PAIR 32 /* 0000000000100000 */ 69 | #define T_CLOSURE 64 /* 0000000001000000 */ 70 | #define T_CONTINUATION 128 /* 0000000010000000 */ 71 | #define T_CHARACTER 256 /* 0000000100000000 */ 72 | #define T_PORT 512 /* 0000001000000000 */ 73 | #define T_VECTOR 1024 /* 0000010000000000 */ 74 | #define T_FOREIGN 2048 /* 0000100000000000 */ 75 | #define T_MEMBLOCK 4096 /* 0001000000000000 */ 76 | #define T_ENVIRONMENT 8192 /* 0010000000000000 */ 77 | #define T_ATOM 16384 /* 0100000000000000 */ /* only for gc */ 78 | #define CLRATOM 49151 /* 1011111111111111 */ /* only for gc */ 79 | #define MARK 32768 /* 1000000000000000 */ 80 | #define UNMARK 32767 /* 0111111111111111 */ 81 | #ifdef USE_COPYING_GC 82 | # define T_FORWARDED 32768 /* 1000000000000000 */ /* only for gc */ 83 | #endif 84 | 85 | #define T_VALUES 1 /* 0000000000000001 */ /* for call-with-values */ 86 | 87 | #define T_PROMISE 1 /* 00000001 */ 88 | #define T_RESULTREADY 2 /* 00000010 */ 89 | #define T_MACRO 4 /* 00000100 */ 90 | #define T_DEFMACRO 8 /* 00001000 */ /* for define-macro */ 91 | #define T_DEFSYNTAX 32768 /* 1000000000000000 */ /* for define-syntax */ 92 | #define T_SYNTAXNUM 32767 /* 0111111111111111 */ /* for define-syntax */ 93 | 94 | /* macros for cell operations */ 95 | #define type(p) ((p)->_flag) 96 | #define exttype(p) ((p)->_extflag) 97 | 98 | #define is_string(p) (type(p)&T_STRING) 99 | #define strvalue(p) ((p)->_object._string._svalue) 100 | #define strlength(p) ((p)->_object._string._length) 101 | 102 | #define is_number(p) (type(p)&T_NUMBER) 103 | #define ivalue(p) ((p)->_object._number._integer._ivalue) 104 | #define bignum(p) ((p)->_object._number._integer._bignum) 105 | #define rvalue(p) ((p)->_object._number._rvalue) 106 | #define nvalue(p) ((p)->_isfixnum ? ivalue(p) : rvalue(p)) 107 | #define is_integer(p) (is_number(p) && ((p)->_isfixnum || floor(rvalue(p) + 0.5) == rvalue(p))) 108 | #define set_num_integer(p) ((p)->_isfixnum = 1) 109 | #define set_num_real(p) ((p)->_isfixnum = 0) 110 | 111 | #define is_pair(p) (type(p)&T_PAIR) 112 | #define car(p) ((p)->_object._cons._car) 113 | #define cdr(p) ((p)->_object._cons._cdr) 114 | 115 | #define is_symbol(p) (type(p)&T_SYMBOL) 116 | #define symname(p) strvalue(p) 117 | 118 | #define is_syntax(p) (type(p)&T_SYNTAX) 119 | #define is_proc(p) (type(p)&T_PROC) 120 | #define syntaxnum(p) (*(short *)&(p)->_extflag) 121 | #define procnum(p) (int)ivalue(p) 122 | 123 | #define is_closure(p) (type(p)&T_CLOSURE) 124 | #define is_macro(p) (exttype(p)&T_MACRO) 125 | #define closure_code(p) car(p) 126 | #define closure_env(p) cdr(p) 127 | 128 | #define is_continuation(p) (type(p)&T_CONTINUATION) 129 | #define cont_dump(p) cdr(p) 130 | 131 | #define is_character(p) (type(p)&T_CHARACTER) 132 | 133 | enum { 134 | port_input = 1, 135 | port_output = 2, 136 | port_file = 4, 137 | port_string = 8, 138 | port_eof = 16, 139 | }; 140 | #define is_port(p) (type(p) & T_PORT) 141 | #define is_inport(p) (is_port(p) && ((p)->_isfixnum & port_input)) 142 | #define is_outport(p) (is_port(p) && ((p)->_isfixnum & port_output)) 143 | #define is_fileport(p) (is_port(p) && ((p)->_isfixnum & port_file)) 144 | #define is_strport(p) (is_port(p) && ((p)->_isfixnum & port_string)) 145 | #define is_eofport(p) (is_port(p) && ((p)->_isfixnum & port_eof)) 146 | #define port_file(p) ((p)->_object._port._file) 147 | #define port_curr(p) ((p)->_object._port._curr) 148 | 149 | #define is_vector(p) (type(p) & T_VECTOR) 150 | 151 | #define is_foreign(p) (type(p) & T_FOREIGN) 152 | #define foreignfnc(p) ((p)->_object._ff) 153 | 154 | #define is_memblock(p) (type(p) & T_MEMBLOCK) 155 | 156 | #define is_environment(p) (type(p) & T_ENVIRONMENT) 157 | #define setenvironment(p) type(p) |= T_ENVIRONMENT 158 | 159 | #define is_promise(p) (exttype(p) & T_PROMISE) 160 | #define setpromise(p) exttype(p) |= T_PROMISE 161 | #define is_resultready(p) (exttype(p) & T_RESULTREADY) 162 | #define setresultready(p) exttype(p) |= T_RESULTREADY 163 | 164 | #define is_atom(p) (type(p)&T_ATOM) 165 | #define setatom(p) type(p) |= T_ATOM 166 | #define clratom(p) type(p) &= CLRATOM 167 | 168 | #define is_mark(p) (type(p)&MARK) 169 | #define setmark(p) type(p) |= MARK 170 | #define clrmark(p) type(p) &= UNMARK 171 | 172 | #define caar(p) car(car(p)) 173 | #define cadr(p) car(cdr(p)) 174 | #define cdar(p) cdr(car(p)) 175 | #define cddr(p) cdr(cdr(p)) 176 | #define cadar(p) car(cdr(car(p))) 177 | #define caddr(p) car(cdr(cdr(p))) 178 | #define cadaar(p) car(cdr(car(car(p)))) 179 | #define cadddr(p) car(cdr(cdr(cdr(p)))) 180 | #define cddddr(p) cdr(cdr(cdr(cdr(p)))) 181 | 182 | extern pointer NIL; 183 | extern pointer T; 184 | extern pointer F; 185 | extern pointer EOF_OBJ; 186 | extern pointer mark_x; 187 | extern pointer mark_y; 188 | extern jmp_buf error_jmp; 189 | 190 | pointer cons(pointer a, pointer b); 191 | pointer mk_character(int c); 192 | pointer mk_integer(int32_t num); 193 | pointer mk_real(double num); 194 | pointer mk_number(pointer v); 195 | pointer mk_string(const char *str); 196 | pointer mk_empty_string(size_t len, int fill); 197 | pointer mk_symbol(const char *name); 198 | pointer mk_uninterned_symbol(const char *name); 199 | pointer gensym(void); 200 | pointer mk_atom(const char *q); 201 | pointer mk_const(const char *name); 202 | pointer mk_port(FILE *fp, int prop); 203 | pointer mk_port_string(pointer p, int prop); 204 | pointer mk_vector(int len); 205 | pointer vector_elem(pointer v, int i); 206 | pointer set_vector_elem(pointer v, int i, pointer a); 207 | int list_length(pointer a); 208 | 209 | void scheme_init(void); 210 | void scheme_deinit(void); 211 | int scheme_load_file(FILE *fin); 212 | int scheme_load_string(const char *cmd); 213 | void scheme_register_foreign_func(const char *name, foreign_func ff); 214 | pointer scheme_apply0(const char *procname); 215 | pointer scheme_apply1(const char *procname, pointer argslist); 216 | 217 | #ifdef __cplusplus 218 | } 219 | #endif 220 | 221 | #endif /* MINISCHEME_H */ 222 | -------------------------------------------------------------------------------- /src/msvcbuild.bat: -------------------------------------------------------------------------------- 1 | @setlocal 2 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_WARNINGS 3 | @set MSLINK=link /nologo 4 | @set MSLIB=lib /nologo 5 | 6 | @if "%1" == "static" goto STATIC 7 | @if not exist bin\ ( 8 | mkdir bin 9 | ) 10 | %MSCOMPILE% /MT /DSTANDALONE=1 miniscm.c 11 | %MSCOMPILE% /MT bignum.c 12 | %MSLINK% /out:bin\miniscm.exe miniscm.obj bignum.obj 13 | 14 | @goto END 15 | 16 | :STATIC 17 | @if not exist lib\ ( 18 | mkdir lib 19 | ) 20 | %MSCOMPILE% /MT /DSTANDALONE=0 miniscm.c 21 | %MSCOMPILE% /MT bignum.c 22 | %MSLIB% /out:lib\miniscm.lib miniscm.obj bignum.obj 23 | 24 | :END 25 | del miniscm.obj bignum.obj 26 | --------------------------------------------------------------------------------