├── LICENSE
├── README.md
├── example
    ├── README.md
    ├── msvcbuild.bat
    └── repl.c
├── ext
    ├── re
    │   ├── COPYRIGHT
    │   ├── Makefile
    │   ├── README
    │   ├── WHATSNEW
    │   ├── cclass.h
    │   ├── cname.h
    │   ├── debug.c
    │   ├── engine.c
    │   ├── engine.ih
    │   ├── fake
    │   │   ├── limits.h
    │   │   ├── memmove.c
    │   │   └── stdlib.h
    │   ├── main.c
    │   ├── mkh
    │   ├── msvcbuild.bat
    │   ├── re.c
    │   ├── regcomp.c
    │   ├── regcomp.ih
    │   ├── regerror.c
    │   ├── regerror.ih
    │   ├── regex.3
    │   ├── regex.7
    │   ├── regex.h
    │   ├── regex2.h
    │   ├── regexec.c
    │   ├── regfree.c
    │   ├── split.c
    │   ├── tests
    │   └── utils.h
    └── tsx
    │   ├── LICENSE
    │   ├── Makefile
    │   ├── README
    │   ├── listhome.scm
    │   ├── msvcbuild.bat
    │   ├── smtp.scm
    │   ├── srepl.scm
    │   ├── tsx-functions.txt
    │   ├── tsx.c
    │   └── tsx.h
└── src
    ├── README
    ├── README.md
    ├── bignum.c
    ├── bignum.h
    ├── init.scm
    ├── makefile
    ├── miniscm.c
    ├── miniscm.h
    └── msvcbuild.bat


/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Tatsuya Watanabe
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | minischeme
  2 | ==========
  3 | 
  4 | Mini-Scheme Interpreter with Copying GC
  5 | 
  6 | 
  7 | Features
  8 | --------
  9 | 
 10 | * based on Mini-Scheme 0.85k4-a
 11 | * bignum (multi-precision integer) operations supported
 12 | * continuations support without using 'SCHEME STACK' (#undef USE_SCHEME_STACK)
 13 | * implemented a Copying GC using the Cheney's algorithm (#define USE_COPYING_GC)
 14 | * implemented Hygienic macro 'syntax-rules' (supports SRFI 46)
 15 | * implemented Legacy macro 'define-macro' ('gensym' also implemented)
 16 | * supports UTF-8 multibyte characters (limitations: NO normalization nor toupper/tolower, ...)
 17 | * reinventing the [tinyscheme](https://github.com/ignorabimus/tinyscheme "Experimental fork of TinyScheme and extensions TSX, RE.") :)
 18 | 
 19 | 
 20 | Build (with MSVC)
 21 | -----------------
 22 | 
 23 | Install "Visual C++ 2015" or later.
 24 | 
 25 | * I compiled using "Microsoft Visual C++ 2015", but "2013" or earlier maybe OK.
 26 | 
 27 | ### Setting up environemt
 28 | 
 29 | Open the Visual Studio command prompt, or open Normal command prompt then run
 30 | 
 31 |     > (Visual Studio installed path)\VC\vcvarsall.bat
 32 | 
 33 | and change directory to unpacking source files.
 34 | 
 35 |     > cd src
 36 | 
 37 | ### Build an executable
 38 | 
 39 | To build an executable, just run
 40 | 
 41 |     > msvcbuild.bat
 42 | 
 43 | and you'll get
 44 | 
 45 |     > bin\minischeme.exe
 46 | 
 47 | ### Build a static library
 48 | 
 49 | To build as a static library, run with "static" option
 50 | 
 51 |     > msvcbuild.bat static
 52 | 
 53 | and you'll get
 54 | 
 55 |     > lib\minischeme.lib
 56 | 
 57 | 
 58 | [R5RS standard](http://www.schemers.org/Documents/Standards/R5RS/ "schemers.org: Documents: Standards: R5RS") compatibility
 59 | ------------------
 60 | 
 61 | * [x] +
 62 | * [x] *
 63 | * [x] -
 64 | * [x] /
 65 | * [x] =
 66 | * [x] <
 67 | * [x] >
 68 | * [x] <=
 69 | * [x] >=
 70 | * [x] abs
 71 | * [x] acos
 72 | * [x] and
 73 | * [ ] angle
 74 | * [x] append
 75 | * [x] apply
 76 | * [x] asin
 77 | * [x] assoc
 78 | * [x] assq
 79 | * [x] assv
 80 | * [x] atan
 81 | * [x] begin
 82 | * [x] boolean?
 83 | * [x] caaaar
 84 | * [x] caaadr
 85 | * [x] caaar
 86 | * [x] caadr
 87 | * [x] caadar
 88 | * [x] caaddr
 89 | * [x] caar
 90 | * [x] cadaar
 91 | * [x] cadadr
 92 | * [x] cadar
 93 | * [x] caddr
 94 | * [x] caddar
 95 | * [x] cadddr
 96 | * [x] cadr
 97 | * [x] call-with-current-continuation
 98 | * [x] call-with-input-file
 99 | * [x] call-with-output-file
100 | * [x] call-with-values
101 | * [x] car
102 | * [x] case
103 | * [x] cdar
104 | * [x] cdaar
105 | * [x] cdaaar
106 | * [x] cdaadr
107 | * [x] cdadar
108 | * [x] cdaddr
109 | * [x] cdadr
110 | * [x] cddr
111 | * [x] cddar
112 | * [x] cddaar
113 | * [x] cddadr
114 | * [x] cdddr
115 | * [x] cdddar
116 | * [x] cddddr
117 | * [x] cdr
118 | * [x] ceiling
119 | * [x] char->integer
120 | * [x] char-alphabetic?
121 | * [x] char-ci<=?
122 | * [x] char-ci<?
123 | * [x] char-ci=?
124 | * [x] char-ci>=?
125 | * [x] char-ci>?
126 | * [x] char-downcase
127 | * [x] char-lower-case?
128 | * [x] char-numeric?
129 | * [x] char-ready?
130 | * [x] char-upcase
131 | * [x] char-upper-case?
132 | * [x] char-whitespace?
133 | * [x] char<=?
134 | * [x] char<?
135 | * [x] char=?
136 | * [x] char>=?
137 | * [x] char>?
138 | * [x] char?
139 | * [x] close-input-port
140 | * [x] close-output-port
141 | * [ ] complex?
142 | * [x] cond
143 | * [x] cons
144 | * [x] cos
145 | * [x] current-input-port
146 | * [x] current-output-port
147 | * [x] define
148 | * [x] define-syntax
149 | * [x] delay
150 | * [ ] denominator
151 | * [x] display
152 | * [x] do
153 | * [x] dynamic-wind
154 | * [x] else
155 | * [x] eof-object?
156 | * [x] eq?
157 | * [x] equal?
158 | * [x] eqv?
159 | * [x] eval
160 | * [x] even?
161 | * [x] exact->inexact
162 | * [x] exact?
163 | * [x] exp
164 | * [x] expt
165 | * [x] floor
166 | * [x] for-each
167 | * [x] force
168 | * [x] gcd
169 | * [x] if
170 | * [ ] imag-part
171 | * [x] inexact->exact
172 | * [x] inexact?
173 | * [x] input-port?
174 | * [x] integer->char
175 | * [x] integer?
176 | * [x] interaction-environment
177 | * [x] lambda
178 | * [x] lcm
179 | * [x] length
180 | * [x] let
181 | * [x] let*
182 | * [x] let-syntax
183 | * [x] letrec
184 | * [x] letrec-syntax
185 | * [x] list
186 | * [x] list->string
187 | * [x] list->vector
188 | * [x] list-ref
189 | * [x] list-tail
190 | * [x] list?
191 | * [x] load
192 | * [x] log
193 | * [ ] magnitude
194 | * [ ] make-polar
195 | * [ ] make-rectangular
196 | * [x] make-string
197 | * [x] make-vector
198 | * [x] map
199 | * [x] max
200 | * [x] member
201 | * [x] memq
202 | * [x] memv
203 | * [x] min
204 | * [x] modulo
205 | * [x] negative?
206 | * [x] newline
207 | * [x] not
208 | * [ ] null-environment
209 | * [x] null?
210 | * [x] number->string
211 | * [x] number?
212 | * [ ] numerator
213 | * [x] odd?
214 | * [x] open-input-file
215 | * [x] open-output-file
216 | * [x] or
217 | * [x] output-port?
218 | * [x] pair?
219 | * [x] peek-char?
220 | * [x] port?
221 | * [x] positive?
222 | * [x] procedure?
223 | * [x] quasiquote
224 | * [x] quote
225 | * [x] quotient
226 | * [ ] rational?
227 | * [ ] rationalize
228 | * [x] read
229 | * [x] read-char
230 | * [ ] real-part
231 | * [x] real?
232 | * [x] remainder
233 | * [x] reverse
234 | * [x] round
235 | * [ ] scheme-report-environment
236 | * [x] set!
237 | * [x] set-car!
238 | * [x] set-cdr!
239 | * [x] sin
240 | * [x] sqrt
241 | * [x] string
242 | * [x] string->list
243 | * [x] string->number
244 | * [x] string->symbol
245 | * [x] string-append
246 | * [x] string-ci<=?
247 | * [x] string-ci<?
248 | * [x] string-ci=?
249 | * [x] string-ci>=?
250 | * [x] string-ci>?
251 | * [x] string-copy
252 | * [x] string-fill!
253 | * [x] string-length
254 | * [x] string-ref
255 | * [x] string-set!
256 | * [x] string<=?
257 | * [x] string<?
258 | * [x] string=?
259 | * [x] string>=?
260 | * [x] string>?
261 | * [x] string?
262 | * [x] substring
263 | * [x] symbol->string
264 | * [x] symbol?
265 | * [x] syntax-rules
266 | * [x] tan
267 | * [ ] transcript-off
268 | * [ ] transcript-on
269 | * [x] truncate
270 | * [x] values
271 | * [x] vector
272 | * [x] vector->list
273 | * [x] vector-fill!
274 | * [x] vector-length
275 | * [x] vector-ref
276 | * [x] vector-set!
277 | * [x] vector?
278 | * [x] with-input-from-file
279 | * [x] with-output-to-file
280 | * [x] write
281 | * [x] write-char
282 | * [x] zero?
283 | 
284 | 
285 | Links
286 | -----
287 | 
288 | [D. Souflis, J. Shapiro - TinyScheme Home](http://tinyscheme.sourceforge.net/home.html)
289 | 
290 | [Visual Studio Downloads](http://www.visualstudio.com/downloads/)
291 | 
292 | 
293 | License
294 | -------
295 | 
296 | Copyright (c) 2015 Tatsuya Watanabe. See the LICENSE file for license rights and limitations (MIT).
297 | 


--------------------------------------------------------------------------------
/example/README.md:
--------------------------------------------------------------------------------
 1 | # REPL Example
 2 | 
 3 | * depends on [replxx](https://github.com/AmokHuginnsson/replxx)
 4 | 
 5 | ## Build (with MSVC)
 6 | 
 7 | ### Setting up environemt
 8 | 
 9 | Clone [replxx](https://github.com/AmokHuginnsson/replxx) to
10 | 
11 |     (Path of minischeme)\..\replxx
12 | 
13 | Open the Visual Studio command prompt and change directory
14 | 
15 |     > cd (Path of minischeme)\example
16 | 
17 | ### Build an executable
18 | 
19 | To build an executable, just run
20 | 
21 |     > msvcbuild.bat
22 | 
23 | and find it under bin\
24 | 
25 |     > bin\repl.exe
26 | 


--------------------------------------------------------------------------------
/example/msvcbuild.bat:
--------------------------------------------------------------------------------
 1 | @setlocal
 2 | cd ..\..\replxx
 3 | @if not exist build\ (
 4 |   mkdir build
 5 | )
 6 | cd build
 7 | cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="/MT /O2 /Ob2 /DNDEBUG" ..
 8 | msbuild replxx.vcxproj /p:Configuration=Release
 9 | @endlocal
10 | 
11 | @setlocal
12 | cd ..\src
13 | call msvcbuild.bat static
14 | @endlocal
15 | 
16 | @setlocal
17 | cd ..\ext\re
18 | call msvcbuild.bat
19 | @endlocal
20 | 
21 | @setlocal
22 | cd ..\ext\tsx
23 | call msvcbuild.bat
24 | @endlocal
25 | 
26 | @setlocal
27 | @set REPLXX_H_DIR=..\..\replxx\include
28 | @set SCHEME_H_DIR=..\src
29 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_WARNINGS /I%SCHEME_H_DIR% /I%REPLXX_H_DIR%
30 | @set MSLINK=link /nologo
31 | @set EXT_LIBS=..\ext\re\lib\re.lib ..\ext\tsx\lib\tsx.lib ..\..\replxx\build\Release\replxx.lib
32 | 
33 | @if not exist bin\ (
34 |   mkdir bin
35 | )
36 | 
37 | %MSCOMPILE% /MT repl.c
38 | %MSLINK% /out:bin\repl.exe repl.obj ..\src\lib\miniscm.lib %EXT_LIBS%
39 | 
40 | del repl.obj
41 | @endlocal
42 | 


--------------------------------------------------------------------------------
/example/repl.c:
--------------------------------------------------------------------------------
  1 | #include "miniscm.h"
  2 | #include <ctype.h>
  3 | #include <stdio.h>
  4 | #include <string.h>
  5 | #include <stdlib.h>
  6 | #include <errno.h>
  7 | #ifdef _WIN32
  8 | #include <winsock2.h>
  9 | #endif
 10 | 
 11 | #include "replxx.h"
 12 | void init_re(void);
 13 | void init_tsx(void);
 14 | extern pointer oblist;
 15 | 
 16 | static int get_input_length(const char *input)
 17 | {
 18 | 	int escaped = 0, dquotes = 0;
 19 | 	int n_paren = 0, s_paren = 0, c_paren = 0;
 20 | 	const char *p;
 21 | 
 22 | 	if (input[0] == '\0') {
 23 | 		return 0;
 24 | 	}
 25 | 
 26 | 	for (p = input; *p == '\t' || *p == ' '; p++);
 27 | 
 28 | 	while (*p) {
 29 | 		if (escaped) {
 30 | 			escaped = 0;
 31 | 			p++;
 32 | 			continue;
 33 | 		}
 34 | 		switch (*p) {
 35 | 		case '\\':
 36 | 			escaped = 1;
 37 | 			break;
 38 | 		case '"':
 39 | 			dquotes = !dquotes;
 40 | 			break;
 41 | 		case ';':
 42 | 			if (dquotes == 0) {
 43 | 				do { p++; } while (*p != '\r' && *p != '\n' && *p != '\0');
 44 | 				continue;
 45 | 			}
 46 | 			break;
 47 | 		case '\t':
 48 | 		case ' ':
 49 | 			if (dquotes == 0 && n_paren <= 0 && s_paren <= 0 && c_paren <= 0) {
 50 | 				return p - input;
 51 | 			}
 52 | 			break;
 53 | 		case '(':
 54 | 		case '[':
 55 | 		case '{':
 56 | 			if (dquotes == 0) {
 57 | 				if (p - input > 0 && n_paren <= 0 && s_paren <= 0 && c_paren <= 0) {
 58 | 					return p - input;
 59 | 				}
 60 | 				if (*p == '(') n_paren++;
 61 | 				else if (*p == '[') s_paren++;
 62 | 				else c_paren++;
 63 | 			}
 64 | 			break;
 65 | 		case ')':
 66 | 		case ']':
 67 | 		case '}':
 68 | 			if (dquotes == 0) {
 69 | 				if (*p == ')') n_paren--;
 70 | 				else if (*p == ']') s_paren--;
 71 | 				else c_paren--;
 72 | 				if (n_paren <= 0 && s_paren <= 0 && c_paren <= 0) {
 73 | 					return p - input + 1;
 74 | 				}
 75 | 			}
 76 | 			break;
 77 | 		default:
 78 | 			break;
 79 | 		}
 80 | 		p++;
 81 | 	}
 82 | 
 83 | 	return (n_paren > 0 || s_paren > 0 || c_paren > 0) ? -1 : p - input;
 84 | }
 85 | 
 86 | static void completionHook(char const* prefix, int bp, replxx_completions* lc, void* ud)
 87 | {
 88 | 	char **examples = (char **)ud;
 89 | 	size_t i;
 90 | 	for (i = 0; examples[i] != NULL; ++i) {
 91 | 		if (strncmp(prefix + bp, examples[i], strlen(prefix) - bp) == 0) {
 92 | 			replxx_add_completion(lc, examples[i]);
 93 | 		}
 94 | 	}
 95 | }
 96 | 
 97 | static void hintHook(char const* prefix, int bp, replxx_hints* lc, ReplxxColor* c, void* ud)
 98 | {
 99 | 	char **examples = (char **)ud;
100 | 	size_t i;
101 | 	int len = strlen(prefix);
102 | 	if (len > bp) {
103 | 		for (i = 0; examples[i] != NULL; ++i) {
104 | 			if (strncmp(prefix + bp, examples[i], strlen(prefix) - bp) == 0) {
105 | 				replxx_add_hint(lc, examples[i] + len - bp);
106 | 			}
107 | 		}
108 | 	}
109 | }
110 | 
111 | static void colorHook(char const* str_, ReplxxColor* colors_, int size_, void* ud)
112 | {
113 | 	int i = 0;
114 | 	for (; i < size_; ++i) {
115 | 		if (isdigit(str_[i])) {
116 | 			colors_[i] = BRIGHTMAGENTA;
117 | 		}
118 | 	}
119 | }
120 | 
121 | static int compare_hints(const void *a, const void *b)
122 | {
123 | 	return strcmp(*(char **)a, *(char **)b);
124 | }
125 | 
126 | int main(int argc, char *argv[])
127 | {
128 | 	int ret;
129 | 	pointer x;
130 | 	char *examples_base = NULL, **examples = NULL;
131 | 	size_t hints_size = 0, hints_num = 0;
132 | 
133 | #ifdef _WIN32
134 | 	WSADATA wsaData;
135 | 	if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
136 | 		return 1;
137 | 	}
138 | #endif
139 | 
140 | 	scheme_init();
141 | 	init_re();
142 | 	init_tsx();
143 | 
144 | 	ret = scheme_load_string("(define call/cc call-with-current-continuation)");
145 | 	if (ret != 0) {
146 | 		fprintf(stderr, "Unable to init.\n");
147 | 		return 1;
148 | 	}
149 | 
150 | 	for (x = oblist; x != NIL; x = cdr(x)) {
151 | 		hints_size += strlen(symname(car(x))) + 1;
152 | 		hints_num++;
153 | 	}
154 | 
155 | 	examples_base = (char *)malloc(hints_size + 1);
156 | 	if (examples_base == NULL) {
157 | 		fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", hints_size + 1);
158 | 		return 1;
159 | 	}
160 | 
161 | 	examples = (char **)malloc((hints_num + 1) * sizeof (char *));
162 | 	if (examples == NULL) {
163 | 		fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", (hints_num + 1) * sizeof (char *));
164 | 		return 1;
165 | 	}
166 | 
167 | 	hints_size = hints_num = 0;
168 | 	examples[0] = examples_base;
169 | 	for (x = oblist; x != NIL; x = cdr(x)) {
170 | 		hints_size = strlen(symname(car(x))) + 1;
171 | 		memcpy(examples[hints_num], symname(car(x)), hints_size);
172 | 		examples[hints_num + 1] = examples[hints_num] + hints_size;
173 | 		hints_num++;
174 | 	}
175 | 	examples[hints_num] = NULL;
176 | 
177 | 	qsort(examples, hints_num, sizeof (char *), compare_hints);
178 | 
179 | 	char const* prompt0 = "\x1b[1;32m>>>\x1b[0m ";
180 | 	char const* prompt1 = "\x1b[1;32m...\x1b[0m ";
181 | 
182 | 	Replxx* replxx = replxx_init();
183 | 	replxx_install_window_change_handler(replxx);
184 | 	replxx_set_completion_callback(replxx, completionHook, (void *)examples);
185 | 	replxx_set_highlighter_callback(replxx, colorHook, NULL);
186 | 	replxx_set_hint_callback(replxx, hintHook, (void *)examples);
187 | 	replxx_set_word_break_characters(replxx, " \\\"'`;@{([])}");
188 | 
189 | 	if (argc > 1) {
190 | 		FILE *fin = fopen(argv[1], "rb");
191 | 		if (fin == NULL) {
192 | 			fprintf(stderr, "Unable to open %s\n", argv[1]);
193 | 			return 1;
194 | 		}
195 | 		scheme_load_file(fin);
196 | 	}
197 | 
198 | 	printf("Hello, This is Mini-Scheme REPL.\n");
199 | 
200 | 	ret = 0;
201 | 	while (ret <= 0) {
202 | 		char const* result;
203 | 		do {
204 | 			result = replxx_input(replxx, prompt0);
205 | 		} while ((result == NULL) && (errno == EAGAIN));
206 | 
207 | 		if (result == NULL) {
208 | 			break;
209 | 		}
210 | 		replxx_history_add(replxx, result);
211 | 
212 | 		char *line1 = (char *)malloc(strlen(result) + 2);
213 | 		if (line1 == NULL) {
214 | 			fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", strlen(result) + 2);
215 | 			goto EXIT;
216 | 		}
217 | 		sprintf(line1, "%s\n", result);
218 | 
219 | 		do {
220 | 			int i, len;
221 | 			char *line2, *line3;
222 | 			while ((len = get_input_length(line1)) == -1) {
223 | 				do {
224 | 					result = replxx_input(replxx, prompt1);
225 | 				} while ((result == NULL) && (errno == EAGAIN));
226 | 
227 | 				if (result == NULL) {
228 | 					goto EXIT;
229 | 				}
230 | 				replxx_history_add(replxx, result);
231 | 
232 | 				line3 = (char *)malloc(strlen(line1) + strlen(result) + 2);
233 | 				if (line3 == NULL) {
234 | 					fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", strlen(line1) + strlen(result) + 2);
235 | 					goto EXIT;
236 | 				}
237 | 
238 | 				sprintf(line3, "%s%s\n", line1, result);
239 | 				free(line1);
240 | 				line1 = line3;
241 | 			}
242 | 
243 | 			line2 = (char *)malloc(strlen(line1) - len + 1);
244 | 			if (line2 == NULL) {
245 | 				fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", strlen(line1) - len + 1);
246 | 				goto EXIT;
247 | 			}
248 | 			strcpy(line2, line1 + len);
249 | 
250 | 			for (i = 0; i < len; i++) {
251 | 				if (line1[i] == ';') {
252 | 					i = len;
253 | 					break;
254 | 				} else if (line1[i] != '\r' && line1[i] != '\n' && line1[i] != '\t' && line1[i] != ' ') {
255 | 					break;
256 | 				}
257 | 			}
258 | 			if (i < len) {
259 | 				line1[len] = '\0';
260 | 				line3 = (char *)malloc(len + 20);
261 | 				if (line3 == NULL) {
262 | 					fprintf(stderr, "Fatal: malloc failed (%u bytes).\n", len + 20);
263 | 					goto EXIT;
264 | 				}
265 | 				sprintf(line3, "(display %s)(newline)", line1);
266 | 
267 | 				ret = scheme_load_string(line3);
268 | 				free(line3);
269 | 			}
270 | 
271 | 			strcpy(line1, line2);
272 | 			free(line2);
273 | 		} while (strlen(line1) > 0 && ret <= 0);
274 | 
275 | 		free(line1);
276 | 	}
277 | 
278 | EXIT:
279 | 	replxx_end(replxx);
280 | 	free(examples);
281 | 	free(examples_base);
282 | 	scheme_deinit();
283 | #ifdef _WIN32
284 | 	WSACleanup();
285 | #endif
286 | 
287 | 	return ret;
288 | }
289 | 


--------------------------------------------------------------------------------
/ext/re/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | Copyright 1992, 1993, 1994, 1997 Henry Spencer.  All rights reserved.
 2 | This software is not subject to any license of the American Telephone
 3 | and Telegraph Company or of the Regents of the University of California.
 4 | 
 5 | Permission is granted to anyone to use this software for any purpose on
 6 | any computer system, and to alter it and redistribute it, subject
 7 | to the following restrictions:
 8 | 
 9 | 1. The author is not responsible for the consequences of use of this
10 |    software, no matter how awful, even if they arise from flaws in it.
11 | 
12 | 2. The origin of this software must not be misrepresented, either by
13 |    explicit claim or by omission.  Since few users ever read sources,
14 |    credits must appear in the documentation.
15 | 
16 | 3. Altered versions must be plainly marked as such, and must not be
17 |    misrepresented as being the original software.  Since few users
18 |    ever read sources, credits must appear in the documentation.
19 | 
20 | 4. This notice may not be removed or altered.
21 | 


--------------------------------------------------------------------------------
/ext/re/Makefile:
--------------------------------------------------------------------------------
  1 | # You probably want to take -DREDEBUG out of CFLAGS, and put something like
  2 | # -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
  3 | # internal assertion checking and some debugging facilities).
  4 | # Put -Dconst= in for a pre-ANSI compiler.
  5 | # Do not take -DPOSIX_MISTAKE out.
  6 | # REGCFLAGS isn't important to you (it's for my use in some special contexts).
  7 | CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
  8 | 
  9 | # If you have a pre-ANSI compiler, put -o into MKHFLAGS.  If you want
 10 | # the Berkeley __P macro, put -b in.
 11 | MKHFLAGS=
 12 | 
 13 | # Flags for linking but not compiling, if any.
 14 | LDFLAGS=
 15 | 
 16 | # Extra libraries for linking, if any.
 17 | LIBS=
 18 | 
 19 | # Internal stuff, should not need changing.
 20 | OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
 21 | OBJS=$(OBJPRODN) split.o debug.o main.o
 22 | H=cclass.h cname.h regex2.h utils.h
 23 | REGSRC=regcomp.c regerror.c regexec.c regfree.c
 24 | ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
 25 | 
 26 | # Stuff that matters only if you're trying to lint the package.
 27 | LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
 28 | LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
 29 | JUNKLINT=possible pointer alignment|null effect
 30 | 
 31 | # arrangements to build forward-reference header files
 32 | .SUFFIXES:	.ih .h
 33 | .c.ih:
 34 | 	sh ./mkh $(MKHFLAGS) -p $< >$@
 35 | 
 36 | default:	r
 37 | 
 38 | lib:	purge $(OBJPRODN)
 39 | 	rm -f libregex.a
 40 | 	ar crv libregex.a $(OBJPRODN)
 41 | 
 42 | purge:
 43 | 	rm -f *.o
 44 | 
 45 | # stuff to build regex.h
 46 | REGEXH=regex.h
 47 | REGEXHSRC=regex2.h $(REGSRC)
 48 | $(REGEXH):	$(REGEXHSRC) mkh
 49 | 	sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
 50 | 	cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
 51 | 	rm -f regex.tmp
 52 | 
 53 | # dependencies
 54 | $(OBJPRODN) debug.o:	utils.h regex.h regex2.h
 55 | regcomp.o:	cclass.h cname.h regcomp.ih
 56 | regexec.o:	engine.c engine.ih
 57 | regerror.o:	regerror.ih
 58 | debug.o:	debug.ih
 59 | main.o:	main.ih
 60 | 
 61 | # tester
 62 | re:	$(OBJS)
 63 | 	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
 64 | 
 65 | # regression test
 66 | r:	re tests
 67 | 	./re <tests
 68 | 	./re -el <tests
 69 | 	./re -er <tests
 70 | 
 71 | # 57 variants, and other stuff, for development use -- not useful to you
 72 | ra:	./re tests
 73 | 	-./re <tests
 74 | 	-./re -el <tests
 75 | 	-./re -er <tests
 76 | 
 77 | rx:	./re tests
 78 | 	./re -x <tests
 79 | 	./re -x -el <tests
 80 | 	./re -x -er <tests
 81 | 
 82 | t:	./re tests
 83 | 	-time ./re <tests
 84 | 	-time ./re -cs <tests
 85 | 	-time ./re -el <tests
 86 | 	-time ./re -cs -el <tests
 87 | 
 88 | l:	$(LINTC)
 89 | 	lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
 90 | 
 91 | fullprint:
 92 | 	ti README WHATSNEW notes todo | list
 93 | 	ti *.h | list
 94 | 	list *.c
 95 | 	list regex.3 regex.7
 96 | 
 97 | print:
 98 | 	ti README WHATSNEW notes todo | list
 99 | 	ti *.h | list
100 | 	list reg*.c engine.c
101 | 
102 | 
103 | mf.tmp:	Makefile
104 | 	sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
105 | 
106 | DTRH=cclass.h cname.h regex2.h utils.h
107 | PRE=COPYRIGHT README WHATSNEW
108 | POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
109 | FILES=$(PRE) Makefile $(POST)
110 | DTR=$(PRE) Makefile=mf.tmp $(POST)
111 | dtr:	$(FILES) mf.tmp
112 | 	makedtr $(DTR) >$@
113 | 	rm mf.tmp
114 | 
115 | cio:	$(FILES)
116 | 	cio $(FILES)
117 | 
118 | rdf:	$(FILES)
119 | 	rcsdiff -c $(FILES) 2>&1 | p
120 | 
121 | # various forms of cleanup
122 | tidy:
123 | 	rm -f junk* core core.* *.core dtr *.tmp lint
124 | 
125 | clean:	tidy
126 | 	rm -f *.o *.s *.ih re libregex.a
127 | 
128 | # don't do this one unless you know what you're doing
129 | spotless:	clean
130 | 	rm -f mkh regex.h
131 | 


--------------------------------------------------------------------------------
/ext/re/README:
--------------------------------------------------------------------------------
 1 | alpha3.8 release.
 2 | Tue Aug 10 15:51:48 EDT 1999
 3 | henry@spsystems.net  (formerly henry@zoo.toronto.edu)
 4 | 
 5 | See WHATSNEW for change listing.
 6 | 
 7 | installation notes:
 8 | --------
 9 | Read the comments at the beginning of Makefile before running.
10 | 
11 | Utils.h contains some things that just might have to be modified on
12 | some systems, as well as a nested include (ugh) of <assert.h>.
13 | 
14 | The "fake" directory contains quick-and-dirty fakes for some header
15 | files and routines that old systems may not have.  Note also that
16 | -DUSEBCOPY will make utils.h substitute bcopy() for memmove().
17 | 
18 | After that, "make r" will build regcomp.o, regexec.o, regfree.o,
19 | and regerror.o (the actual routines), bundle them together into a test
20 | program, and run regression tests on them.  No output is good output.
21 | 
22 | "make lib" builds just the .o files for the actual routines (when
23 | you're happy with testing and have adjusted CFLAGS for production),
24 | and puts them together into libregex.a.  You can pick up either the
25 | library or *.o ("make lib" makes sure there are no other .o files left
26 | around to confuse things).
27 | 
28 | Main.c, debug.c, split.c are used for regression testing but are not part
29 | of the RE routines themselves.
30 | 
31 | Regex.h goes in /usr/include.  All other .h files are internal only.
32 | --------
33 | 


--------------------------------------------------------------------------------
/ext/re/WHATSNEW:
--------------------------------------------------------------------------------
  1 | New in alpha3.8p1: Bug fix for integer overflow in regcomp for excessively
  2 | long pattern strings. CERT Vulnerability Note VU#695940. Found by Guido
  3 | Vranken. - Gary Houston, 25 February 2015.
  4 | 
  5 | New in alpha3.8:  Bug fix for signed/unsigned mixup, found and fixed
  6 | by the FreeBSD folks.
  7 | 
  8 | New in alpha3.7:  A bit of cleanup aimed at maximizing portability,
  9 | possibly at slight cost in efficiency.  "ul" suffixes and "unsigned long"
 10 | no longer appear, in particular.
 11 | 
 12 | New in alpha3.6:  A couple more portability glitches fixed.
 13 | 
 14 | New in alpha3.5:  Active development of this code has been stopped --
 15 | I'm working on a complete reimplementation -- but folks have found some
 16 | minor portability glitches and the like, hence this release to fix them.
 17 | One penalty:  slightly reduced compatibility with old compilers, because
 18 | the ANSI C `unsigned long' type and `ul' constant suffix are used in a
 19 | few places (I could avoid this but it would be considerably more work).
 20 | 
 21 | New in alpha3.4:  The complex bug alluded to below has been fixed (in a
 22 | slightly kludgey temporary way that may hurt efficiency a bit; this is
 23 | another "get it out the door for 4.4" release).  The tests at the end of
 24 | the tests file have accordingly been uncommented.  The primary sign of
 25 | the bug was that something like a?b matching ab matched b rather than ab.
 26 | (The bug was essentially specific to this exact situation, else it would
 27 | have shown up earlier.)
 28 | 
 29 | New in alpha3.3:  The definition of word boundaries has been altered
 30 | slightly, to more closely match the usual programming notion that "_"
 31 | is an alphabetic.  Stuff used for pre-ANSI systems is now in a subdir,
 32 | and the makefile no longer alludes to it in mysterious ways.  The
 33 | makefile has generally been cleaned up some.  Fixes have been made
 34 | (again!) so that the regression test will run without -DREDEBUG, at
 35 | the cost of weaker checking.  A workaround for a bug in some folks'
 36 | <assert.h> has been added.  And some more things have been added to
 37 | tests, including a couple right at the end which are commented out
 38 | because the code currently flunks them (complex bug; fix coming).
 39 | Plus the usual minor cleanup.
 40 | 
 41 | New in alpha3.2:  Assorted bits of cleanup and portability improvement
 42 | (the development base is now a BSDI system using GCC instead of an ancient
 43 | Sun system, and the newer compiler exposed some glitches).  Fix for a
 44 | serious bug that affected REs using many [] (including REG_ICASE REs
 45 | because of the way they are implemented), *sometimes*, depending on
 46 | memory-allocation patterns.  The header-file prototypes no longer name
 47 | the parameters, avoiding possible name conflicts.  The possibility that
 48 | some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
 49 | now handled gracefully.  "uchar" is no longer used as an internal type
 50 | name (too many people have the same idea).  Still the same old lousy
 51 | performance, alas.
 52 | 
 53 | New in alpha3.1:  Basically nothing, this release is just a bookkeeping
 54 | convenience.  Stay tuned.
 55 | 
 56 | New in alpha3.0:  Performance is no better, alas, but some fixes have been
 57 | made and some functionality has been added.  (This is basically the "get
 58 | it out the door in time for 4.4" release.)  One bug fix:  regfree() didn't
 59 | free the main internal structure (how embarrassing).  It is now possible
 60 | to put NULs in either the RE or the target string, using (resp.) a new
 61 | REG_PEND flag and the old REG_STARTEND flag.  The REG_NOSPEC flag to
 62 | regcomp() makes all characters ordinary, so you can match a literal
 63 | string easily (this will become more useful when performance improves!).
 64 | There are now primitives to match beginnings and ends of words, although
 65 | the syntax is disgusting and so is the implementation.  The REG_ATOI
 66 | debugging interface has changed a bit.  And there has been considerable
 67 | internal cleanup of various kinds.
 68 | 
 69 | New in alpha2.3:  Split change list out of README, and moved flags notes
 70 | into Makefile.  Macro-ized the name of regex(7) in regex(3), since it has
 71 | to change for 4.4BSD.  Cleanup work in engine.c, and some new regression
 72 | tests to catch tricky cases thereof.
 73 | 
 74 | New in alpha2.2:  Out-of-date manpages updated.  Regerror() acquires two
 75 | small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
 76 | in my own test program and might be useful to others for similar purposes.
 77 | The regression test will now compile (and run) without REDEBUG.  The
 78 | BRE \$ bug is fixed.  Most uses of "uchar" are gone; it's all chars now.
 79 | Char/uchar parameters are now written int/unsigned, to avoid possible
 80 | portability problems with unpromoted parameters.  Some unsigned casts have
 81 | been introduced to minimize portability problems with shifting into sign
 82 | bits.
 83 | 
 84 | New in alpha2.1:  Lots of little stuff, cleanup and fixes.  The one big
 85 | thing is that regex.h is now generated, using mkh, rather than being
 86 | supplied in the distribution; due to circularities in dependencies,
 87 | you have to build regex.h explicitly by "make h".  The two known bugs
 88 | have been fixed (and the regression test now checks for them), as has a
 89 | problem with assertions not being suppressed in the absence of REDEBUG.
 90 | No performance work yet.
 91 | 
 92 | New in alpha2:  Backslash-anything is an ordinary character, not an
 93 | error (except, of course, for the handful of backslashed metacharacters
 94 | in BREs), which should reduce script breakage.  The regression test
 95 | checks *where* null strings are supposed to match, and has generally
 96 | been tightened up somewhat.  Small bug fixes in parameter passing (not
 97 | harmful, but technically errors) and some other areas.  Debugging
 98 | invoked by defining REDEBUG rather than not defining NDEBUG.
 99 | 
100 | New in alpha+3:  full prototyping for internal routines, using a little
101 | helper program, mkh, which extracts prototypes given in stylized comments.
102 | More minor cleanup.  Buglet fix:  it's CHAR_BIT, not CHAR_BITS.  Simple
103 | pre-screening of input when a literal string is known to be part of the
104 | RE; this does wonders for performance.
105 | 
106 | New in alpha+2:  minor bits of cleanup.  Notably, the number "32" for the
107 | word width isn't hardwired into regexec.c any more, the public header
108 | file prototypes the functions if __STDC__ is defined, and some small typos
109 | in the manpages have been fixed.
110 | 
111 | New in alpha+1:  improvements to the manual pages, and an important
112 | extension, the REG_STARTEND option to regexec().
113 | 


--------------------------------------------------------------------------------
/ext/re/cclass.h:
--------------------------------------------------------------------------------
 1 | /* character-class table */
 2 | static struct cclass {
 3 | 	char *name;
 4 | 	char *chars;
 5 | 	char *multis;
 6 | } cclasses[] = {
 7 | 	"alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
 8 | 0123456789",				"",
 9 | 	"alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
10 | 					"",
11 | 	"blank",	" \t",		"",
12 | 	"cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
13 | \25\26\27\30\31\32\33\34\35\36\37\177",	"",
14 | 	"digit",	"0123456789",	"",
15 | 	"graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
16 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
17 | 					"",
18 | 	"lower",	"abcdefghijklmnopqrstuvwxyz",
19 | 					"",
20 | 	"print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
21 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
22 | 					"",
23 | 	"punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
24 | 					"",
25 | 	"space",	"\t\n\v\f\r ",	"",
26 | 	"upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
27 | 					"",
28 | 	"xdigit",	"0123456789ABCDEFabcdef",
29 | 					"",
30 | 	NULL,		0,		""
31 | };
32 | 


--------------------------------------------------------------------------------
/ext/re/cname.h:
--------------------------------------------------------------------------------
  1 | /* character-name table */
  2 | static struct cname {
  3 | 	char *name;
  4 | 	char code;
  5 | } cnames[] = {
  6 | 	"NUL",	'\0',
  7 | 	"SOH",	'\001',
  8 | 	"STX",	'\002',
  9 | 	"ETX",	'\003',
 10 | 	"EOT",	'\004',
 11 | 	"ENQ",	'\005',
 12 | 	"ACK",	'\006',
 13 | 	"BEL",	'\007',
 14 | 	"alert",	'\007',
 15 | 	"BS",		'\010',
 16 | 	"backspace",	'\b',
 17 | 	"HT",		'\011',
 18 | 	"tab",		'\t',
 19 | 	"LF",		'\012',
 20 | 	"newline",	'\n',
 21 | 	"VT",		'\013',
 22 | 	"vertical-tab",	'\v',
 23 | 	"FF",		'\014',
 24 | 	"form-feed",	'\f',
 25 | 	"CR",		'\015',
 26 | 	"carriage-return",	'\r',
 27 | 	"SO",	'\016',
 28 | 	"SI",	'\017',
 29 | 	"DLE",	'\020',
 30 | 	"DC1",	'\021',
 31 | 	"DC2",	'\022',
 32 | 	"DC3",	'\023',
 33 | 	"DC4",	'\024',
 34 | 	"NAK",	'\025',
 35 | 	"SYN",	'\026',
 36 | 	"ETB",	'\027',
 37 | 	"CAN",	'\030',
 38 | 	"EM",	'\031',
 39 | 	"SUB",	'\032',
 40 | 	"ESC",	'\033',
 41 | 	"IS4",	'\034',
 42 | 	"FS",	'\034',
 43 | 	"IS3",	'\035',
 44 | 	"GS",	'\035',
 45 | 	"IS2",	'\036',
 46 | 	"RS",	'\036',
 47 | 	"IS1",	'\037',
 48 | 	"US",	'\037',
 49 | 	"space",		' ',
 50 | 	"exclamation-mark",	'!',
 51 | 	"quotation-mark",	'"',
 52 | 	"number-sign",		'#',
 53 | 	"dollar-sign",		'$',
 54 | 	"percent-sign",		'%',
 55 | 	"ampersand",		'&',
 56 | 	"apostrophe",		'\'',
 57 | 	"left-parenthesis",	'(',
 58 | 	"right-parenthesis",	')',
 59 | 	"asterisk",	'*',
 60 | 	"plus-sign",	'+',
 61 | 	"comma",	',',
 62 | 	"hyphen",	'-',
 63 | 	"hyphen-minus",	'-',
 64 | 	"period",	'.',
 65 | 	"full-stop",	'.',
 66 | 	"slash",	'/',
 67 | 	"solidus",	'/',
 68 | 	"zero",		'0',
 69 | 	"one",		'1',
 70 | 	"two",		'2',
 71 | 	"three",	'3',
 72 | 	"four",		'4',
 73 | 	"five",		'5',
 74 | 	"six",		'6',
 75 | 	"seven",	'7',
 76 | 	"eight",	'8',
 77 | 	"nine",		'9',
 78 | 	"colon",	':',
 79 | 	"semicolon",	';',
 80 | 	"less-than-sign",	'<',
 81 | 	"equals-sign",		'=',
 82 | 	"greater-than-sign",	'>',
 83 | 	"question-mark",	'?',
 84 | 	"commercial-at",	'@',
 85 | 	"left-square-bracket",	'[',
 86 | 	"backslash",		'\\',
 87 | 	"reverse-solidus",	'\\',
 88 | 	"right-square-bracket",	']',
 89 | 	"circumflex",		'^',
 90 | 	"circumflex-accent",	'^',
 91 | 	"underscore",		'_',
 92 | 	"low-line",		'_',
 93 | 	"grave-accent",		'`',
 94 | 	"left-brace",		'{',
 95 | 	"left-curly-bracket",	'{',
 96 | 	"vertical-line",	'|',
 97 | 	"right-brace",		'}',
 98 | 	"right-curly-bracket",	'}',
 99 | 	"tilde",		'~',
100 | 	"DEL",	'\177',
101 | 	NULL,	0,
102 | };
103 | 


--------------------------------------------------------------------------------
/ext/re/debug.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <ctype.h>
  4 | #include <limits.h>
  5 | #include <stdlib.h>
  6 | #include <sys/types.h>
  7 | #include <regex.h>
  8 | 
  9 | #include "utils.h"
 10 | #include "regex2.h"
 11 | #include "debug.ih"
 12 | 
 13 | /*
 14 |  - regprint - print a regexp for debugging
 15 |  == void regprint(regex_t *r, FILE *d);
 16 |  */
 17 | void
 18 | regprint(r, d)
 19 | regex_t *r;
 20 | FILE *d;
 21 | {
 22 | 	register struct re_guts *g = r->re_g;
 23 | 	register int i;
 24 | 	register int c;
 25 | 	register int last;
 26 | 	int nincat[NC];
 27 | 
 28 | 	fprintf(d, "%ld states, %d categories", (long)g->nstates,
 29 | 							g->ncategories);
 30 | 	fprintf(d, ", first %ld last %ld", (long)g->firststate,
 31 | 						(long)g->laststate);
 32 | 	if (g->iflags&USEBOL)
 33 | 		fprintf(d, ", USEBOL");
 34 | 	if (g->iflags&USEEOL)
 35 | 		fprintf(d, ", USEEOL");
 36 | 	if (g->iflags&BAD)
 37 | 		fprintf(d, ", BAD");
 38 | 	if (g->nsub > 0)
 39 | 		fprintf(d, ", nsub=%ld", (long)g->nsub);
 40 | 	if (g->must != NULL)
 41 | 		fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
 42 | 								g->must);
 43 | 	if (g->backrefs)
 44 | 		fprintf(d, ", backrefs");
 45 | 	if (g->nplus > 0)
 46 | 		fprintf(d, ", nplus %ld", (long)g->nplus);
 47 | 	fprintf(d, "\n");
 48 | 	s_print(g, d);
 49 | 	for (i = 0; i < g->ncategories; i++) {
 50 | 		nincat[i] = 0;
 51 | 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 52 | 			if (g->categories[c] == i)
 53 | 				nincat[i]++;
 54 | 	}
 55 | 	fprintf(d, "cc0#%d", nincat[0]);
 56 | 	for (i = 1; i < g->ncategories; i++)
 57 | 		if (nincat[i] == 1) {
 58 | 			for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 59 | 				if (g->categories[c] == i)
 60 | 					break;
 61 | 			fprintf(d, ", %d=%s", i, regchar(c));
 62 | 		}
 63 | 	fprintf(d, "\n");
 64 | 	for (i = 1; i < g->ncategories; i++)
 65 | 		if (nincat[i] != 1) {
 66 | 			fprintf(d, "cc%d\t", i);
 67 | 			last = -1;
 68 | 			for (c = CHAR_MIN; c <= CHAR_MAX+1; c++)	/* +1 does flush */
 69 | 				if (c <= CHAR_MAX && g->categories[c] == i) {
 70 | 					if (last < 0) {
 71 | 						fprintf(d, "%s", regchar(c));
 72 | 						last = c;
 73 | 					}
 74 | 				} else {
 75 | 					if (last >= 0) {
 76 | 						if (last != c-1)
 77 | 							fprintf(d, "-%s",
 78 | 								regchar(c-1));
 79 | 						last = -1;
 80 | 					}
 81 | 				}
 82 | 			fprintf(d, "\n");
 83 | 		}
 84 | }
 85 | 
 86 | /*
 87 |  - s_print - print the strip for debugging
 88 |  == static void s_print(register struct re_guts *g, FILE *d);
 89 |  */
 90 | static void
 91 | s_print(g, d)
 92 | register struct re_guts *g;
 93 | FILE *d;
 94 | {
 95 | 	register sop *s;
 96 | 	register cset *cs;
 97 | 	register int i;
 98 | 	register int done = 0;
 99 | 	register sop opnd;
100 | 	register int col = 0;
101 | 	register int last;
102 | 	register sopno offset = 2;
103 | #	define	GAP()	{	if (offset % 5 == 0) { \
104 | 					if (col > 40) { \
105 | 						fprintf(d, "\n\t"); \
106 | 						col = 0; \
107 | 					} else { \
108 | 						fprintf(d, " "); \
109 | 						col++; \
110 | 					} \
111 | 				} else \
112 | 					col++; \
113 | 				offset++; \
114 | 			}
115 | 
116 | 	if (OP(g->strip[0]) != OEND)
117 | 		fprintf(d, "missing initial OEND!\n");
118 | 	for (s = &g->strip[1]; !done; s++) {
119 | 		opnd = OPND(*s);
120 | 		switch (OP(*s)) {
121 | 		case OEND:
122 | 			fprintf(d, "\n");
123 | 			done = 1;
124 | 			break;
125 | 		case OCHAR:
126 | 			if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
127 | 				fprintf(d, "\\%c", (char)opnd);
128 | 			else
129 | 				fprintf(d, "%s", regchar((char)opnd));
130 | 			break;
131 | 		case OBOL:
132 | 			fprintf(d, "^");
133 | 			break;
134 | 		case OEOL:
135 | 			fprintf(d, "$");
136 | 			break;
137 | 		case OBOW:
138 | 			fprintf(d, "\\{");
139 | 			break;
140 | 		case OEOW:
141 | 			fprintf(d, "\\}");
142 | 			break;
143 | 		case OANY:
144 | 			fprintf(d, ".");
145 | 			break;
146 | 		case OANYOF:
147 | 			fprintf(d, "[(%ld)", (long)opnd);
148 | 			cs = &g->sets[opnd];
149 | 			last = -1;
150 | 			for (i = 0; i < g->csetsize+1; i++)	/* +1 flushes */
151 | 				if (CHIN(cs, i) && i < g->csetsize) {
152 | 					if (last < 0) {
153 | 						fprintf(d, "%s", regchar(i));
154 | 						last = i;
155 | 					}
156 | 				} else {
157 | 					if (last >= 0) {
158 | 						if (last != i-1)
159 | 							fprintf(d, "-%s",
160 | 								regchar(i-1));
161 | 						last = -1;
162 | 					}
163 | 				}
164 | 			fprintf(d, "]");
165 | 			break;
166 | 		case OBACK_:
167 | 			fprintf(d, "(\\<%ld>", (long)opnd);
168 | 			break;
169 | 		case O_BACK:
170 | 			fprintf(d, "<%ld>\\)", (long)opnd);
171 | 			break;
172 | 		case OPLUS_:
173 | 			fprintf(d, "(+");
174 | 			if (OP(*(s+opnd)) != O_PLUS)
175 | 				fprintf(d, "<%ld>", (long)opnd);
176 | 			break;
177 | 		case O_PLUS:
178 | 			if (OP(*(s-opnd)) != OPLUS_)
179 | 				fprintf(d, "<%ld>", (long)opnd);
180 | 			fprintf(d, "+)");
181 | 			break;
182 | 		case OQUEST_:
183 | 			fprintf(d, "(?");
184 | 			if (OP(*(s+opnd)) != O_QUEST)
185 | 				fprintf(d, "<%ld>", (long)opnd);
186 | 			break;
187 | 		case O_QUEST:
188 | 			if (OP(*(s-opnd)) != OQUEST_)
189 | 				fprintf(d, "<%ld>", (long)opnd);
190 | 			fprintf(d, "?)");
191 | 			break;
192 | 		case OLPAREN:
193 | 			fprintf(d, "((<%ld>", (long)opnd);
194 | 			break;
195 | 		case ORPAREN:
196 | 			fprintf(d, "<%ld>))", (long)opnd);
197 | 			break;
198 | 		case OCH_:
199 | 			fprintf(d, "<");
200 | 			if (OP(*(s+opnd)) != OOR2)
201 | 				fprintf(d, "<%ld>", (long)opnd);
202 | 			break;
203 | 		case OOR1:
204 | 			if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
205 | 				fprintf(d, "<%ld>", (long)opnd);
206 | 			fprintf(d, "|");
207 | 			break;
208 | 		case OOR2:
209 | 			fprintf(d, "|");
210 | 			if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
211 | 				fprintf(d, "<%ld>", (long)opnd);
212 | 			break;
213 | 		case O_CH:
214 | 			if (OP(*(s-opnd)) != OOR1)
215 | 				fprintf(d, "<%ld>", (long)opnd);
216 | 			fprintf(d, ">");
217 | 			break;
218 | 		default:
219 | 			fprintf(d, "!%d(%d)!", OP(*s), opnd);
220 | 			break;
221 | 		}
222 | 		if (!done)
223 | 			GAP();
224 | 	}
225 | }
226 | 
227 | /*
228 |  - regchar - make a character printable
229 |  == static char *regchar(int ch);
230 |  */
231 | static char *			/* -> representation */
232 | regchar(ch)
233 | int ch;
234 | {
235 | 	static char buf[10];
236 | 
237 | 	if (isprint(ch) || ch == ' ')
238 | 		sprintf(buf, "%c", ch);
239 | 	else
240 | 		sprintf(buf, "\\%o", ch);
241 | 	return(buf);
242 | }
243 | 


--------------------------------------------------------------------------------
/ext/re/engine.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === engine.c === */
 7 | static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
 8 | static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
 9 | static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
10 | static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
11 | static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
12 | static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
13 | #define	BOL	(OUT+1)
14 | #define	EOL	(BOL+1)
15 | #define	BOLEOL	(BOL+2)
16 | #define	NOTHING	(BOL+3)
17 | #define	BOW	(BOL+4)
18 | #define	EOW	(BOL+5)
19 | #define	CODEMAX	(BOL+5)		/* highest code used */
20 | #define	NONCHAR(c)	((c) > CHAR_MAX)
21 | #define	NNONCHAR	(CODEMAX-CHAR_MAX)
22 | #ifdef REDEBUG
23 | static void print(struct match *m, char *caption, states st, int ch, FILE *d);
24 | #endif
25 | #ifdef REDEBUG
26 | static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
27 | #endif
28 | #ifdef REDEBUG
29 | static char *pchar(int ch);
30 | #endif
31 | 
32 | #ifdef __cplusplus
33 | }
34 | #endif
35 | /* ========= end header generated by ./mkh ========= */
36 | 


--------------------------------------------------------------------------------
/ext/re/fake/limits.h:
--------------------------------------------------------------------------------
1 | #define	_POSIX2_RE_DUP_MAX	255
2 | #define	CHAR_MIN	(-128)
3 | #define	CHAR_MAX	127
4 | #define	CHAR_BIT	8
5 | 


--------------------------------------------------------------------------------
/ext/re/fake/memmove.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <string.h>
 3 | #include <sys/types.h>
 4 | 
 5 | /*
 6 |  - memmove - fake ANSI C routine
 7 |  */
 8 | char *
 9 | memmove(dst, src, count)
10 | char *dst;
11 | char *src;
12 | size_t count;
13 | {
14 | 	register char *s;
15 | 	register char *d;
16 | 	register size_t n;
17 | 
18 | 	if (dst > src)
19 | 		for (d = dst+count, s = src+count, n = count; n > 0; n--)
20 | 			*--d = *--s;
21 | 	else
22 | 		for (d = dst, s = src, n = count; n > 0; n--)
23 | 			*d++ = *s++;
24 | 
25 | 	return(dst);
26 | }
27 | 


--------------------------------------------------------------------------------
/ext/re/fake/stdlib.h:
--------------------------------------------------------------------------------
1 | extern char *malloc();
2 | extern char *realloc();
3 | 


--------------------------------------------------------------------------------
/ext/re/main.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <sys/types.h>
  4 | #include <regex.h>
  5 | #include <assert.h>
  6 | 
  7 | #include "main.ih"
  8 | 
  9 | char *progname;
 10 | int debug = 0;
 11 | int line = 0;
 12 | int status = 0;
 13 | 
 14 | int copts = REG_EXTENDED;
 15 | int eopts = 0;
 16 | regoff_t startoff = 0;
 17 | regoff_t endoff = 0;
 18 | 
 19 | 
 20 | extern int split();
 21 | extern void regprint();
 22 | 
 23 | /*
 24 |  - main - do the simple case, hand off to regress() for regression
 25 |  */
 26 | main(argc, argv)
 27 | int argc;
 28 | char *argv[];
 29 | {
 30 | 	regex_t re;
 31 | #	define	NS	10
 32 | 	regmatch_t subs[NS];
 33 | 	char erbuf[100];
 34 | 	int err;
 35 | 	size_t len;
 36 | 	int c;
 37 | 	int errflg = 0;
 38 | 	register int i;
 39 | 	extern int optind;
 40 | 	extern char *optarg;
 41 | 
 42 | 	progname = argv[0];
 43 | 
 44 | 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
 45 | 		switch (c) {
 46 | 		case 'c':	/* compile options */
 47 | 			copts = options('c', optarg);
 48 | 			break;
 49 | 		case 'e':	/* execute options */
 50 | 			eopts = options('e', optarg);
 51 | 			break;
 52 | 		case 'S':	/* start offset */
 53 | 			startoff = (regoff_t)atoi(optarg);
 54 | 			break;
 55 | 		case 'E':	/* end offset */
 56 | 			endoff = (regoff_t)atoi(optarg);
 57 | 			break;
 58 | 		case 'x':	/* Debugging. */
 59 | 			debug++;
 60 | 			break;
 61 | 		case '?':
 62 | 		default:
 63 | 			errflg++;
 64 | 			break;
 65 | 		}
 66 | 	if (errflg) {
 67 | 		fprintf(stderr, "usage: %s ", progname);
 68 | 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
 69 | 		exit(2);
 70 | 	}
 71 | 
 72 | 	if (optind >= argc) {
 73 | 		regress(stdin);
 74 | 		exit(status);
 75 | 	}
 76 | 
 77 | 	err = regcomp(&re, argv[optind++], copts);
 78 | 	if (err) {
 79 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
 80 | 		fprintf(stderr, "error %s, %d/%d `%s'\n",
 81 | 			eprint(err), len, sizeof(erbuf), erbuf);
 82 | 		exit(status);
 83 | 	}
 84 | 	regprint(&re, stdout);	
 85 | 
 86 | 	if (optind >= argc) {
 87 | 		regfree(&re);
 88 | 		exit(status);
 89 | 	}
 90 | 
 91 | 	if (eopts&REG_STARTEND) {
 92 | 		subs[0].rm_so = startoff;
 93 | 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
 94 | 	}
 95 | 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
 96 | 	if (err) {
 97 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
 98 | 		fprintf(stderr, "error %s, %d/%d `%s'\n",
 99 | 			eprint(err), len, sizeof(erbuf), erbuf);
100 | 		exit(status);
101 | 	}
102 | 	if (!(copts&REG_NOSUB)) {
103 | 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
104 | 		if (subs[0].rm_so != -1) {
105 | 			if (len != 0)
106 | 				printf("match `%.*s'\n", len,
107 | 					argv[optind] + subs[0].rm_so);
108 | 			else
109 | 				printf("match `'@%.1s\n",
110 | 					argv[optind] + subs[0].rm_so);
111 | 		}
112 | 		for (i = 1; i < NS; i++)
113 | 			if (subs[i].rm_so != -1)
114 | 				printf("(%d) `%.*s'\n", i,
115 | 					(int)(subs[i].rm_eo - subs[i].rm_so),
116 | 					argv[optind] + subs[i].rm_so);
117 | 	}
118 | 	exit(status);
119 | }
120 | 
121 | /*
122 |  - regress - main loop of regression test
123 |  == void regress(FILE *in);
124 |  */
125 | void
126 | regress(in)
127 | FILE *in;
128 | {
129 | 	char inbuf[1000];
130 | #	define	MAXF	10
131 | 	char *f[MAXF];
132 | 	int nf;
133 | 	int i;
134 | 	char erbuf[100];
135 | 	size_t ne;
136 | 	char *badpat = "invalid regular expression";
137 | #	define	SHORT	10
138 | 	char *bpname = "REG_BADPAT";
139 | 	regex_t re;
140 | 
141 | 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
142 | 		line++;
143 | 		if (inbuf[0] == '#' || inbuf[0] == '\n')
144 | 			continue;			/* NOTE CONTINUE */
145 | 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
146 | 		if (debug)
147 | 			fprintf(stdout, "%d:\n", line);
148 | 		nf = split(inbuf, f, MAXF, "\t\t");
149 | 		if (nf < 3) {
150 | 			fprintf(stderr, "bad input, line %d\n", line);
151 | 			exit(1);
152 | 		}
153 | 		for (i = 0; i < nf; i++)
154 | 			if (strcmp(f[i], "\"\"") == 0)
155 | 				f[i] = "";
156 | 		if (nf <= 3)
157 | 			f[3] = NULL;
158 | 		if (nf <= 4)
159 | 			f[4] = NULL;
160 | 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
161 | 		if (opt('&', f[1]))	/* try with either type of RE */
162 | 			try(f[0], f[1], f[2], f[3], f[4],
163 | 					options('c', f[1]) &~ REG_EXTENDED);
164 | 	}
165 | 
166 | 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
167 | 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
168 | 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
169 | 							erbuf, badpat);
170 | 		status = 1;
171 | 	}
172 | 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
173 | 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
174 | 						ne != strlen(badpat)+1) {
175 | 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
176 | 						erbuf, SHORT-1, badpat);
177 | 		status = 1;
178 | 	}
179 | 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
180 | 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
181 | 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
182 | 						erbuf, bpname);
183 | 		status = 1;
184 | 	}
185 | 	re.re_endp = bpname;
186 | 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
187 | 	if (atoi(erbuf) != (int)REG_BADPAT) {
188 | 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
189 | 						erbuf, (long)REG_BADPAT);
190 | 		status = 1;
191 | 	} else if (ne != strlen(erbuf)+1) {
192 | 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
193 | 						erbuf, (long)REG_BADPAT);
194 | 		status = 1;
195 | 	}
196 | }
197 | 
198 | /*
199 |  - try - try it, and report on problems
200 |  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
201 |  */
202 | void
203 | try(f0, f1, f2, f3, f4, opts)
204 | char *f0;
205 | char *f1;
206 | char *f2;
207 | char *f3;
208 | char *f4;
209 | int opts;			/* may not match f1 */
210 | {
211 | 	regex_t re;
212 | #	define	NSUBS	10
213 | 	regmatch_t subs[NSUBS];
214 | #	define	NSHOULD	15
215 | 	char *should[NSHOULD];
216 | 	int nshould;
217 | 	char erbuf[100];
218 | 	int err;
219 | 	int len;
220 | 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
221 | 	register int i;
222 | 	char *grump;
223 | 	char f0copy[1000];
224 | 	char f2copy[1000];
225 | 
226 | 	strcpy(f0copy, f0);
227 | 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
228 | 	fixstr(f0copy);
229 | 	err = regcomp(&re, f0copy, opts);
230 | 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
231 | 		/* unexpected error or wrong error */
232 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
233 | 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
234 | 					line, type, eprint(err), len,
235 | 					sizeof(erbuf), erbuf);
236 | 		status = 1;
237 | 	} else if (err == 0 && opt('C', f1)) {
238 | 		/* unexpected success */
239 | 		fprintf(stderr, "%d: %s should have given REG_%s\n",
240 | 						line, type, f2);
241 | 		status = 1;
242 | 		err = 1;	/* so we won't try regexec */
243 | 	}
244 | 
245 | 	if (err != 0) {
246 | 		regfree(&re);
247 | 		return;
248 | 	}
249 | 
250 | 	strcpy(f2copy, f2);
251 | 	fixstr(f2copy);
252 | 
253 | 	if (options('e', f1)&REG_STARTEND) {
254 | 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
255 | 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
256 | 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
257 | 		subs[0].rm_eo = strchr(f2, ')') - f2;
258 | 	}
259 | 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
260 | 
261 | 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
262 | 		/* unexpected error or wrong error */
263 | 		len = regerror(err, &re, erbuf, sizeof(erbuf));
264 | 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
265 | 					line, type, eprint(err), len,
266 | 					sizeof(erbuf), erbuf);
267 | 		status = 1;
268 | 	} else if (err != 0) {
269 | 		/* nothing more to check */
270 | 	} else if (f3 == NULL) {
271 | 		/* unexpected success */
272 | 		fprintf(stderr, "%d: %s exec should have failed\n",
273 | 						line, type);
274 | 		status = 1;
275 | 		err = 1;		/* just on principle */
276 | 	} else if (opts&REG_NOSUB) {
277 | 		/* nothing more to check */
278 | 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
279 | 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
280 | 		status = 1;
281 | 		err = 1;
282 | 	}
283 | 
284 | 	if (err != 0 || f4 == NULL) {
285 | 		regfree(&re);
286 | 		return;
287 | 	}
288 | 
289 | 	for (i = 1; i < NSHOULD; i++)
290 | 		should[i] = NULL;
291 | 	nshould = split(f4, should+1, NSHOULD-1, ",");
292 | 	if (nshould == 0) {
293 | 		nshould = 1;
294 | 		should[1] = "";
295 | 	}
296 | 	for (i = 1; i < NSUBS; i++) {
297 | 		grump = check(f2, subs[i], should[i]);
298 | 		if (grump != NULL) {
299 | 			fprintf(stderr, "%d: %s $%d %s\n", line,
300 | 							type, i, grump);
301 | 			status = 1;
302 | 			err = 1;
303 | 		}
304 | 	}
305 | 
306 | 	regfree(&re);
307 | }
308 | 
309 | /*
310 |  - options - pick options out of a regression-test string
311 |  == int options(int type, char *s);
312 |  */
313 | int
314 | options(type, s)
315 | int type;			/* 'c' compile, 'e' exec */
316 | char *s;
317 | {
318 | 	register char *p;
319 | 	register int o = (type == 'c') ? copts : eopts;
320 | 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
321 | 
322 | 	for (p = s; *p != '\0'; p++)
323 | 		if (strchr(legal, *p) != NULL)
324 | 			switch (*p) {
325 | 			case 'b':
326 | 				o &= ~REG_EXTENDED;
327 | 				break;
328 | 			case 'i':
329 | 				o |= REG_ICASE;
330 | 				break;
331 | 			case 's':
332 | 				o |= REG_NOSUB;
333 | 				break;
334 | 			case 'n':
335 | 				o |= REG_NEWLINE;
336 | 				break;
337 | 			case 'm':
338 | 				o &= ~REG_EXTENDED;
339 | 				o |= REG_NOSPEC;
340 | 				break;
341 | 			case 'p':
342 | 				o |= REG_PEND;
343 | 				break;
344 | 			case '^':
345 | 				o |= REG_NOTBOL;
346 | 				break;
347 | 			case '$':
348 | 				o |= REG_NOTEOL;
349 | 				break;
350 | 			case '#':
351 | 				o |= REG_STARTEND;
352 | 				break;
353 | 			case 't':	/* trace */
354 | 				o |= REG_TRACE;
355 | 				break;
356 | 			case 'l':	/* force long representation */
357 | 				o |= REG_LARGE;
358 | 				break;
359 | 			case 'r':	/* force backref use */
360 | 				o |= REG_BACKR;
361 | 				break;
362 | 			}
363 | 	return(o);
364 | }
365 | 
366 | /*
367 |  - opt - is a particular option in a regression string?
368 |  == int opt(int c, char *s);
369 |  */
370 | int				/* predicate */
371 | opt(c, s)
372 | int c;
373 | char *s;
374 | {
375 | 	return(strchr(s, c) != NULL);
376 | }
377 | 
378 | /*
379 |  - fixstr - transform magic characters in strings
380 |  == void fixstr(register char *p);
381 |  */
382 | void
383 | fixstr(p)
384 | register char *p;
385 | {
386 | 	if (p == NULL)
387 | 		return;
388 | 
389 | 	for (; *p != '\0'; p++)
390 | 		if (*p == 'N')
391 | 			*p = '\n';
392 | 		else if (*p == 'T')
393 | 			*p = '\t';
394 | 		else if (*p == 'S')
395 | 			*p = ' ';
396 | 		else if (*p == 'Z')
397 | 			*p = '\0';
398 | }
399 | 
400 | /*
401 |  - check - check a substring match
402 |  == char *check(char *str, regmatch_t sub, char *should);
403 |  */
404 | char *				/* NULL or complaint */
405 | check(str, sub, should)
406 | char *str;
407 | regmatch_t sub;
408 | char *should;
409 | {
410 | 	register int len;
411 | 	register int shlen;
412 | 	register char *p;
413 | 	static char grump[500];
414 | 	register char *at = NULL;
415 | 
416 | 	if (should != NULL && strcmp(should, "-") == 0)
417 | 		should = NULL;
418 | 	if (should != NULL && should[0] == '@') {
419 | 		at = should + 1;
420 | 		should = "";
421 | 	}
422 | 
423 | 	/* check rm_so and rm_eo for consistency */
424 | 	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
425 | 				(sub.rm_so != -1 && sub.rm_eo == -1) ||
426 | 				(sub.rm_so != -1 && sub.rm_so < 0) ||
427 | 				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
428 | 		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
429 | 							(long)sub.rm_eo);
430 | 		return(grump);
431 | 	}
432 | 
433 | 	/* check for no match */
434 | 	if (sub.rm_so == -1 && should == NULL)
435 | 		return(NULL);
436 | 	if (sub.rm_so == -1)
437 | 		return("did not match");
438 | 
439 | 	/* check for in range */
440 | 	if (sub.rm_eo > strlen(str)) {
441 | 		sprintf(grump, "start %ld end %ld, past end of string",
442 | 					(long)sub.rm_so, (long)sub.rm_eo);
443 | 		return(grump);
444 | 	}
445 | 
446 | 	len = (int)(sub.rm_eo - sub.rm_so);
447 | 	shlen = (int)strlen(should);
448 | 	p = str + sub.rm_so;
449 | 
450 | 	/* check for not supposed to match */
451 | 	if (should == NULL) {
452 | 		sprintf(grump, "matched `%.*s'", len, p);
453 | 		return(grump);
454 | 	}
455 | 
456 | 	/* check for wrong match */
457 | 	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
458 | 		sprintf(grump, "matched `%.*s' instead", len, p);
459 | 		return(grump);
460 | 	}
461 | 	if (shlen > 0)
462 | 		return(NULL);
463 | 
464 | 	/* check null match in right place */
465 | 	if (at == NULL)
466 | 		return(NULL);
467 | 	shlen = strlen(at);
468 | 	if (shlen == 0)
469 | 		shlen = 1;	/* force check for end-of-string */
470 | 	if (strncmp(p, at, shlen) != 0) {
471 | 		sprintf(grump, "matched null at `%.20s'", p);
472 | 		return(grump);
473 | 	}
474 | 	return(NULL);
475 | }
476 | 
477 | /*
478 |  - eprint - convert error number to name
479 |  == static char *eprint(int err);
480 |  */
481 | static char *
482 | eprint(err)
483 | int err;
484 | {
485 | 	static char epbuf[100];
486 | 	size_t len;
487 | 
488 | 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
489 | 	assert(len <= sizeof(epbuf));
490 | 	return(epbuf);
491 | }
492 | 
493 | /*
494 |  - efind - convert error name to number
495 |  == static int efind(char *name);
496 |  */
497 | static int
498 | efind(name)
499 | char *name;
500 | {
501 | 	static char efbuf[100];
502 | 	size_t n;
503 | 	regex_t re;
504 | 
505 | 	sprintf(efbuf, "REG_%s", name);
506 | 	assert(strlen(efbuf) < sizeof(efbuf));
507 | 	re.re_endp = efbuf;
508 | 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
509 | 	return(atoi(efbuf));
510 | }
511 | 


--------------------------------------------------------------------------------
/ext/re/mkh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # mkh - pull headers out of C source
 3 | PATH=/bin:/usr/bin ; export PATH
 4 | 
 5 | # egrep pattern to pick out marked lines
 6 | egrep='^ =([ 	]|$)'
 7 | 
 8 | # Sed program to process marked lines into lines for the header file.
 9 | # The markers have already been removed.  Two things are done here:  removal
10 | # of backslashed newlines, and some fudging of comments.  The first is done
11 | # because -o needs to have prototypes on one line to strip them down.
12 | # Getting comments into the output is tricky; we turn C++-style // comments
13 | # into /* */ comments, after altering any existing */'s to avoid trouble.
14 | peel='	/\\$/N
15 | 	/\\\n[ 	]*/s///g
16 | 	/\/\//s;\*/;* /;g
17 | 	/\/\//s;//\(.*\);/*\1 */;'
18 | 
19 | for a
20 | do
21 | 	case "$a" in
22 | 	-o)	# old (pre-function-prototype) compiler
23 | 		# add code to comment out argument lists
24 | 		peel="$peel
25 | 			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1(/*\2*/);'
26 | 		shift
27 | 		;;
28 | 	-b)	# funny Berkeley __P macro
29 | 		peel="$peel
30 | 			"'/^\([^#\/][^\/]*[a-zA-Z0-9_)]\)(\(.*\))/s;;\1 __P((\2));'
31 | 		shift
32 | 		;;
33 | 	-s)	# compiler doesn't like `static foo();'
34 | 		# add code to get rid of the `static'
35 | 		peel="$peel
36 | 			"'/^static[ 	][^\/]*[a-zA-Z0-9_)](.*)/s;static.;;'
37 | 		shift
38 | 		;;
39 | 	-p)	# private declarations
40 | 		egrep='^ ==([ 	]|$)'
41 | 		shift
42 | 		;;
43 | 	-i)	# wrap in #ifndef, argument is name
44 | 		ifndef="$2"
45 | 		shift ; shift
46 | 		;;
47 | 	*)	break
48 | 		;;
49 | 	esac
50 | done
51 | 
52 | if test " $ifndef" != " "
53 | then
54 | 	echo "#ifndef $ifndef"
55 | 	echo "#define	$ifndef	/* never again */"
56 | fi
57 | echo "/* ========= begin header generated by $0 ========= */"
58 | echo '#ifdef __cplusplus'
59 | echo 'extern "C" {'
60 | echo '#endif'
61 | for f
62 | do
63 | 	echo
64 | 	echo "/* === $f === */"
65 | 	egrep "$egrep" $f | sed 's/^ ==*[ 	]//;s/^ ==*$//' | sed "$peel"
66 | 	echo
67 | done
68 | echo '#ifdef __cplusplus'
69 | echo '}'
70 | echo '#endif'
71 | echo "/* ========= end header generated by $0 ========= */"
72 | if test " $ifndef" != " "
73 | then
74 | 	echo "#endif"
75 | fi
76 | exit 0
77 | 


--------------------------------------------------------------------------------
/ext/re/msvcbuild.bat:
--------------------------------------------------------------------------------
 1 | @setlocal
 2 | @set SCHEME_H_DIR=..\..\src
 3 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_DEPRECATE /I%SCHEME_H_DIR%
 4 | @set MSLIB=lib /nologo
 5 | @set REGSRC=re.c regcomp.c regexec.c regerror.c regfree.c
 6 | @set OBJPRODN=re.obj regcomp.obj regexec.obj regerror.obj regfree.obj
 7 | 
 8 | @if not exist lib\ (
 9 |   mkdir lib
10 | )
11 | 
12 | %MSCOMPILE% /MT %REGSRC%
13 | %MSLIB% /out:lib\re.lib %OBJPRODN%
14 | 
15 | del %OBJPRODN%
16 | 


--------------------------------------------------------------------------------
/ext/re/re.c:
--------------------------------------------------------------------------------
 1 | /* re.c */
 2 | /* Henry Spencer's implementation of Regular Expressions,
 3 |    used for MiniScheme */
 4 | /* Refurbished by Stephen Gildea */
 5 | #include <sys/types.h>
 6 | #include <stdlib.h>
 7 | #include "regex.h"
 8 | #include "miniscm.h"
 9 | 
10 | pointer foreign_re_match(pointer args)
11 | {
12 | 	pointer retval = F;
13 | 	int retcode;
14 | 	regex_t rt;
15 | 	pointer first_arg, second_arg;
16 | 	pointer third_arg = NIL;
17 | 	char *string;
18 | 	char *pattern;
19 | 	int num = 0;
20 | 
21 | 	if (!((args != NIL) && is_string((first_arg = car(args)))
22 | 		  && (args=cdr(args))
23 | 		  && is_pair(args) && is_string((second_arg = car(args))))) {
24 | 		return F;
25 | 	}
26 | 	pattern = strvalue(first_arg);
27 | 	string = strvalue(second_arg);
28 | 
29 | 	args = cdr(args);
30 | 	if (args != NIL) {
31 | 		if (!(is_pair(args) && is_vector((third_arg = car(args))))) {
32 | 			return F;
33 | 		} else {
34 | 			num = ivalue(third_arg);
35 | 		}
36 | 	}
37 | 
38 | 	if (regcomp(&rt, pattern, REG_EXTENDED) != 0) {
39 | 		return F;
40 | 	}
41 | 
42 | 	if (num == 0) {
43 | 		retcode = regexec(&rt, string, 0, 0, 0);
44 | 	} else {
45 | 		regmatch_t *pmatch = malloc((num + 1) * sizeof(regmatch_t));
46 | 		if (pmatch != 0) {
47 | 			retcode = regexec(&rt, string, num + 1, pmatch, 0);
48 | 			if (retcode == 0) {
49 | 				int i;
50 | 				for (i = 0; i < num; i++) {
51 | 					mark_x = mk_integer(pmatch[i].rm_so);
52 | 					mark_y = mk_integer(pmatch[i].rm_eo);
53 | 					set_vector_elem(third_arg, i, cons(mark_x, mark_y));
54 | 				}
55 | 			}
56 | 			free(pmatch);
57 | 		} else {
58 | 			retcode = -1;
59 | 		}
60 | 	}
61 | 
62 | 	if (retcode == 0) {
63 | 		retval = T;
64 | 	}
65 | 
66 | 	regfree(&rt);
67 | 
68 | 	return retval;
69 | }
70 | 
71 | void init_re(void)
72 | {
73 | 	scheme_register_foreign_func("re-match", foreign_re_match);
74 | }
75 | 


--------------------------------------------------------------------------------
/ext/re/regcomp.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === regcomp.c === */
 7 | static void p_ere(register struct parse *p, int stop);
 8 | static void p_ere_exp(register struct parse *p);
 9 | static void p_str(register struct parse *p);
10 | static void p_bre(register struct parse *p, register int end1, register int end2);
11 | static int p_simp_re(register struct parse *p, int starordinary);
12 | static int p_count(register struct parse *p);
13 | static void p_bracket(register struct parse *p);
14 | static void p_b_term(register struct parse *p, register cset *cs);
15 | static void p_b_cclass(register struct parse *p, register cset *cs);
16 | static void p_b_eclass(register struct parse *p, register cset *cs);
17 | static char p_b_symbol(register struct parse *p);
18 | static char p_b_coll_elem(register struct parse *p, int endc);
19 | static char othercase(int ch);
20 | static void bothcases(register struct parse *p, int ch);
21 | static void ordinary(register struct parse *p, register int ch);
22 | static void nonnewline(register struct parse *p);
23 | static void repeat(register struct parse *p, sopno start, int from, int to);
24 | static int seterr(register struct parse *p, int e);
25 | static cset *allocset(register struct parse *p);
26 | static void freeset(register struct parse *p, register cset *cs);
27 | static int freezeset(register struct parse *p, register cset *cs);
28 | static int firstch(register struct parse *p, register cset *cs);
29 | static int nch(register struct parse *p, register cset *cs);
30 | static void mcadd(register struct parse *p, register cset *cs, register char *cp);
31 | static void mcsub(register cset *cs, register char *cp);
32 | static int mcin(register cset *cs, register char *cp);
33 | static char *mcfind(register cset *cs, register char *cp);
34 | static void mcinvert(register struct parse *p, register cset *cs);
35 | static void mccase(register struct parse *p, register cset *cs);
36 | static int isinsets(register struct re_guts *g, int c);
37 | static int samesets(register struct re_guts *g, int c1, int c2);
38 | static void categorize(struct parse *p, register struct re_guts *g);
39 | static sopno dupl(register struct parse *p, sopno start, sopno finish);
40 | static void doemit(register struct parse *p, sop op, size_t opnd);
41 | static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
42 | static void dofwd(register struct parse *p, sopno pos, sop value);
43 | static void enlarge(register struct parse *p, sopno size);
44 | static void stripsnug(register struct parse *p, register struct re_guts *g);
45 | static void findmust(register struct parse *p, register struct re_guts *g);
46 | static sopno pluscount(register struct parse *p, register struct re_guts *g);
47 | 
48 | #ifdef __cplusplus
49 | }
50 | #endif
51 | /* ========= end header generated by ./mkh ========= */
52 | 


--------------------------------------------------------------------------------
/ext/re/regerror.c:
--------------------------------------------------------------------------------
  1 | #include <sys/types.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | #include <ctype.h>
  5 | #include <limits.h>
  6 | #include <stdlib.h>
  7 | #include "regex.h"
  8 | 
  9 | #include "utils.h"
 10 | #include "regerror.ih"
 11 | 
 12 | /*
 13 |  = #define	REG_OKAY	 0
 14 |  = #define	REG_NOMATCH	 1
 15 |  = #define	REG_BADPAT	 2
 16 |  = #define	REG_ECOLLATE	 3
 17 |  = #define	REG_ECTYPE	 4
 18 |  = #define	REG_EESCAPE	 5
 19 |  = #define	REG_ESUBREG	 6
 20 |  = #define	REG_EBRACK	 7
 21 |  = #define	REG_EPAREN	 8
 22 |  = #define	REG_EBRACE	 9
 23 |  = #define	REG_BADBR	10
 24 |  = #define	REG_ERANGE	11
 25 |  = #define	REG_ESPACE	12
 26 |  = #define	REG_BADRPT	13
 27 |  = #define	REG_EMPTY	14
 28 |  = #define	REG_ASSERT	15
 29 |  = #define	REG_INVARG	16
 30 |  = #define	REG_ATOI	255	// convert name to number (!)
 31 |  = #define	REG_ITOA	0400	// convert number to name (!)
 32 |  */
 33 | static struct rerr {
 34 | 	int code;
 35 | 	char *name;
 36 | 	char *explain;
 37 | } rerrs[] = {
 38 | 	REG_OKAY,	"REG_OKAY",	"no errors detected",
 39 | 	REG_NOMATCH,	"REG_NOMATCH",	"regexec() failed to match",
 40 | 	REG_BADPAT,	"REG_BADPAT",	"invalid regular expression",
 41 | 	REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element",
 42 | 	REG_ECTYPE,	"REG_ECTYPE",	"invalid character class",
 43 | 	REG_EESCAPE,	"REG_EESCAPE",	"trailing backslash (\\)",
 44 | 	REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number",
 45 | 	REG_EBRACK,	"REG_EBRACK",	"brackets ([ ]) not balanced",
 46 | 	REG_EPAREN,	"REG_EPAREN",	"parentheses not balanced",
 47 | 	REG_EBRACE,	"REG_EBRACE",	"braces not balanced",
 48 | 	REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)",
 49 | 	REG_ERANGE,	"REG_ERANGE",	"invalid character range",
 50 | 	REG_ESPACE,	"REG_ESPACE",	"out of memory",
 51 | 	REG_BADRPT,	"REG_BADRPT",	"repetition-operator operand invalid",
 52 | 	REG_EMPTY,	"REG_EMPTY",	"empty (sub)expression",
 53 | 	REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug",
 54 | 	REG_INVARG,	"REG_INVARG",	"invalid argument to regex routine",
 55 | 	-1,		"",		"*** unknown regexp error code ***",
 56 | };
 57 | 
 58 | /*
 59 |  - regerror - the interface to error numbers
 60 |  = extern size_t regerror(int, const regex_t *, char *, size_t);
 61 |  */
 62 | /* ARGSUSED */
 63 | size_t
 64 | regerror(errcode, preg, errbuf, errbuf_size)
 65 | int errcode;
 66 | const regex_t *preg;
 67 | char *errbuf;
 68 | size_t errbuf_size;
 69 | {
 70 | 	register struct rerr *r;
 71 | 	register size_t len;
 72 | 	register int target = errcode &~ REG_ITOA;
 73 | 	register char *s;
 74 | 	char convbuf[50];
 75 | 
 76 | 	if (errcode == REG_ATOI)
 77 | 		s = regatoi(preg, convbuf);
 78 | 	else {
 79 | 		for (r = rerrs; r->code >= 0; r++)
 80 | 			if (r->code == target)
 81 | 				break;
 82 | 	
 83 | 		if (errcode&REG_ITOA) {
 84 | 			if (r->code >= 0)
 85 | 				(void) strcpy(convbuf, r->name);
 86 | 			else
 87 | 				sprintf(convbuf, "REG_0x%x", target);
 88 | 			assert(strlen(convbuf) < sizeof(convbuf));
 89 | 			s = convbuf;
 90 | 		} else
 91 | 			s = r->explain;
 92 | 	}
 93 | 
 94 | 	len = strlen(s) + 1;
 95 | 	if (errbuf_size > 0) {
 96 | 		if (errbuf_size > len)
 97 | 			(void) strcpy(errbuf, s);
 98 | 		else {
 99 | 			(void) strncpy(errbuf, s, errbuf_size-1);
100 | 			errbuf[errbuf_size-1] = '\0';
101 | 		}
102 | 	}
103 | 
104 | 	return(len);
105 | }
106 | 
107 | /*
108 |  - regatoi - internal routine to implement REG_ATOI
109 |  == static char *regatoi(const regex_t *preg, char *localbuf);
110 |  */
111 | static char *
112 | regatoi(preg, localbuf)
113 | const regex_t *preg;
114 | char *localbuf;
115 | {
116 | 	register struct rerr *r;
117 | 
118 | 	for (r = rerrs; r->code >= 0; r++)
119 | 		if (strcmp(r->name, preg->re_endp) == 0)
120 | 			break;
121 | 	if (r->code < 0)
122 | 		return("0");
123 | 
124 | 	sprintf(localbuf, "%d", r->code);
125 | 	return(localbuf);
126 | }
127 | 


--------------------------------------------------------------------------------
/ext/re/regerror.ih:
--------------------------------------------------------------------------------
 1 | /* ========= begin header generated by ./mkh ========= */
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | /* === regerror.c === */
 7 | static char *regatoi(const regex_t *preg, char *localbuf);
 8 | 
 9 | #ifdef __cplusplus
10 | }
11 | #endif
12 | /* ========= end header generated by ./mkh ========= */
13 | 


--------------------------------------------------------------------------------
/ext/re/regex.3:
--------------------------------------------------------------------------------
  1 | .TH REGEX 3 "25 Sept 1997"
  2 | .BY "Henry Spencer"
  3 | .de ZR
  4 | .\" one other place knows this name:  the SEE ALSO section
  5 | .IR regex (7) \\$1
  6 | ..
  7 | .SH NAME
  8 | regcomp, regexec, regerror, regfree \- regular-expression library
  9 | .SH SYNOPSIS
 10 | .ft B
 11 | .\".na
 12 | #include <sys/types.h>
 13 | .br
 14 | #include <regex.h>
 15 | .HP 10
 16 | int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags);
 17 | .HP
 18 | int\ regexec(const\ regex_t\ *preg, const\ char\ *string,
 19 | size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags);
 20 | .HP
 21 | size_t\ regerror(int\ errcode, const\ regex_t\ *preg,
 22 | char\ *errbuf, size_t\ errbuf_size);
 23 | .HP
 24 | void\ regfree(regex_t\ *preg);
 25 | .\".ad
 26 | .ft
 27 | .SH DESCRIPTION
 28 | These routines implement POSIX 1003.2 regular expressions (``RE''s);
 29 | see
 30 | .ZR .
 31 | .I Regcomp
 32 | compiles an RE written as a string into an internal form,
 33 | .I regexec
 34 | matches that internal form against a string and reports results,
 35 | .I regerror
 36 | transforms error codes from either into human-readable messages,
 37 | and
 38 | .I regfree
 39 | frees any dynamically-allocated storage used by the internal form
 40 | of an RE.
 41 | .PP
 42 | The header
 43 | .I <regex.h>
 44 | declares two structure types,
 45 | .I regex_t
 46 | and
 47 | .IR regmatch_t ,
 48 | the former for compiled internal forms and the latter for match reporting.
 49 | It also declares the four functions,
 50 | a type
 51 | .IR regoff_t ,
 52 | and a number of constants with names starting with ``REG_''.
 53 | .PP
 54 | .I Regcomp
 55 | compiles the regular expression contained in the
 56 | .I pattern
 57 | string,
 58 | subject to the flags in
 59 | .IR cflags ,
 60 | and places the results in the
 61 | .I regex_t
 62 | structure pointed to by
 63 | .IR preg .
 64 | .I Cflags
 65 | is the bitwise OR of zero or more of the following flags:
 66 | .IP REG_EXTENDED \w'REG_EXTENDED'u+2n
 67 | Compile modern (``extended'') REs,
 68 | rather than the obsolete (``basic'') REs that
 69 | are the default.
 70 | .IP REG_BASIC
 71 | This is a synonym for 0,
 72 | provided as a counterpart to REG_EXTENDED to improve readability.
 73 | This is an extension,
 74 | compatible with but not specified by POSIX 1003.2,
 75 | and should be used with
 76 | caution in software intended to be portable to other systems.
 77 | .IP REG_NOSPEC
 78 | Compile with recognition of all special characters turned off.
 79 | All characters are thus considered ordinary,
 80 | so the ``RE'' is a literal string.
 81 | This is an extension,
 82 | compatible with but not specified by POSIX 1003.2,
 83 | and should be used with
 84 | caution in software intended to be portable to other systems.
 85 | REG_EXTENDED and REG_NOSPEC may not be used
 86 | in the same call to
 87 | .IR regcomp .
 88 | .IP REG_ICASE
 89 | Compile for matching that ignores upper/lower case distinctions.
 90 | See
 91 | .ZR .
 92 | .IP REG_NOSUB
 93 | Compile for matching that need only report success or failure,
 94 | not what was matched.
 95 | .IP REG_NEWLINE
 96 | Compile for newline-sensitive matching.
 97 | By default, newline is a completely ordinary character with no special
 98 | meaning in either REs or strings.
 99 | With this flag,
100 | `[^' bracket expressions and `.' never match newline,
101 | a `^' anchor matches the null string after any newline in the string
102 | in addition to its normal function,
103 | and the `$' anchor matches the null string before any newline in the
104 | string in addition to its normal function.
105 | .IP REG_PEND
106 | The regular expression ends,
107 | not at the first NUL,
108 | but just before the character pointed to by the
109 | .I re_endp
110 | member of the structure pointed to by
111 | .IR preg .
112 | The
113 | .I re_endp
114 | member is of type
115 | .IR const\ char\ * .
116 | This flag permits inclusion of NULs in the RE;
117 | they are considered ordinary characters.
118 | This is an extension,
119 | compatible with but not specified by POSIX 1003.2,
120 | and should be used with
121 | caution in software intended to be portable to other systems.
122 | .PP
123 | When successful,
124 | .I regcomp
125 | returns 0 and fills in the structure pointed to by
126 | .IR preg .
127 | One member of that structure
128 | (other than
129 | .IR re_endp )
130 | is publicized:
131 | .IR re_nsub ,
132 | of type
133 | .IR size_t ,
134 | contains the number of parenthesized subexpressions within the RE
135 | (except that the value of this member is undefined if the
136 | REG_NOSUB flag was used).
137 | If
138 | .I regcomp
139 | fails, it returns a non-zero error code;
140 | see DIAGNOSTICS.
141 | .PP
142 | .I Regexec
143 | matches the compiled RE pointed to by
144 | .I preg
145 | against the
146 | .IR string ,
147 | subject to the flags in
148 | .IR eflags ,
149 | and reports results using
150 | .IR nmatch ,
151 | .IR pmatch ,
152 | and the returned value.
153 | The RE must have been compiled by a previous invocation of
154 | .IR regcomp .
155 | The compiled form is not altered during execution of
156 | .IR regexec ,
157 | so a single compiled RE can be used simultaneously by multiple threads.
158 | .PP
159 | By default,
160 | the NUL-terminated string pointed to by
161 | .I string
162 | is considered to be the text of an entire line,
163 | with the NUL indicating the end of the line.
164 | (That is,
165 | any other end-of-line marker is considered to have been removed
166 | and replaced by the NUL.)
167 | The
168 | .I eflags
169 | argument is the bitwise OR of zero or more of the following flags:
170 | .IP REG_NOTBOL \w'REG_STARTEND'u+2n
171 | The first character of
172 | the string
173 | is not the beginning of a line, so the `^' anchor should not match before it.
174 | This does not affect the behavior of newlines under REG_NEWLINE.
175 | .IP REG_NOTEOL
176 | The NUL terminating
177 | the string
178 | does not end a line, so the `$' anchor should not match before it.
179 | This does not affect the behavior of newlines under REG_NEWLINE.
180 | .IP REG_STARTEND
181 | The string is considered to start at
182 | \fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR
183 | and to have a terminating NUL located at
184 | \fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR
185 | (there need not actually be a NUL at that location),
186 | regardless of the value of
187 | .IR nmatch .
188 | See below for the definition of
189 | .IR pmatch
190 | and
191 | .IR nmatch .
192 | This is an extension,
193 | compatible with but not specified by POSIX 1003.2,
194 | and should be used with
195 | caution in software intended to be portable to other systems.
196 | Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL;
197 | REG_STARTEND affects only the location of the string,
198 | not how it is matched.
199 | .PP
200 | See
201 | .ZR
202 | for a discussion of what is matched in situations where an RE or a
203 | portion thereof could match any of several substrings of
204 | .IR string .
205 | .PP
206 | Normally,
207 | .I regexec
208 | returns 0 for success and the non-zero code REG_NOMATCH for failure.
209 | Other non-zero error codes may be returned in exceptional situations;
210 | see DIAGNOSTICS.
211 | .PP
212 | If REG_NOSUB was specified in the compilation of the RE,
213 | or if
214 | .I nmatch
215 | is 0,
216 | .I regexec
217 | ignores the
218 | .I pmatch
219 | argument (but see below for the case where REG_STARTEND is specified).
220 | Otherwise,
221 | .I pmatch
222 | points to an array of
223 | .I nmatch
224 | structures of type
225 | .IR regmatch_t .
226 | Such a structure has at least the members
227 | .I rm_so
228 | and
229 | .IR rm_eo ,
230 | both of type
231 | .I regoff_t
232 | (a signed arithmetic type at least as large as an
233 | .I off_t
234 | and a
235 | .IR ssize_t ),
236 | containing respectively the offset of the first character of a substring
237 | and the offset of the first character after the end of the substring.
238 | Offsets are measured from the beginning of the
239 | .I string
240 | argument given to
241 | .IR regexec .
242 | An empty substring is denoted by equal offsets,
243 | both indicating the character following the empty substring.
244 | .PP
245 | The 0th member of the
246 | .I pmatch
247 | array is filled in to indicate what substring of
248 | .I string
249 | was matched by the entire RE.
250 | Remaining members report what substring was matched by parenthesized
251 | subexpressions within the RE;
252 | member
253 | .I i
254 | reports subexpression
255 | .IR i ,
256 | with subexpressions counted (starting at 1) by the order of their opening
257 | parentheses in the RE, left to right.
258 | Unused entries in the array\(emcorresponding either to subexpressions that
259 | did not participate in the match at all, or to subexpressions that do not
260 | exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both
261 | .I rm_so
262 | and
263 | .I rm_eo
264 | set to \-1.
265 | If a subexpression participated in the match several times,
266 | the reported substring is the last one it matched.
267 | (Note, as an example in particular, that when the RE `(b*)+' matches `bbb',
268 | the parenthesized subexpression matches the three `b's and then
269 | an infinite number of empty strings following the last `b',
270 | so the reported substring is one of the empties.)
271 | .PP
272 | If REG_STARTEND is specified,
273 | .I pmatch
274 | must point to at least one
275 | .I regmatch_t
276 | (even if
277 | .I nmatch
278 | is 0 or REG_NOSUB was specified),
279 | to hold the input offsets for REG_STARTEND.
280 | Use for output is still entirely controlled by
281 | .IR nmatch ;
282 | if
283 | .I nmatch
284 | is 0 or REG_NOSUB was specified,
285 | the value of
286 | .IR pmatch [0]
287 | will not be changed by a successful
288 | .IR regexec .
289 | .PP
290 | .I Regerror
291 | maps a non-zero
292 | .I errcode
293 | from either
294 | .I regcomp
295 | or
296 | .I regexec
297 | to a human-readable, printable message.
298 | If
299 | .I preg
300 | is non-NULL,
301 | the error code should have arisen from use of
302 | the
303 | .I regex_t
304 | pointed to by
305 | .IR preg ,
306 | and if the error code came from
307 | .IR regcomp ,
308 | it should have been the result from the most recent
309 | .I regcomp
310 | using that
311 | .IR regex_t .
312 | .RI ( Regerror
313 | may be able to supply a more detailed message using information
314 | from the
315 | .IR regex_t .)
316 | .I Regerror
317 | places the NUL-terminated message into the buffer pointed to by
318 | .IR errbuf ,
319 | limiting the length (including the NUL) to at most
320 | .I errbuf_size
321 | bytes.
322 | If the whole message won't fit,
323 | as much of it as will fit before the terminating NUL is supplied.
324 | In any case,
325 | the returned value is the size of buffer needed to hold the whole
326 | message (including terminating NUL).
327 | If
328 | .I errbuf_size
329 | is 0,
330 | .I errbuf
331 | is ignored but the return value is still correct.
332 | .PP
333 | If the
334 | .I errcode
335 | given to
336 | .I regerror
337 | is first ORed with REG_ITOA,
338 | the ``message'' that results is the printable name of the error code,
339 | e.g. ``REG_NOMATCH'',
340 | rather than an explanation thereof.
341 | If
342 | .I errcode
343 | is REG_ATOI,
344 | then
345 | .I preg
346 | shall be non-NULL and the
347 | .I re_endp
348 | member of the structure it points to
349 | must point to the printable name of an error code;
350 | in this case, the result in
351 | .I errbuf
352 | is the decimal digits of
353 | the numeric value of the error code
354 | (0 if the name is not recognized).
355 | REG_ITOA and REG_ATOI are intended primarily as debugging facilities;
356 | they are extensions,
357 | compatible with but not specified by POSIX 1003.2,
358 | and should be used with
359 | caution in software intended to be portable to other systems.
360 | Be warned also that they are considered experimental and changes are possible.
361 | .PP
362 | .I Regfree
363 | frees any dynamically-allocated storage associated with the compiled RE
364 | pointed to by
365 | .IR preg .
366 | The remaining
367 | .I regex_t
368 | is no longer a valid compiled RE
369 | and the effect of supplying it to
370 | .I regexec
371 | or
372 | .I regerror
373 | is undefined.
374 | .PP
375 | None of these functions references global variables except for tables
376 | of constants;
377 | all are safe for use from multiple threads if the arguments are safe.
378 | .SH IMPLEMENTATION CHOICES
379 | There are a number of decisions that 1003.2 leaves up to the implementor,
380 | either by explicitly saying ``undefined'' or by virtue of them being
381 | forbidden by the RE grammar.
382 | This implementation treats them as follows.
383 | .PP
384 | See
385 | .ZR
386 | for a discussion of the definition of case-independent matching.
387 | .PP
388 | There is no particular limit on the length of REs,
389 | except insofar as memory is limited.
390 | Memory usage is approximately linear in RE size, and largely insensitive
391 | to RE complexity, except for bounded repetitions.
392 | See BUGS for one short RE using them
393 | that will run almost any system out of memory.
394 | .PP
395 | A backslashed character other than one specifically given a magic meaning
396 | by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs)
397 | is taken as an ordinary character.
398 | .PP
399 | Any unmatched [ is a REG_EBRACK error.
400 | .PP
401 | Equivalence classes cannot begin or end bracket-expression ranges.
402 | The endpoint of one range cannot begin another.
403 | .PP
404 | RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255.
405 | .PP
406 | A repetition operator (?, *, +, or bounds) cannot follow another
407 | repetition operator.
408 | A repetition operator cannot begin an expression or subexpression
409 | or follow `^' or `|'.
410 | .PP
411 | `|' cannot appear first or last in a (sub)expression or after another `|',
412 | i.e. an operand of `|' cannot be an empty subexpression.
413 | An empty parenthesized subexpression, `()', is legal and matches an
414 | empty (sub)string.
415 | An empty string is not a legal RE.
416 | .PP
417 | A `{' followed by a digit is considered the beginning of bounds for a
418 | bounded repetition, which must then follow the syntax for bounds.
419 | A `{' \fInot\fR followed by a digit is considered an ordinary character.
420 | .PP
421 | `^' and `$' beginning and ending subexpressions in obsolete (``basic'')
422 | REs are anchors, not ordinary characters.
423 | .SH SEE ALSO
424 | grep(1), regex(7)
425 | .PP
426 | POSIX 1003.2, sections 2.8 (Regular Expression Notation)
427 | and
428 | B.5 (C Binding for Regular Expression Matching).
429 | .SH DIAGNOSTICS
430 | Non-zero error codes from
431 | .I regcomp
432 | and
433 | .I regexec
434 | include the following:
435 | .PP
436 | .nf
437 | .ta \w'REG_ECOLLATE'u+3n
438 | REG_NOMATCH	regexec() failed to match
439 | REG_BADPAT	invalid regular expression
440 | REG_ECOLLATE	invalid collating element
441 | REG_ECTYPE	invalid character class
442 | REG_EESCAPE	\e applied to unescapable character
443 | REG_ESUBREG	invalid backreference number
444 | REG_EBRACK	brackets [ ] not balanced
445 | REG_EPAREN	parentheses ( ) not balanced
446 | REG_EBRACE	braces { } not balanced
447 | REG_BADBR	invalid repetition count(s) in { }
448 | REG_ERANGE	invalid character range in [ ]
449 | REG_ESPACE	ran out of memory
450 | REG_BADRPT	?, *, or + operand invalid
451 | REG_EMPTY	empty (sub)expression
452 | REG_ASSERT	``can't happen''\(emyou found a bug
453 | REG_INVARG	invalid argument, e.g. negative-length string
454 | .fi
455 | .SH HISTORY
456 | Written by Henry Spencer,
457 | henry@zoo.toronto.edu.
458 | .SH BUGS
459 | This is an alpha release with known defects.
460 | Please report problems.
461 | .PP
462 | There is one known functionality bug.
463 | The implementation of internationalization is incomplete:
464 | the locale is always assumed to be the default one of 1003.2,
465 | and only the collating elements etc. of that locale are available.
466 | .PP
467 | The back-reference code is subtle and doubts linger about its correctness
468 | in complex cases.
469 | .PP
470 | .I Regexec
471 | performance is poor.
472 | This will improve with later releases.
473 | .I Nmatch
474 | exceeding 0 is expensive;
475 | .I nmatch
476 | exceeding 1 is worse.
477 | .I Regexec
478 | is largely insensitive to RE complexity \fIexcept\fR that back
479 | references are massively expensive.
480 | RE length does matter; in particular, there is a strong speed bonus
481 | for keeping RE length under about 30 characters,
482 | with most special characters counting roughly double.
483 | .PP
484 | .I Regcomp
485 | implements bounded repetitions by macro expansion,
486 | which is costly in time and space if counts are large
487 | or bounded repetitions are nested.
488 | An RE like, say,
489 | `((((a{1,100}){1,100}){1,100}){1,100}){1,100}'
490 | will (eventually) run almost any existing machine out of swap space.
491 | .PP
492 | There are suspected problems with response to obscure error conditions.
493 | Notably,
494 | certain kinds of internal overflow,
495 | produced only by truly enormous REs or by multiply nested bounded repetitions,
496 | are probably not handled well.
497 | .PP
498 | Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is
499 | a special character only in the presence of a previous unmatched `('.
500 | This can't be fixed until the spec is fixed.
501 | .PP
502 | The standard's definition of back references is vague.
503 | For example, does
504 | `a\e(\e(b\e)*\e2\e)*d' match `abbbd'?
505 | Until the standard is clarified,
506 | behavior in such cases should not be relied on.
507 | .PP
508 | The implementation of word-boundary matching is a bit of a kludge,
509 | and bugs may lurk in combinations of word-boundary matching and anchoring.
510 | 


--------------------------------------------------------------------------------
/ext/re/regex.7:
--------------------------------------------------------------------------------
  1 | .TH REGEX 7 "25 Oct 1995"
  2 | .BY "Henry Spencer"
  3 | .SH NAME
  4 | regex \- POSIX 1003.2 regular expressions
  5 | .SH DESCRIPTION
  6 | Regular expressions (``RE''s),
  7 | as defined in POSIX 1003.2, come in two forms:
  8 | modern REs (roughly those of
  9 | .IR egrep ;
 10 | 1003.2 calls these ``extended'' REs)
 11 | and obsolete REs (roughly those of
 12 | .IR ed ;
 13 | 1003.2 ``basic'' REs).
 14 | Obsolete REs mostly exist for backward compatibility in some old programs;
 15 | they will be discussed at the end.
 16 | 1003.2 leaves some aspects of RE syntax and semantics open;
 17 | `\(dg' marks decisions on these aspects that
 18 | may not be fully portable to other 1003.2 implementations.
 19 | .PP
 20 | A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
 21 | separated by `|'.
 22 | It matches anything that matches one of the branches.
 23 | .PP
 24 | A branch is one\(dg or more \fIpieces\fR, concatenated.
 25 | It matches a match for the first, followed by a match for the second, etc.
 26 | .PP
 27 | A piece is an \fIatom\fR possibly followed
 28 | by a single\(dg `*', `+', `?', or \fIbound\fR.
 29 | An atom followed by `*' matches a sequence of 0 or more matches of the atom.
 30 | An atom followed by `+' matches a sequence of 1 or more matches of the atom.
 31 | An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
 32 | .PP
 33 | A \fIbound\fR is `{' followed by an unsigned decimal integer,
 34 | possibly followed by `,'
 35 | possibly followed by another unsigned decimal integer,
 36 | always followed by `}'.
 37 | The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
 38 | and if there are two of them, the first may not exceed the second.
 39 | An atom followed by a bound containing one integer \fIi\fR
 40 | and no comma matches
 41 | a sequence of exactly \fIi\fR matches of the atom.
 42 | An atom followed by a bound
 43 | containing one integer \fIi\fR and a comma matches
 44 | a sequence of \fIi\fR or more matches of the atom.
 45 | An atom followed by a bound
 46 | containing two integers \fIi\fR and \fIj\fR matches
 47 | a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
 48 | .PP
 49 | An atom is a regular expression enclosed in `()' (matching a match for the
 50 | regular expression),
 51 | an empty set of `()' (matching the null string)\(dg,
 52 | a \fIbracket expression\fR (see below), `.'
 53 | (matching any single character), `^' (matching the null string at the
 54 | beginning of a line), `$' (matching the null string at the
 55 | end of a line), a `\e' followed by one of the characters
 56 | `^.[$()|*+?{\e'
 57 | (matching that character taken as an ordinary character),
 58 | a `\e' followed by any other character\(dg
 59 | (matching that character taken as an ordinary character,
 60 | as if the `\e' had not been present\(dg),
 61 | or a single character with no other significance (matching that character).
 62 | A `{' followed by a character other than a digit is an ordinary
 63 | character, not the beginning of a bound\(dg.
 64 | It is illegal to end an RE with `\e'.
 65 | .PP
 66 | A \fIbracket expression\fR is a list of characters enclosed in `[]'.
 67 | It normally matches any single character from the list (but see below).
 68 | If the list begins with `^',
 69 | it matches any single character
 70 | (but see below) \fInot\fR from the rest of the list.
 71 | If two characters in the list are separated by `\-', this is shorthand
 72 | for the full \fIrange\fR of characters between those two (inclusive) in the
 73 | collating sequence,
 74 | e.g. `[0\-9]' in ASCII matches any decimal digit.
 75 | It is illegal\(dg for two ranges to share an
 76 | endpoint, e.g. `a\-c\-e'.
 77 | Ranges are very collating-sequence-dependent,
 78 | and portable programs should avoid relying on them.
 79 | .PP
 80 | To include a literal `]' in the list, make it the first character
 81 | (following a possible `^').
 82 | To include a literal `\-', make it the first or last character,
 83 | or the second endpoint of a range.
 84 | To use a literal `\-' as the first endpoint of a range,
 85 | enclose it in `[.' and `.]' to make it a collating element (see below).
 86 | With the exception of these and some combinations using `[' (see next
 87 | paragraphs), all other special characters, including `\e', lose their
 88 | special significance within a bracket expression.
 89 | .PP
 90 | Within a bracket expression, a collating element (a character,
 91 | a multi-character sequence that collates as if it were a single character,
 92 | or a collating-sequence name for either)
 93 | enclosed in `[.' and `.]' stands for the
 94 | sequence of characters of that collating element.
 95 | The sequence is a single element of the bracket expression's list.
 96 | A bracket expression containing a multi-character collating element 
 97 | can thus match more than one character,
 98 | e.g. if the collating sequence includes a `ch' collating element,
 99 | then the RE `[[.ch.]]*c' matches the first five characters
100 | of `chchcc'.
101 | .PP
102 | Within a bracket expression, a collating element enclosed in `[=' and
103 | `=]' is an equivalence class, standing for the sequences of characters
104 | of all collating elements equivalent to that one, including itself.
105 | (If there are no other equivalent collating elements,
106 | the treatment is as if the enclosing delimiters were `[.' and `.]'.)
107 | For example, if o and \o'o^' are the members of an equivalence class,
108 | then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
109 | An equivalence class may not\(dg be an endpoint
110 | of a range.
111 | .PP
112 | Within a bracket expression, the name of a \fIcharacter class\fR enclosed
113 | in `[:' and `:]' stands for the list of all characters belonging to that
114 | class.
115 | Standard character class names are:
116 | .PP
117 | .RS
118 | .nf
119 | .ta 3c 6c 9c
120 | alnum	digit	punct
121 | alpha	graph	space
122 | blank	lower	upper
123 | cntrl	print	xdigit
124 | .fi
125 | .RE
126 | .PP
127 | These stand for the character classes defined in
128 | .IR ctype (3).
129 | A locale may provide others.
130 | A character class may not be used as an endpoint of a range.
131 | .PP
132 | There are two special cases\(dg of bracket expressions:
133 | the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
134 | the beginning and end of a word respectively.
135 | A word is defined as a sequence of
136 | word characters
137 | which is neither preceded nor followed by
138 | word characters.
139 | A word character is an
140 | .I alnum
141 | character (as defined by
142 | .IR ctype (3))
143 | or an underscore.
144 | This is an extension,
145 | compatible with but not specified by POSIX 1003.2,
146 | and should be used with
147 | caution in software intended to be portable to other systems.
148 | .PP
149 | In the event that an RE could match more than one substring of a given
150 | string,
151 | the RE matches the one starting earliest in the string.
152 | If the RE could match more than one substring starting at that point,
153 | it matches the longest.
154 | Subexpressions also match the longest possible substrings, subject to
155 | the constraint that the whole match be as long as possible,
156 | with subexpressions starting earlier in the RE taking priority over
157 | ones starting later.
158 | Note that higher-level subexpressions thus take priority over
159 | their lower-level component subexpressions.
160 | .PP
161 | Match lengths are measured in characters, not collating elements.
162 | A null string is considered longer than no match at all.
163 | For example,
164 | `bb*' matches the three middle characters of `abbbc',
165 | `(wee|week)(knights|nights)' matches all ten characters of `weeknights',
166 | when `(.*).*' is matched against `abc' the parenthesized subexpression
167 | matches all three characters, and
168 | when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
169 | subexpression match the null string.
170 | .PP
171 | If case-independent matching is specified,
172 | the effect is much as if all case distinctions had vanished from the
173 | alphabet.
174 | When an alphabetic that exists in multiple cases appears as an
175 | ordinary character outside a bracket expression, it is effectively
176 | transformed into a bracket expression containing both cases,
177 | e.g. `x' becomes `[xX]'.
178 | When it appears inside a bracket expression, all case counterparts
179 | of it are added to the bracket expression, so that (e.g.) `[x]'
180 | becomes `[xX]' and `[^x]' becomes `[^xX]'.
181 | .PP
182 | No particular limit is imposed on the length of REs\(dg.
183 | Programs intended to be portable should not employ REs longer
184 | than 256 bytes,
185 | as an implementation can refuse to accept such REs and remain
186 | POSIX-compliant.
187 | .PP
188 | Obsolete (``basic'') regular expressions differ in several respects.
189 | `|', `+', and `?' are ordinary characters and there is no equivalent
190 | for their functionality.
191 | The delimiters for bounds are `\e{' and `\e}',
192 | with `{' and `}' by themselves ordinary characters.
193 | The parentheses for nested subexpressions are `\e(' and `\e)',
194 | with `(' and `)' by themselves ordinary characters.
195 | `^' is an ordinary character except at the beginning of the
196 | RE or\(dg the beginning of a parenthesized subexpression,
197 | `$' is an ordinary character except at the end of the
198 | RE or\(dg the end of a parenthesized subexpression,
199 | and `*' is an ordinary character if it appears at the beginning of the
200 | RE or the beginning of a parenthesized subexpression
201 | (after a possible leading `^').
202 | Finally, there is one new type of atom, a \fIback reference\fR:
203 | `\e' followed by a non-zero decimal digit \fId\fR
204 | matches the same sequence of characters
205 | matched by the \fId\fRth parenthesized subexpression
206 | (numbering subexpressions by the positions of their opening parentheses,
207 | left to right),
208 | so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
209 | .SH SEE ALSO
210 | regex(3)
211 | .PP
212 | POSIX 1003.2, section 2.8 (Regular Expression Notation).
213 | .SH HISTORY
214 | Written by Henry Spencer, based on the 1003.2 spec.
215 | .SH BUGS
216 | Having two kinds of REs is a botch.
217 | .PP
218 | The current 1003.2 spec says that `)' is an ordinary character in
219 | the absence of an unmatched `(';
220 | this was an unintentional result of a wording error,
221 | and change is likely.
222 | Avoid relying on it.
223 | .PP
224 | Back references are a dreadful botch,
225 | posing major problems for efficient implementations.
226 | They are also somewhat vaguely defined
227 | (does
228 | `a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
229 | Avoid using them.
230 | .PP
231 | 1003.2's specification of case-independent matching is vague.
232 | The ``one case implies all cases'' definition given above
233 | is current consensus among implementors as to the right interpretation.
234 | .PP
235 | The syntax for word boundaries is incredibly ugly.
236 | 


--------------------------------------------------------------------------------
/ext/re/regex.h:
--------------------------------------------------------------------------------
 1 | #ifndef _REGEX_H_
 2 | #define   _REGEX_H_ /* never again */
 3 | /* ========= begin header generated by ././mkh ========= */
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | /* === regex2.h === */
 9 | typedef off_t regoff_t;
10 | typedef struct {
11 |      int re_magic;
12 |      size_t re_nsub;          /* number of parenthesized subexpressions */
13 |      const char *re_endp;     /* end pointer for REG_PEND */
14 |      struct re_guts *re_g;    /* none of your business :-) */
15 | } regex_t;
16 | typedef struct {
17 |      regoff_t rm_so;          /* start of match */
18 |      regoff_t rm_eo;          /* end of match */
19 | } regmatch_t;
20 | 
21 | 
22 | /* === regcomp.c === */
23 | extern int regcomp(regex_t *, const char *, int);
24 | #define   REG_BASIC 0000
25 | #define   REG_EXTENDED   0001
26 | #define   REG_ICASE 0002
27 | #define   REG_NOSUB 0004
28 | #define   REG_NEWLINE    0010
29 | #define   REG_NOSPEC     0020
30 | #define   REG_PEND  0040
31 | #define   REG_DUMP  0200
32 | 
33 | 
34 | /* === regerror.c === */
35 | #define   REG_OKAY  0
36 | #define   REG_NOMATCH     1
37 | #define   REG_BADPAT      2
38 | #define   REG_ECOLLATE    3
39 | #define   REG_ECTYPE      4
40 | #define   REG_EESCAPE     5
41 | #define   REG_ESUBREG     6
42 | #define   REG_EBRACK      7
43 | #define   REG_EPAREN      8
44 | #define   REG_EBRACE      9
45 | #define   REG_BADBR 10
46 | #define   REG_ERANGE     11
47 | #define   REG_ESPACE     12
48 | #define   REG_BADRPT     13
49 | #define   REG_EMPTY 14
50 | #define   REG_ASSERT     15
51 | #define   REG_INVARG     16
52 | #define   REG_ATOI  255  /* convert name to number (!) */
53 | #define   REG_ITOA  0400 /* convert number to name (!) */
54 | extern size_t regerror(int, const regex_t *, char *, size_t);
55 | 
56 | 
57 | /* === regexec.c === */
58 | extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
59 | #define   REG_NOTBOL     00001
60 | #define   REG_NOTEOL     00002
61 | #define   REG_STARTEND   00004
62 | #define   REG_TRACE 00400     /* tracing of execution */
63 | #define   REG_LARGE 01000     /* force large representation */
64 | #define   REG_BACKR 02000     /* force use of backref code */
65 | 
66 | 
67 | /* === regfree.c === */
68 | extern void regfree(regex_t *);
69 | 
70 | #ifdef __cplusplus
71 | }
72 | #endif
73 | /* ========= end header generated by ././mkh ========= */
74 | #endif
75 | 


--------------------------------------------------------------------------------
/ext/re/regex2.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * First, the stuff that ends up in the outside-world include file
  3 |  = typedef off_t regoff_t;
  4 |  = typedef struct {
  5 |  = 	int re_magic;
  6 |  = 	size_t re_nsub;		// number of parenthesized subexpressions
  7 |  = 	const char *re_endp;	// end pointer for REG_PEND
  8 |  = 	struct re_guts *re_g;	// none of your business :-)
  9 |  = } regex_t;
 10 |  = typedef struct {
 11 |  = 	regoff_t rm_so;		// start of match
 12 |  = 	regoff_t rm_eo;		// end of match
 13 |  = } regmatch_t;
 14 |  */
 15 | /*
 16 |  * internals of regex_t
 17 |  */
 18 | #define	MAGIC1	((('r'^0200)<<8) | 'e')
 19 | 
 20 | /*
 21 |  * The internal representation is a *strip*, a sequence of
 22 |  * operators ending with an endmarker.  (Some terminology etc. is a
 23 |  * historical relic of earlier versions which used multiple strips.)
 24 |  * Certain oddities in the representation are there to permit running
 25 |  * the machinery backwards; in particular, any deviation from sequential
 26 |  * flow must be marked at both its source and its destination.  Some
 27 |  * fine points:
 28 |  *
 29 |  * - OPLUS_ and O_PLUS are *inside* the loop they create.
 30 |  * - OQUEST_ and O_QUEST are *outside* the bypass they create.
 31 |  * - OCH_ and O_CH are *outside* the multi-way branch they create, while
 32 |  *   OOR1 and OOR2 are respectively the end and the beginning of one of
 33 |  *   the branches.  Note that there is an implicit OOR2 following OCH_
 34 |  *   and an implicit OOR1 preceding O_CH.
 35 |  *
 36 |  * In state representations, an operator's bit is on to signify a state
 37 |  * immediately *preceding* "execution" of that operator.
 38 |  */
 39 | typedef long sop;		/* strip operator */
 40 | typedef long sopno;
 41 | #define	OPRMASK	0x7c000000
 42 | #define	OPDMASK	0x03ffffff
 43 | #define	OPSHIFT	(26)
 44 | #define	OP(n)	((n)&OPRMASK)
 45 | #define	OPND(n)	((n)&OPDMASK)
 46 | #define	SOP(op, opnd)	((op)|(opnd))
 47 | /* operators			   meaning	operand			*/
 48 | /*						(back, fwd are offsets)	*/
 49 | #define	OEND	(1<<OPSHIFT)	/* endmarker	-			*/
 50 | #define	OCHAR	(2<<OPSHIFT)	/* character	unsigned char		*/
 51 | #define	OBOL	(3<<OPSHIFT)	/* left anchor	-			*/
 52 | #define	OEOL	(4<<OPSHIFT)	/* right anchor	-			*/
 53 | #define	OANY	(5<<OPSHIFT)	/* .		-			*/
 54 | #define	OANYOF	(6<<OPSHIFT)	/* [...]	set number		*/
 55 | #define	OBACK_	(7<<OPSHIFT)	/* begin \d	paren number		*/
 56 | #define	O_BACK	(8<<OPSHIFT)	/* end \d	paren number		*/
 57 | #define	OPLUS_	(9<<OPSHIFT)	/* + prefix	fwd to suffix		*/
 58 | #define	O_PLUS	(10<<OPSHIFT)	/* + suffix	back to prefix		*/
 59 | #define	OQUEST_	(11<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
 60 | #define	O_QUEST	(12<<OPSHIFT)	/* ? suffix	back to prefix		*/
 61 | #define	OLPAREN	(13<<OPSHIFT)	/* (		fwd to )		*/
 62 | #define	ORPAREN	(14<<OPSHIFT)	/* )		back to (		*/
 63 | #define	OCH_	(15<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
 64 | #define	OOR1	(16<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
 65 | #define	OOR2	(17<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
 66 | #define	O_CH	(18<<OPSHIFT)	/* end choice	back to OOR1		*/
 67 | #define	OBOW	(19<<OPSHIFT)	/* begin word	-			*/
 68 | #define	OEOW	(20<<OPSHIFT)	/* end word	-			*/
 69 | 
 70 | /*
 71 |  * Structure for [] character-set representation.  Character sets are
 72 |  * done as bit vectors, grouped 8 to a byte vector for compactness.
 73 |  * The individual set therefore has both a pointer to the byte vector
 74 |  * and a mask to pick out the relevant bit of each byte.  A hash code
 75 |  * simplifies testing whether two sets could be identical.
 76 |  *
 77 |  * This will get trickier for multicharacter collating elements.  As
 78 |  * preliminary hooks for dealing with such things, we also carry along
 79 |  * a string of multi-character elements, and decide the size of the
 80 |  * vectors at run time.
 81 |  */
 82 | typedef struct {
 83 | 	uch *ptr;		/* -> uch [csetsize] */
 84 | 	uch mask;		/* bit within array */
 85 | 	uch hash;		/* hash code */
 86 | 	size_t smultis;
 87 | 	char *multis;		/* -> char[smulti]  ab\0cd\0ef\0\0 */
 88 | } cset;
 89 | /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
 90 | #define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
 91 | #define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
 92 | #define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask)
 93 | #define	MCadd(p, cs, cp)	mcadd(p, cs, cp)	/* regcomp() internal fns */
 94 | #define	MCsub(p, cs, cp)	mcsub(p, cs, cp)
 95 | #define	MCin(p, cs, cp)	mcin(p, cs, cp)
 96 | 
 97 | /* stuff for character categories */
 98 | typedef unsigned char cat_t;
 99 | 
100 | /*
101 |  * main compiled-expression structure
102 |  */
103 | struct re_guts {
104 | 	int magic;
105 | #		define	MAGIC2	((('R'^0200)<<8)|'E')
106 | 	sop *strip;		/* malloced area for strip */
107 | 	int csetsize;		/* number of bits in a cset vector */
108 | 	int ncsets;		/* number of csets in use */
109 | 	cset *sets;		/* -> cset [ncsets] */
110 | 	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT] */
111 | 	int cflags;		/* copy of regcomp() cflags argument */
112 | 	sopno nstates;		/* = number of sops */
113 | 	sopno firststate;	/* the initial OEND (normally 0) */
114 | 	sopno laststate;	/* the final OEND */
115 | 	int iflags;		/* internal flags */
116 | #		define	USEBOL	01	/* used ^ */
117 | #		define	USEEOL	02	/* used $ */
118 | #		define	BAD	04	/* something wrong */
119 | 	int nbol;		/* number of ^ used */
120 | 	int neol;		/* number of $ used */
121 | 	int ncategories;	/* how many character categories */
122 | 	cat_t *categories;	/* ->catspace[-CHAR_MIN] */
123 | 	char *must;		/* match must contain this string */
124 | 	int mlen;		/* length of must */
125 | 	size_t nsub;		/* copy of re_nsub */
126 | 	int backrefs;		/* does it use back references? */
127 | 	sopno nplus;		/* how deep does it nest +s? */
128 | 	/* catspace must be last */
129 | 	cat_t catspace[1];	/* actually [NC] */
130 | };
131 | 
132 | /* misc utilities */
133 | #define	OUT	(CHAR_MAX+1)	/* a non-character value */
134 | #define	ISWORD(c)	(isalnum(c) || (c) == '_')
135 | 


--------------------------------------------------------------------------------
/ext/re/regexec.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * the outer shell of regexec()
  3 |  *
  4 |  * This file includes engine.c *twice*, after muchos fiddling with the
  5 |  * macros that code uses.  This lets the same code operate on two different
  6 |  * representations for state sets.
  7 |  */
  8 | #include <sys/types.h>
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | #include <string.h>
 12 | #include <limits.h>
 13 | #include <ctype.h>
 14 | #include "regex.h"
 15 | 
 16 | #include "utils.h"
 17 | #include "regex2.h"
 18 | 
 19 | static int nope = 0;		/* for use in asserts; shuts lint up */
 20 | 
 21 | /* macros for manipulating states, small version */
 22 | #define	states	unsigned
 23 | #define	states1	unsigned	/* for later use in regexec() decision */
 24 | #define	CLEAR(v)	((v) = 0)
 25 | #define	SET0(v, n)	((v) &= ~((unsigned)1 << (n)))
 26 | #define	SET1(v, n)	((v) |= (unsigned)1 << (n))
 27 | #define	ISSET(v, n)	((v) & ((unsigned)1 << (n)))
 28 | #define	ASSIGN(d, s)	((d) = (s))
 29 | #define	EQ(a, b)	((a) == (b))
 30 | #define	STATEVARS	int dummy	/* dummy version */
 31 | #define	STATESETUP(m, n)	/* nothing */
 32 | #define	STATETEARDOWN(m)	/* nothing */
 33 | #define	SETUP(v)	((v) = 0)
 34 | #define	onestate	unsigned
 35 | #define	INIT(o, n)	((o) = (unsigned)1 << (n))
 36 | #define	INC(o)	((o) <<= 1)
 37 | #define	ISSTATEIN(v, o)	((v) & (o))
 38 | /* some abbreviations; note that some of these know variable names! */
 39 | /* do "if I'm here, I can also be there" etc without branches */
 40 | #define	FWD(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) << (n))
 41 | #define	BACK(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) >> (n))
 42 | #define	ISSETBACK(v, n)	((v) & ((unsigned)here >> (n)))
 43 | /* function names */
 44 | #define SNAMES			/* engine.c looks after details */
 45 | 
 46 | #include "engine.c"
 47 | 
 48 | /* now undo things */
 49 | #undef	states
 50 | #undef	CLEAR
 51 | #undef	SET0
 52 | #undef	SET1
 53 | #undef	ISSET
 54 | #undef	ASSIGN
 55 | #undef	EQ
 56 | #undef	STATEVARS
 57 | #undef	STATESETUP
 58 | #undef	STATETEARDOWN
 59 | #undef	SETUP
 60 | #undef	onestate
 61 | #undef	INIT
 62 | #undef	INC
 63 | #undef	ISSTATEIN
 64 | #undef	FWD
 65 | #undef	BACK
 66 | #undef	ISSETBACK
 67 | #undef	SNAMES
 68 | 
 69 | /* macros for manipulating states, large version */
 70 | #define	states	char *
 71 | #define	CLEAR(v)	memset(v, 0, m->g->nstates)
 72 | #define	SET0(v, n)	((v)[n] = 0)
 73 | #define	SET1(v, n)	((v)[n] = 1)
 74 | #define	ISSET(v, n)	((v)[n])
 75 | #define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
 76 | #define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
 77 | #define	STATEVARS	int vn; char *space
 78 | #define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
 79 | 				if ((m)->space == NULL) return(REG_ESPACE); \
 80 | 				(m)->vn = 0; }
 81 | #define	STATETEARDOWN(m)	{ free((m)->space); }
 82 | #define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
 83 | #define	onestate	int
 84 | #define	INIT(o, n)	((o) = (n))
 85 | #define	INC(o)	((o)++)
 86 | #define	ISSTATEIN(v, o)	((v)[o])
 87 | /* some abbreviations; note that some of these know variable names! */
 88 | /* do "if I'm here, I can also be there" etc without branches */
 89 | #define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
 90 | #define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
 91 | #define	ISSETBACK(v, n)	((v)[here - (n)])
 92 | /* function names */
 93 | #define	LNAMES			/* flag */
 94 | 
 95 | #include "engine.c"
 96 | 
 97 | /*
 98 |  - regexec - interface for matching
 99 |  = extern int regexec(const regex_t *, const char *, size_t, \
100 |  =					regmatch_t [], int);
101 |  = #define	REG_NOTBOL	00001
102 |  = #define	REG_NOTEOL	00002
103 |  = #define	REG_STARTEND	00004
104 |  = #define	REG_TRACE	00400	// tracing of execution
105 |  = #define	REG_LARGE	01000	// force large representation
106 |  = #define	REG_BACKR	02000	// force use of backref code
107 |  *
108 |  * We put this here so we can exploit knowledge of the state representation
109 |  * when choosing which matcher to call.  Also, by this point the matchers
110 |  * have been prototyped.
111 |  */
112 | int				/* 0 success, REG_NOMATCH failure */
113 | regexec(preg, string, nmatch, pmatch, eflags)
114 | const regex_t *preg;
115 | const char *string;
116 | size_t nmatch;
117 | regmatch_t pmatch[];
118 | int eflags;
119 | {
120 | 	register struct re_guts *g = preg->re_g;
121 | #ifdef REDEBUG
122 | #	define	GOODFLAGS(f)	(f)
123 | #else
124 | #	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
125 | #endif
126 | 
127 | 	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
128 | 		return(REG_BADPAT);
129 | 	assert(!(g->iflags&BAD));
130 | 	if (g->iflags&BAD)		/* backstop for no-debug case */
131 | 		return(REG_BADPAT);
132 | 	eflags = GOODFLAGS(eflags);
133 | 
134 | 	if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
135 | 		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
136 | 	else
137 | 		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
138 | }
139 | 


--------------------------------------------------------------------------------
/ext/re/regfree.c:
--------------------------------------------------------------------------------
 1 | #include <sys/types.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include "regex.h"
 5 | 
 6 | #include "utils.h"
 7 | #include "regex2.h"
 8 | 
 9 | /*
10 |  - regfree - free everything
11 |  = extern void regfree(regex_t *);
12 |  */
13 | void
14 | regfree(preg)
15 | regex_t *preg;
16 | {
17 | 	register struct re_guts *g;
18 | 
19 | 	if (preg->re_magic != MAGIC1)	/* oops */
20 | 		return;			/* nice to complain, but hard */
21 | 
22 | 	g = preg->re_g;
23 | 	if (g == NULL || g->magic != MAGIC2)	/* oops again */
24 | 		return;
25 | 	preg->re_magic = 0;		/* mark it invalid */
26 | 	g->magic = 0;			/* mark it invalid */
27 | 
28 | 	if (g->strip != NULL)
29 | 		free((char *)g->strip);
30 | 	if (g->sets != NULL)
31 | 		free((char *)g->sets);
32 | 	if (g->setbits != NULL)
33 | 		free((char *)g->setbits);
34 | 	if (g->must != NULL)
35 | 		free(g->must);
36 | 	free((char *)g);
37 | }
38 | 


--------------------------------------------------------------------------------
/ext/re/split.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | 
  4 | /*
  5 |  - split - divide a string into fields, like awk split()
  6 |  = int split(char *string, char *fields[], int nfields, char *sep);
  7 |  */
  8 | int				/* number of fields, including overflow */
  9 | split(string, fields, nfields, sep)
 10 | char *string;
 11 | char *fields[];			/* list is not NULL-terminated */
 12 | int nfields;			/* number of entries available in fields[] */
 13 | char *sep;			/* "" white, "c" single char, "ab" [ab]+ */
 14 | {
 15 | 	register char *p = string;
 16 | 	register char c;			/* latest character */
 17 | 	register char sepc = sep[0];
 18 | 	register char sepc2;
 19 | 	register int fn;
 20 | 	register char **fp = fields;
 21 | 	register char *sepp;
 22 | 	register int trimtrail;
 23 | 
 24 | 	/* white space */
 25 | 	if (sepc == '\0') {
 26 | 		while ((c = *p++) == ' ' || c == '\t')
 27 | 			continue;
 28 | 		p--;
 29 | 		trimtrail = 1;
 30 | 		sep = " \t";	/* note, code below knows this is 2 long */
 31 | 		sepc = ' ';
 32 | 	} else
 33 | 		trimtrail = 0;
 34 | 	sepc2 = sep[1];		/* now we can safely pick this up */
 35 | 
 36 | 	/* catch empties */
 37 | 	if (*p == '\0')
 38 | 		return(0);
 39 | 
 40 | 	/* single separator */
 41 | 	if (sepc2 == '\0') {
 42 | 		fn = nfields;
 43 | 		for (;;) {
 44 | 			*fp++ = p;
 45 | 			fn--;
 46 | 			if (fn == 0)
 47 | 				break;
 48 | 			while ((c = *p++) != sepc)
 49 | 				if (c == '\0')
 50 | 					return(nfields - fn);
 51 | 			*(p-1) = '\0';
 52 | 		}
 53 | 		/* we have overflowed the fields vector -- just count them */
 54 | 		fn = nfields;
 55 | 		for (;;) {
 56 | 			while ((c = *p++) != sepc)
 57 | 				if (c == '\0')
 58 | 					return(fn);
 59 | 			fn++;
 60 | 		}
 61 | 		/* not reached */
 62 | 	}
 63 | 
 64 | 	/* two separators */
 65 | 	if (sep[2] == '\0') {
 66 | 		fn = nfields;
 67 | 		for (;;) {
 68 | 			*fp++ = p;
 69 | 			fn--;
 70 | 			while ((c = *p++) != sepc && c != sepc2)
 71 | 				if (c == '\0') {
 72 | 					if (trimtrail && **(fp-1) == '\0')
 73 | 						fn++;
 74 | 					return(nfields - fn);
 75 | 				}
 76 | 			if (fn == 0)
 77 | 				break;
 78 | 			*(p-1) = '\0';
 79 | 			while ((c = *p++) == sepc || c == sepc2)
 80 | 				continue;
 81 | 			p--;
 82 | 		}
 83 | 		/* we have overflowed the fields vector -- just count them */
 84 | 		fn = nfields;
 85 | 		while (c != '\0') {
 86 | 			while ((c = *p++) == sepc || c == sepc2)
 87 | 				continue;
 88 | 			p--;
 89 | 			fn++;
 90 | 			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
 91 | 				continue;
 92 | 		}
 93 | 		/* might have to trim trailing white space */
 94 | 		if (trimtrail) {
 95 | 			p--;
 96 | 			while ((c = *--p) == sepc || c == sepc2)
 97 | 				continue;
 98 | 			p++;
 99 | 			if (*p != '\0') {
100 | 				if (fn == nfields+1)
101 | 					*p = '\0';
102 | 				fn--;
103 | 			}
104 | 		}
105 | 		return(fn);
106 | 	}
107 | 
108 | 	/* n separators */
109 | 	fn = 0;
110 | 	for (;;) {
111 | 		if (fn < nfields)
112 | 			*fp++ = p;
113 | 		fn++;
114 | 		for (;;) {
115 | 			c = *p++;
116 | 			if (c == '\0')
117 | 				return(fn);
118 | 			sepp = sep;
119 | 			while ((sepc = *sepp++) != '\0' && sepc != c)
120 | 				continue;
121 | 			if (sepc != '\0')	/* it was a separator */
122 | 				break;
123 | 		}
124 | 		if (fn < nfields)
125 | 			*(p-1) = '\0';
126 | 		for (;;) {
127 | 			c = *p++;
128 | 			sepp = sep;
129 | 			while ((sepc = *sepp++) != '\0' && sepc != c)
130 | 				continue;
131 | 			if (sepc == '\0')	/* it wasn't a separator */
132 | 				break;
133 | 		}
134 | 		p--;
135 | 	}
136 | 
137 | 	/* not reached */
138 | }
139 | 
140 | #ifdef TEST_SPLIT
141 | 
142 | 
143 | /*
144 |  * test program
145 |  * pgm		runs regression
146 |  * pgm sep	splits stdin lines by sep
147 |  * pgm str sep	splits str by sep
148 |  * pgm str sep n	splits str by sep n times
149 |  */
150 | int
151 | main(argc, argv)
152 | int argc;
153 | char *argv[];
154 | {
155 | 	char buf[512];
156 | 	register int n;
157 | #	define	MNF	10
158 | 	char *fields[MNF];
159 | 
160 | 	if (argc > 4)
161 | 		for (n = atoi(argv[3]); n > 0; n--) {
162 | 			(void) strcpy(buf, argv[1]);
163 | 		}
164 | 	else if (argc > 3)
165 | 		for (n = atoi(argv[3]); n > 0; n--) {
166 | 			(void) strcpy(buf, argv[1]);
167 | 			(void) split(buf, fields, MNF, argv[2]);
168 | 		}
169 | 	else if (argc > 2)
170 | 		dosplit(argv[1], argv[2]);
171 | 	else if (argc > 1)
172 | 		while (fgets(buf, sizeof(buf), stdin) != NULL) {
173 | 			buf[strlen(buf)-1] = '\0';	/* stomp newline */
174 | 			dosplit(buf, argv[1]);
175 | 		}
176 | 	else
177 | 		regress();
178 | 
179 | 	exit(0);
180 | }
181 | 
182 | dosplit(string, seps)
183 | char *string;
184 | char *seps;
185 | {
186 | #	define	NF	5
187 | 	char *fields[NF];
188 | 	register int nf;
189 | 
190 | 	nf = split(string, fields, NF, seps);
191 | 	print(nf, NF, fields);
192 | }
193 | 
194 | print(nf, nfp, fields)
195 | int nf;
196 | int nfp;
197 | char *fields[];
198 | {
199 | 	register int fn;
200 | 	register int bound;
201 | 
202 | 	bound = (nf > nfp) ? nfp : nf;
203 | 	printf("%d:\t", nf);
204 | 	for (fn = 0; fn < bound; fn++)
205 | 		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
206 | }
207 | 
208 | #define	RNF	5		/* some table entries know this */
209 | struct {
210 | 	char *str;
211 | 	char *seps;
212 | 	int nf;
213 | 	char *fi[RNF];
214 | } tests[] = {
215 | 	"",		" ",	0,	{ "" },
216 | 	" ",		" ",	2,	{ "", "" },
217 | 	"x",		" ",	1,	{ "x" },
218 | 	"xy",		" ",	1,	{ "xy" },
219 | 	"x y",		" ",	2,	{ "x", "y" },
220 | 	"abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" },
221 | 	"  a bcd",	" ",	4,	{ "", "", "a", "bcd" },
222 | 	"a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" },
223 | 	" a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " },
224 | 
225 | 	"",		" _",	0,	{ "" },
226 | 	" ",		" _",	2,	{ "", "" },
227 | 	"x",		" _",	1,	{ "x" },
228 | 	"x y",		" _",	2,	{ "x", "y" },
229 | 	"ab _ cd",	" _",	2,	{ "ab", "cd" },
230 | 	" a_b  c ",	" _",	5,	{ "", "a", "b", "c", "" },
231 | 	"a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" },
232 | 	" a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " },
233 | 
234 | 	"",		" _~",	0,	{ "" },
235 | 	" ",		" _~",	2,	{ "", "" },
236 | 	"x",		" _~",	1,	{ "x" },
237 | 	"x y",		" _~",	2,	{ "x", "y" },
238 | 	"ab _~ cd",	" _~",	2,	{ "ab", "cd" },
239 | 	" a_b  c~",	" _~",	5,	{ "", "a", "b", "c", "" },
240 | 	"a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" },
241 | 	"~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " },
242 | 
243 | 	"",		" _~-",	0,	{ "" },
244 | 	" ",		" _~-",	2,	{ "", "" },
245 | 	"x",		" _~-",	1,	{ "x" },
246 | 	"x y",		" _~-",	2,	{ "x", "y" },
247 | 	"ab _~- cd",	" _~-",	2,	{ "ab", "cd" },
248 | 	" a_b  c~",	" _~-",	5,	{ "", "a", "b", "c", "" },
249 | 	"a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" },
250 | 	"~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " },
251 | 
252 | 	"",		"  ",	0,	{ "" },
253 | 	" ",		"  ",	2,	{ "", "" },
254 | 	"x",		"  ",	1,	{ "x" },
255 | 	"xy",		"  ",	1,	{ "xy" },
256 | 	"x y",		"  ",	2,	{ "x", "y" },
257 | 	"abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" },
258 | 	"  a bcd",	"  ",	3,	{ "", "a", "bcd" },
259 | 	"a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" },
260 | 	" a b c d ",	"  ",	6,	{ "", "a", "b", "c", "d " },
261 | 
262 | 	"",		"",	0,	{ "" },
263 | 	" ",		"",	0,	{ "" },
264 | 	"x",		"",	1,	{ "x" },
265 | 	"xy",		"",	1,	{ "xy" },
266 | 	"x y",		"",	2,	{ "x", "y" },
267 | 	"abc def  g ",	"",	3,	{ "abc", "def", "g" },
268 | 	"\t a bcd",	"",	2,	{ "a", "bcd" },
269 | 	"  a \tb\t c ",	"",	3,	{ "a", "b", "c" },
270 | 	"a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" },
271 | 	"a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" },
272 | 	" a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " },
273 | 
274 | 	NULL,		NULL,	0,	{ NULL },
275 | };
276 | 
277 | regress()
278 | {
279 | 	char buf[512];
280 | 	register int n;
281 | 	char *fields[RNF+1];
282 | 	register int nf;
283 | 	register int i;
284 | 	register int printit;
285 | 	register char *f;
286 | 
287 | 	for (n = 0; tests[n].str != NULL; n++) {
288 | 		(void) strcpy(buf, tests[n].str);
289 | 		fields[RNF] = NULL;
290 | 		nf = split(buf, fields, RNF, tests[n].seps);
291 | 		printit = 0;
292 | 		if (nf != tests[n].nf) {
293 | 			printf("split `%s' by `%s' gave %d fields, not %d\n",
294 | 				tests[n].str, tests[n].seps, nf, tests[n].nf);
295 | 			printit = 1;
296 | 		} else if (fields[RNF] != NULL) {
297 | 			printf("split() went beyond array end\n");
298 | 			printit = 1;
299 | 		} else {
300 | 			for (i = 0; i < nf && i < RNF; i++) {
301 | 				f = fields[i];
302 | 				if (f == NULL)
303 | 					f = "(NULL)";
304 | 				if (strcmp(f, tests[n].fi[i]) != 0) {
305 | 					printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
306 | 						tests[n].str, tests[n].seps,
307 | 						i, fields[i], tests[n].fi[i]);
308 | 					printit = 1;
309 | 				}
310 | 			}
311 | 		}
312 | 		if (printit)
313 | 			print(nf, RNF, fields);
314 | 	}
315 | }
316 | #endif
317 | 


--------------------------------------------------------------------------------
/ext/re/tests:
--------------------------------------------------------------------------------
  1 | # regular expression test set
  2 | # Lines are at least three fields, separated by one or more tabs.  "" stands
  3 | # for an empty field.  First field is an RE.  Second field is flags.  If
  4 | # C flag given, regcomp() is expected to fail, and the third field is the
  5 | # error name (minus the leading REG_).
  6 | #
  7 | # Otherwise it is expected to succeed, and the third field is the string to
  8 | # try matching it against.  If there is no fourth field, the match is
  9 | # expected to fail.  If there is a fourth field, it is the substring that
 10 | # the RE is expected to match.  If there is a fifth field, it is a comma-
 11 | # separated list of what the subexpressions should match, with - indicating
 12 | # no match for that one.  In both the fourth and fifth fields, a (sub)field
 13 | # starting with @ indicates that the (sub)expression is expected to match
 14 | # a null string followed by the stuff after the @; this provides a way to
 15 | # test where null strings match.  The character `N' in REs and strings
 16 | # is newline, `S' is space, `T' is tab, `Z' is NUL.
 17 | #
 18 | # The full list of flags:
 19 | #	-	placeholder, does nothing
 20 | #	b	RE is a BRE, not an ERE
 21 | #	&	try it as both an ERE and a BRE
 22 | #	C	regcomp() error expected, third field is error name
 23 | #	i	REG_ICASE
 24 | #	m	("mundane") REG_NOSPEC
 25 | #	s	REG_NOSUB (not really testable)
 26 | #	n	REG_NEWLINE
 27 | #	^	REG_NOTBOL
 28 | #	$	REG_NOTEOL
 29 | #	#	REG_STARTEND (see below)
 30 | #	p	REG_PEND
 31 | #
 32 | # For REG_STARTEND, the start/end offsets are those of the substring
 33 | # enclosed in ().
 34 | 
 35 | # basics
 36 | a		&	a	a
 37 | abc		&	abc	abc
 38 | abc|de		-	abc	abc
 39 | a|b|c		-	abc	a
 40 | 
 41 | # parentheses and perversions thereof
 42 | a(b)c		-	abc	abc
 43 | a\(b\)c		b	abc	abc
 44 | a(		C	EPAREN
 45 | a(		b	a(	a(
 46 | a\(		-	a(	a(
 47 | a\(		bC	EPAREN
 48 | a\(b		bC	EPAREN
 49 | a(b		C	EPAREN
 50 | a(b		b	a(b	a(b
 51 | # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
 52 | a)		-	a)	a)
 53 | )		-	)	)
 54 | # end gagging (in a just world, those *should* give EPAREN)
 55 | a)		b	a)	a)
 56 | a\)		bC	EPAREN
 57 | \)		bC	EPAREN
 58 | a()b		-	ab	ab
 59 | a\(\)b		b	ab	ab
 60 | 
 61 | # anchoring and REG_NEWLINE
 62 | ^abc$		&	abc	abc
 63 | a^b		-	a^b
 64 | a^b		b	a^b	a^b
 65 | a$b		-	a$b
 66 | a$b		b	a$b	a$b
 67 | ^		&	abc	@abc
 68 | $		&	abc	@
 69 | ^$		&	""	@
 70 | $^		-	""	@
 71 | \($\)\(^\)	b	""	@
 72 | # stop retching, those are legitimate (although disgusting)
 73 | ^^		-	""	@
 74 | $$		-	""	@
 75 | b$		&	abNc
 76 | b$		&n	abNc	b
 77 | ^b$		&	aNbNc
 78 | ^b$		&n	aNbNc	b
 79 | ^$		&n	aNNb	@Nb
 80 | ^$		n	abc
 81 | ^$		n	abcN	@
 82 | $^		n	aNNb	@Nb
 83 | \($\)\(^\)	bn	aNNb	@Nb
 84 | ^^		n^	aNNb	@Nb
 85 | $$		n	aNNb	@NN
 86 | ^a		^	a
 87 | a$		$	a
 88 | ^a		^n	aNb
 89 | ^b		^n	aNb	b
 90 | a$		$n	bNa
 91 | b$		$n	bNa	b
 92 | a*(^b$)c*	-	b	b
 93 | a*\(^b$\)c*	b	b	b
 94 | 
 95 | # certain syntax errors and non-errors
 96 | |		C	EMPTY
 97 | |		b	|	|
 98 | *		C	BADRPT
 99 | *		b	*	*
100 | +		C	BADRPT
101 | ?		C	BADRPT
102 | ""		&C	EMPTY
103 | ()		-	abc	@abc
104 | \(\)		b	abc	@abc
105 | a||b		C	EMPTY
106 | |ab		C	EMPTY
107 | ab|		C	EMPTY
108 | (|a)b		C	EMPTY
109 | (a|)b		C	EMPTY
110 | (*a)		C	BADRPT
111 | (+a)		C	BADRPT
112 | (?a)		C	BADRPT
113 | ({1}a)		C	BADRPT
114 | \(\{1\}a\)	bC	BADRPT
115 | (a|*b)		C	BADRPT
116 | (a|+b)		C	BADRPT
117 | (a|?b)		C	BADRPT
118 | (a|{1}b)	C	BADRPT
119 | ^*		C	BADRPT
120 | ^*		b	*	*
121 | ^+		C	BADRPT
122 | ^?		C	BADRPT
123 | ^{1}		C	BADRPT
124 | ^\{1\}		bC	BADRPT
125 | 
126 | # metacharacters, backslashes
127 | a.c		&	abc	abc
128 | a[bc]d		&	abd	abd
129 | a\*c		&	a*c	a*c
130 | a\\b		&	a\b	a\b
131 | a\\\*b		&	a\*b	a\*b
132 | a\bc		&	abc	abc
133 | a\		&C	EESCAPE
134 | a\\bc		&	a\bc	a\bc
135 | \{		bC	BADRPT
136 | a\[b		&	a[b	a[b
137 | a[b		&C	EBRACK
138 | # trailing $ is a peculiar special case for the BRE code
139 | a$		&	a	a
140 | a$		&	a$
141 | a\$		&	a
142 | a\$		&	a$	a$
143 | a\\$		&	a
144 | a\\$		&	a$
145 | a\\$		&	a\$
146 | a\\$		&	a\	a\
147 | 
148 | # back references, ugh
149 | a\(b\)\2c	bC	ESUBREG
150 | a\(b\1\)c	bC	ESUBREG
151 | a\(b*\)c\1d	b	abbcbbd	abbcbbd	bb
152 | a\(b*\)c\1d	b	abbcbd
153 | a\(b*\)c\1d	b	abbcbbbd
154 | ^\(.\)\1	b	abc
155 | a\([bc]\)\1d	b	abcdabbd	abbd	b
156 | a\(\([bc]\)\2\)*d	b	abbccd	abbccd
157 | a\(\([bc]\)\2\)*d	b	abbcbd
158 | # actually, this next one probably ought to fail, but the spec is unclear
159 | a\(\(b\)*\2\)*d		b	abbbd	abbbd
160 | # here is a case that no NFA implementation does right
161 | \(ab*\)[ab]*\1	b	ababaaa	ababaaa	a
162 | # check out normal matching in the presence of back refs
163 | \(a\)\1bcd	b	aabcd	aabcd
164 | \(a\)\1bc*d	b	aabcd	aabcd
165 | \(a\)\1bc*d	b	aabd	aabd
166 | \(a\)\1bc*d	b	aabcccd	aabcccd
167 | \(a\)\1bc*[ce]d	b	aabcccd	aabcccd
168 | ^\(a\)\1b\(c\)*cd$	b	aabcccd	aabcccd
169 | 
170 | # ordinary repetitions
171 | ab*c		&	abc	abc
172 | ab+c		-	abc	abc
173 | ab?c		-	abc	abc
174 | a\(*\)b		b	a*b	a*b
175 | a\(**\)b	b	ab	ab
176 | a\(***\)b	bC	BADRPT
177 | *a		b	*a	*a
178 | **a		b	a	a
179 | ***a		bC	BADRPT
180 | 
181 | # the dreaded bounded repetitions
182 | {		&	{	{
183 | {abc		&	{abc	{abc
184 | {1		C	BADRPT
185 | {1}		C	BADRPT
186 | a{b		&	a{b	a{b
187 | a{1}b		-	ab	ab
188 | a\{1\}b		b	ab	ab
189 | a{1,}b		-	ab	ab
190 | a\{1,\}b	b	ab	ab
191 | a{1,2}b		-	aab	aab
192 | a\{1,2\}b	b	aab	aab
193 | a{1		C	EBRACE
194 | a\{1		bC	EBRACE
195 | a{1a		C	EBRACE
196 | a\{1a		bC	EBRACE
197 | a{1a}		C	BADBR
198 | a\{1a\}		bC	BADBR
199 | a{,2}		-	a{,2}	a{,2}
200 | a\{,2\}		bC	BADBR
201 | a{,}		-	a{,}	a{,}
202 | a\{,\}		bC	BADBR
203 | a{1,x}		C	BADBR
204 | a\{1,x\}	bC	BADBR
205 | a{1,x		C	EBRACE
206 | a\{1,x		bC	EBRACE
207 | a{300}		C	BADBR
208 | a\{300\}	bC	BADBR
209 | a{1,0}		C	BADBR
210 | a\{1,0\}	bC	BADBR
211 | ab{0,0}c	-	abcac	ac
212 | ab\{0,0\}c	b	abcac	ac
213 | ab{0,1}c	-	abcac	abc
214 | ab\{0,1\}c	b	abcac	abc
215 | ab{0,3}c	-	abbcac	abbc
216 | ab\{0,3\}c	b	abbcac	abbc
217 | ab{1,1}c	-	acabc	abc
218 | ab\{1,1\}c	b	acabc	abc
219 | ab{1,3}c	-	acabc	abc
220 | ab\{1,3\}c	b	acabc	abc
221 | ab{2,2}c	-	abcabbc	abbc
222 | ab\{2,2\}c	b	abcabbc	abbc
223 | ab{2,4}c	-	abcabbc	abbc
224 | ab\{2,4\}c	b	abcabbc	abbc
225 | ((a{1,10}){1,10}){1,10}	-	a	a	a,a
226 | 
227 | # multiple repetitions
228 | a**		&C	BADRPT
229 | a++		C	BADRPT
230 | a??		C	BADRPT
231 | a*+		C	BADRPT
232 | a*?		C	BADRPT
233 | a+*		C	BADRPT
234 | a+?		C	BADRPT
235 | a?*		C	BADRPT
236 | a?+		C	BADRPT
237 | a{1}{1}		C	BADRPT
238 | a*{1}		C	BADRPT
239 | a+{1}		C	BADRPT
240 | a?{1}		C	BADRPT
241 | a{1}*		C	BADRPT
242 | a{1}+		C	BADRPT
243 | a{1}?		C	BADRPT
244 | a*{b}		-	a{b}	a{b}
245 | a\{1\}\{1\}	bC	BADRPT
246 | a*\{1\}		bC	BADRPT
247 | a\{1\}*		bC	BADRPT
248 | 
249 | # brackets, and numerous perversions thereof
250 | a[b]c		&	abc	abc
251 | a[ab]c		&	abc	abc
252 | a[^ab]c		&	adc	adc
253 | a[]b]c		&	a]c	a]c
254 | a[[b]c		&	a[c	a[c
255 | a[-b]c		&	a-c	a-c
256 | a[^]b]c		&	adc	adc
257 | a[^-b]c		&	adc	adc
258 | a[b-]c		&	a-c	a-c
259 | a[b		&C	EBRACK
260 | a[]		&C	EBRACK
261 | a[1-3]c		&	a2c	a2c
262 | a[3-1]c		&C	ERANGE
263 | a[1-3-5]c	&C	ERANGE
264 | a[[.-.]--]c	&	a-c	a-c
265 | a[1-		&C	ERANGE
266 | a[[.		&C	EBRACK
267 | a[[.x		&C	EBRACK
268 | a[[.x.		&C	EBRACK
269 | a[[.x.]		&C	EBRACK
270 | a[[.x.]]	&	ax	ax
271 | a[[.x,.]]	&C	ECOLLATE
272 | a[[.one.]]b	&	a1b	a1b
273 | a[[.notdef.]]b	&C	ECOLLATE
274 | a[[.].]]b	&	a]b	a]b
275 | a[[:alpha:]]c	&	abc	abc
276 | a[[:notdef:]]c	&C	ECTYPE
277 | a[[:		&C	EBRACK
278 | a[[:alpha	&C	EBRACK
279 | a[[:alpha:]	&C	EBRACK
280 | a[[:alpha,:]	&C	ECTYPE
281 | a[[:]:]]b	&C	ECTYPE
282 | a[[:-:]]b	&C	ECTYPE
283 | a[[:alph:]]	&C	ECTYPE
284 | a[[:alphabet:]]	&C	ECTYPE
285 | [[:alnum:]]+	-	-%@a0X-	a0X
286 | [[:alpha:]]+	-	-%@aX0-	aX
287 | [[:blank:]]+	-	aSSTb	SST
288 | [[:cntrl:]]+	-	aNTb	NT
289 | [[:digit:]]+	-	a019b	019
290 | [[:graph:]]+	-	Sa%bS	a%b
291 | [[:lower:]]+	-	AabC	ab
292 | [[:print:]]+	-	NaSbN	aSb
293 | [[:punct:]]+	-	S%-&T	%-&
294 | [[:space:]]+	-	aSNTb	SNT
295 | [[:upper:]]+	-	aBCd	BC
296 | [[:xdigit:]]+	-	p0f3Cq	0f3C
297 | a[[=b=]]c	&	abc	abc
298 | a[[=		&C	EBRACK
299 | a[[=b		&C	EBRACK
300 | a[[=b=		&C	EBRACK
301 | a[[=b=]		&C	EBRACK
302 | a[[=b,=]]	&C	ECOLLATE
303 | a[[=one=]]b	&	a1b	a1b
304 | 
305 | # complexities
306 | a(((b)))c	-	abc	abc
307 | a(b|(c))d	-	abd	abd
308 | a(b*|c)d	-	abbd	abbd
309 | # just gotta have one DFA-buster, of course
310 | a[ab]{20}	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
311 | # and an inline expansion in case somebody gets tricky
312 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]	-	aaaaabaaaabaaaabaaaab	aaaaabaaaabaaaabaaaab
313 | # and in case somebody just slips in an NFA...
314 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)	-	aaaaabaaaabaaaabaaaabweeknights	aaaaabaaaabaaaabaaaabweeknights
315 | # fish for anomalies as the number of states passes 32
316 | 12345678901234567890123456789	-	a12345678901234567890123456789b	12345678901234567890123456789
317 | 123456789012345678901234567890	-	a123456789012345678901234567890b	123456789012345678901234567890
318 | 1234567890123456789012345678901	-	a1234567890123456789012345678901b	1234567890123456789012345678901
319 | 12345678901234567890123456789012	-	a12345678901234567890123456789012b	12345678901234567890123456789012
320 | 123456789012345678901234567890123	-	a123456789012345678901234567890123b	123456789012345678901234567890123
321 | # and one really big one, beyond any plausible word width
322 | 1234567890123456789012345678901234567890123456789012345678901234567890	-	a1234567890123456789012345678901234567890123456789012345678901234567890b	1234567890123456789012345678901234567890123456789012345678901234567890
323 | # fish for problems as brackets go past 8
324 | [ab][cd][ef][gh][ij][kl][mn]	-	xacegikmoq	acegikm
325 | [ab][cd][ef][gh][ij][kl][mn][op]	-	xacegikmoq	acegikmo
326 | [ab][cd][ef][gh][ij][kl][mn][op][qr]	-	xacegikmoqy	acegikmoq
327 | [ab][cd][ef][gh][ij][kl][mn][op][q]	-	xacegikmoqy	acegikmoq
328 | 
329 | # subtleties of matching
330 | abc		&	xabcy	abc
331 | a\(b\)?c\1d	b	acd
332 | aBc		i	Abc	Abc
333 | a[Bc]*d		i	abBCcd	abBCcd
334 | 0[[:upper:]]1	&i	0a1	0a1
335 | 0[[:lower:]]1	&i	0A1	0A1
336 | a[^b]c		&i	abc
337 | a[^b]c		&i	aBc
338 | a[^b]c		&i	adc	adc
339 | [a]b[c]		-	abc	abc
340 | [a]b[a]		-	aba	aba
341 | [abc]b[abc]	-	abc	abc
342 | [abc]b[abd]	-	abd	abd
343 | a(b?c)+d	-	accd	accd
344 | (wee|week)(knights|night)	-	weeknights	weeknights
345 | (we|wee|week|frob)(knights|night|day)	-	weeknights	weeknights
346 | a[bc]d		-	xyzaaabcaababdacd	abd
347 | a[ab]c		-	aaabc	abc
348 | abc		s	abc	abc
349 | a*		&	b	@b
350 | 
351 | # Let's have some fun -- try to match a C comment.
352 | # first the obvious, which looks okay at first glance...
353 | /\*.*\*/	-	/*x*/	/*x*/
354 | # but...
355 | /\*.*\*/	-	/*x*/y/*z*/	/*x*/y/*z*/
356 | # okay, we must not match */ inside; try to do that...
357 | /\*([^*]|\*[^/])*\*/	-	/*x*/	/*x*/
358 | /\*([^*]|\*[^/])*\*/	-	/*x*/y/*z*/	/*x*/
359 | # but...
360 | /\*([^*]|\*[^/])*\*/	-	/*x**/y/*z*/	/*x**/y/*z*/
361 | # and a still fancier version, which does it right (I think)...
362 | /\*([^*]|\*+[^*/])*\*+/	-	/*x*/	/*x*/
363 | /\*([^*]|\*+[^*/])*\*+/	-	/*x*/y/*z*/	/*x*/
364 | /\*([^*]|\*+[^*/])*\*+/	-	/*x**/y/*z*/	/*x**/
365 | /\*([^*]|\*+[^*/])*\*+/	-	/*x****/y/*z*/	/*x****/
366 | /\*([^*]|\*+[^*/])*\*+/	-	/*x**x*/y/*z*/	/*x**x*/
367 | /\*([^*]|\*+[^*/])*\*+/	-	/*x***x/y/*z*/	/*x***x/y/*z*/
368 | 
369 | # subexpressions
370 | .*		-	abc	abc	-
371 | a(b)(c)d	-	abcd	abcd	b,c
372 | a(((b)))c	-	abc	abc	b,b,b
373 | a(b|(c))d	-	abd	abd	b,-
374 | a(b*|c|e)d	-	abbd	abbd	bb
375 | a(b*|c|e)d	-	acd	acd	c
376 | a(b*|c|e)d	-	ad	ad	@d
377 | a(b?)c		-	abc	abc	b
378 | a(b?)c		-	ac	ac	@c
379 | a(b+)c		-	abc	abc	b
380 | a(b+)c		-	abbbc	abbbc	bbb
381 | a(b*)c		-	ac	ac	@c
382 | (a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
383 | # the regression tester only asks for 9 subexpressions
384 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
385 | a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l	-	abcdefghijkl	abcdefghijkl	b,c,d,e,f,g,h,i,j,k
386 | a([bc]?)c	-	abc	abc	b
387 | a([bc]?)c	-	ac	ac	@c
388 | a([bc]+)c	-	abc	abc	b
389 | a([bc]+)c	-	abcc	abcc	bc
390 | a([bc]+)bc	-	abcbc	abcbc	bc
391 | a(bb+|b)b	-	abb	abb	b
392 | a(bbb+|bb+|b)b	-	abb	abb	b
393 | a(bbb+|bb+|b)b	-	abbb	abbb	bb
394 | a(bbb+|bb+|b)bb	-	abbb	abbb	b
395 | (.*).*		-	abcdef	abcdef	abcdef
396 | (a*)*		-	bc	@b	@b
397 | 
398 | # do we get the right subexpression when it is used more than once?
399 | a(b|c)*d	-	ad	ad	-
400 | a(b|c)*d	-	abcd	abcd	c
401 | a(b|c)+d	-	abd	abd	b
402 | a(b|c)+d	-	abcd	abcd	c
403 | a(b|c?)+d	-	ad	ad	@d
404 | a(b|c?)+d	-	abcd	abcd	@d
405 | a(b|c){0,0}d	-	ad	ad	-
406 | a(b|c){0,1}d	-	ad	ad	-
407 | a(b|c){0,1}d	-	abd	abd	b
408 | a(b|c){0,2}d	-	ad	ad	-
409 | a(b|c){0,2}d	-	abcd	abcd	c
410 | a(b|c){0,}d	-	ad	ad	-
411 | a(b|c){0,}d	-	abcd	abcd	c
412 | a(b|c){1,1}d	-	abd	abd	b
413 | a(b|c){1,1}d	-	acd	acd	c
414 | a(b|c){1,2}d	-	abd	abd	b
415 | a(b|c){1,2}d	-	abcd	abcd	c
416 | a(b|c){1,}d	-	abd	abd	b
417 | a(b|c){1,}d	-	abcd	abcd	c
418 | a(b|c){2,2}d	-	acbd	acbd	b
419 | a(b|c){2,2}d	-	abcd	abcd	c
420 | a(b|c){2,4}d	-	abcd	abcd	c
421 | a(b|c){2,4}d	-	abcbd	abcbd	b
422 | a(b|c){2,4}d	-	abcbcd	abcbcd	c
423 | a(b|c){2,}d	-	abcd	abcd	c
424 | a(b|c){2,}d	-	abcbd	abcbd	b
425 | a(b+|((c)*))+d	-	abd	abd	@d,@d,-
426 | a(b+|((c)*))+d	-	abcd	abcd	@d,@d,-
427 | 
428 | # check out the STARTEND option
429 | [abc]		&#	a(b)c	b
430 | [abc]		&#	a(d)c
431 | [abc]		&#	a(bc)d	b
432 | [abc]		&#	a(dc)d	c
433 | .		&#	a()c
434 | b.*c		&#	b(bc)c	bc
435 | b.*		&#	b(bc)c	bc
436 | .*c		&#	b(bc)c	bc
437 | 
438 | # plain strings, with the NOSPEC flag
439 | abc		m	abc	abc
440 | abc		m	xabcy	abc
441 | abc		m	xyz
442 | a*b		m	aba*b	a*b
443 | a*b		m	ab
444 | ""		mC	EMPTY
445 | 
446 | # cases involving NULs
447 | aZb		&	a	a
448 | aZb		&p	a
449 | aZb		&p#	(aZb)	aZb
450 | aZ*b		&p#	(ab)	ab
451 | a.b		&#	(aZb)	aZb
452 | a.*		&#	(aZb)c	aZb
453 | 
454 | # word boundaries (ick)
455 | [[:<:]]a	&	a	a
456 | [[:<:]]a	&	ba
457 | [[:<:]]a	&	-a	a
458 | a[[:>:]]	&	a	a
459 | a[[:>:]]	&	ab
460 | a[[:>:]]	&	a-	a
461 | [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc	abc
462 | [[:<:]]a.c[[:>:]]	&	axcd-dayc-dazce-abc-q	abc
463 | [[:<:]]a.c[[:>:]]	&	axc-dayc-dazce-abc	axc
464 | [[:<:]]b.c[[:>:]]	&	a_bxc-byc_d-bzc-q	bzc
465 | [[:<:]].x..[[:>:]]	&	y_xa_-_xb_y-_xc_-axdc	_xc_
466 | [[:<:]]a_b[[:>:]]	&	x_a_b
467 | 
468 | # past problems, and suspected problems
469 | (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])	-	A1	A1
470 | abcdefghijklmnop	i	abcdefghijklmnop	abcdefghijklmnop
471 | abcdefghijklmnopqrstuv	i	abcdefghijklmnopqrstuv	abcdefghijklmnopqrstuv
472 | (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])	-	CC11	CC11
473 | CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a	-	CC11	CC11
474 | Char \([a-z0-9_]*\)\[.*	b	Char xyz[k	Char xyz[k	xyz
475 | a?b	-	ab	ab
476 | -\{0,1\}[0-9]*$	b	-5	-5
477 | a*a*a*a*a*a*a*	&	aaaaaa	aaaaaa
478 | 


--------------------------------------------------------------------------------
/ext/re/utils.h:
--------------------------------------------------------------------------------
 1 | /* utility definitions */
 2 | #ifdef _POSIX2_RE_DUP_MAX
 3 | #define	DUPMAX	_POSIX2_RE_DUP_MAX
 4 | #else
 5 | #define	DUPMAX	255
 6 | #endif
 7 | #define	INFINITY	(DUPMAX + 1)
 8 | #define	NC		(CHAR_MAX - CHAR_MIN + 1)
 9 | typedef unsigned char uch;
10 | 
11 | /* switch off assertions (if not already off) if no REDEBUG */
12 | #ifndef REDEBUG
13 | #ifndef NDEBUG
14 | #define	NDEBUG	/* no assertions please */
15 | #endif
16 | #endif
17 | #include <assert.h>
18 | 
19 | /* for old systems with bcopy() but no memmove() */
20 | #ifdef USEBCOPY
21 | #define	memmove(d, s, c)	bcopy(s, d, c)
22 | #endif
23 | 


--------------------------------------------------------------------------------
/ext/tsx/LICENSE:
--------------------------------------------------------------------------------
 1 |                          LICENSE TERMS
 2 | 
 3 | (c) 2002 Manuel Heras-Gilsanz (manuel@heras-gilsanz.com)
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 | Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 | 
13 | Redistributions in binary form must reproduce the above copyright
14 | notice, this list of conditions and the following disclaimer in the
15 | documentation and/or other materials provided with the distribution.
16 | 
17 | Neither the name of Manuel Heras-Gilsanz nor the names of the
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/ext/tsx/Makefile:
--------------------------------------------------------------------------------
 1 | #DEBUG=-g
 2 | DEBUG=
 3 | SCHEME_H_DIR=..
 4 | CC=gcc
 5 | CFLAGS=-DUSE_DL=1 -I $(SCHEME_H_DIR)
 6 | 
 7 | tsx.so : tsx.c tsx.h Makefile
 8 | 	$(CC) -shared -Wall -fPIC $(CFLAGS) -o tsx.so $(DEBUG) tsx.c
 9 | 	strip tsx.so
10 | 	ls -l tsx.so
11 | 
12 | .PHONY : clean
13 | clean:
14 | 	rm -f *.o
15 | 	rm -f tsx.so
16 | 	rm -f *~


--------------------------------------------------------------------------------
/ext/tsx/README:
--------------------------------------------------------------------------------
  1 | TinyScheme Extensions (TSX) 1.1  [September, 2002]
  2 | (c) 2002 Manuel Heras-Gilsanz (manuel@heras-gilsanz.com)
  3 | 
  4 | This software is subject to the license terms contained in the
  5 | LICENSE file.
  6 | 
  7 | Changelog:
  8 | 1.1 (Sept. 2002) Updated to tinyscheme 1.31
  9 | 1.0 (April 2002) First released version
 10 | 
 11 | 
 12 | WHAT IS TSX?
 13 | 
 14 | TinyScheme Extensions is a set of dynamic libraries incorporating
 15 | additional funcionality to TinyScheme, a lightweight
 16 | implementation of the Scheme programming language. TinyScheme
 17 | (http://tinyscheme.sourceforge.net) is maintained by D. Souflis
 18 | (dsouflis@acm.org), and is based on MiniSCHEME version 0.85k4.
 19 | 
 20 | Scheme is a very nice and powerful programming language, but the
 21 | basic language is very minimalistic in terms of library functions;
 22 | only basic file input / output functionality is specified.
 23 | Different implementations of the language (MIT Scheme, GUILE,
 24 | Bigloo...) provide their own extension libraries. TSX attempts to
 25 | provide commonly needed functions at a small cost in terms of
 26 | additional program footprint. The library is modularized, so that
 27 | it is possible (and easy!) to select desired functionality via
 28 | #defines in tsx.h.
 29 | 
 30 | 
 31 | INSTALLATION
 32 | 
 33 | TSX has been tested on GNU/Linux 2.4.2 with gcc 2.96 and
 34 | libc-2.2.2, with TinyScheme 1.31.
 35 | 
 36 | To install, copy the distribution file to the directory
 37 | where TinyScheme is installed (and where scheme.h lies),
 38 | and run make. If building succeeds, a file called tsx.so
 39 | should be created. This file can be loaded as a TinyScheme
 40 | extension with 
 41 | 
 42 |     (load-extension "tsx-1.0/tsx")
 43 | 
 44 | After loading TSX, you can make use of its functions.
 45 | To reduce footprint, you can choose the functionality which
 46 | will be included. To do so, have a look at tsx.h and
 47 | comment the #defines for unneeded modules.
 48 | 
 49 | If you get compiler errors, make sure you have enabled
 50 | dynamic modules in your tinyscheme runtime (define USE_DL
 51 | somewhere near the top in scheme.h).
 52 | 
 53 | 
 54 | SAMPLE APPLICATIONS
 55 | 
 56 | Three sample applications are distributed with TSX 1.0.
 57 | The code is not particularly elegant, nor written in proper
 58 | functional style, but is provided for illustration of the
 59 | implemented calls.
 60 | 
 61 | -smtp.scm
 62 |     Sends an email to the user getting the username from
 63 |     the USER shell variable, connecting to the SMTP port
 64 |     on the local machine.
 65 | 
 66 | -listhome.scm
 67 |     Provides a list of all the files on the user's home
 68 |     directory (obtained with the HOME environment variable).
 69 | 
 70 | -srepl.scm
 71 |     Provides a socket-based read-eval-print-loop. It listens
 72 |     for connections on the 9000 port of the local machines,
 73 |     and executes the commands received. To test it, run
 74 | 
 75 |         telnet localhost 9000
 76 | 
 77 |     after starting the sample application, and type Scheme
 78 |     expressions. You will get the evaluations. To exit the
 79 |     session, type "quit" and TinyScheme will exit, closing
 80 |     the socket. The output of some functions will not
 81 |     be the same as you would obtain on TinyScheme's
 82 |     "command line", because standard output is not
 83 |     redirected to the socket, but most commands work ok.
 84 | 
 85 | You should copy these applications to the directory where
 86 | TinyScheme is installed (i.e., where the "scheme" binary
 87 | file resides), and can be runned with:
 88 | 
 89 |     ./scheme listhome.scm
 90 |     ./scheme smtp.scm
 91 |     ./scheme srepl.scm
 92 | 
 93 | 
 94 | TSX FUNCTIONS
 95 | 
 96 | The extension functions implemented by TinyScheme Extensions are
 97 | documented in the file "tsx-functions.txt".
 98 | 
 99 | END
100 | 


--------------------------------------------------------------------------------
/ext/tsx/listhome.scm:
--------------------------------------------------------------------------------
 1 | ; listhome.scm
 2 | ; Sample usage of TinyScheme Extension
 3 | ; This simple program lists the directory entries on the
 4 | ; user's home directory.
 5 | 
 6 | ; It uses the following TinyScheme Extension functions:
 7 | ;    getenv
 8 | ;      Used to get HOME environment variable.
 9 | ;    open-dir-stream
10 | ;      Used to open directory stream.
11 | ;    read-dir-entry
12 | ;      Used to read directory entries.
13 | ;    close-dir-entry
14 | ;      Used at the end, to close directory stream when done.
15 | 
16 | ; check that extensions are enabled
17 | (if (not (defined? 'load-extension))
18 |     (begin
19 |       (display "TinyScheme has extensions disabled. Enable them!!")
20 |       (newline)
21 |       (quit)))
22 | 
23 | ; load TinyScheme extension
24 | (load-extension "tsx-1.0/tsx")
25 | 
26 | ; check that the necessary functions are available (the user
27 | ; might have removed some functionality...)
28 | (if (or
29 |       (not (defined? 'getenv))
30 |       (not (defined? 'open-dir-stream))
31 |       (not (defined? 'read-dir-entry))
32 |       (not (defined? 'close-dir-stream)))
33 |     (begin
34 |       (display "Some necessary functions are not available. Exiting!")
35 |       (newline)
36 |       (quit)))
37 | 
38 | ; get user's home dir from HOME environment var
39 | (define homedir (getenv "HOME"))
40 | (display "Listing contents of ") (display homedir) (newline)
41 | 
42 | ; create directory stream to read dir entries
43 | (define dirstream (open-dir-stream homedir))
44 | (if (not dirstream)
45 |   (begin
46 |     (display "Unable to open home directory!! Check value of HOME environment var.")
47 |     (quit)))
48 | 
49 | (let listentry ((entry (read-dir-entry dirstream)))
50 |   (if (eof-object? entry)
51 |     #t
52 |     (begin
53 |       (display entry)
54 |       (newline)
55 |       (listentry (read-dir-entry dirstream)))))
56 | 
57 | (close-dir-stream dirstream)
58 | 
59 | 


--------------------------------------------------------------------------------
/ext/tsx/msvcbuild.bat:
--------------------------------------------------------------------------------
 1 | @setlocal
 2 | @set SCHEME_H_DIR=..\..\src
 3 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_DEPRECATE /D_WINSOCK_DEPRECATED_NO_WARNINGS /I%SCHEME_H_DIR%
 4 | @set MSLIB=lib /nologo
 5 | @set MSLINK=link /nologo
 6 | 
 7 | @if not exist lib\ (
 8 |   mkdir lib
 9 | )
10 | 
11 | %MSCOMPILE% /MT tsx.c
12 | %MSLIB% /out:lib\tsx.lib tsx.obj ws2_32.lib
13 | 
14 | del tsx.obj
15 | 


--------------------------------------------------------------------------------
/ext/tsx/smtp.scm:
--------------------------------------------------------------------------------
  1 | ; smtp.scm
  2 | ; Sample usage of TinyScheme Extensions
  3 | ; This very simple program sends a message using SMTP to the local machine.
  4 | 
  5 | ; It uses the following TinyScheme Extension functions:
  6 | ;     getenv 
  7 | ;         used to get name of current user, wich is the recipient
  8 | ;         of the message.
  9 | ;     make-client-socket
 10 | ;         used to connect to SMTP port on local machine
 11 | ;     send
 12 | ;         used to send commands and email message
 13 | ;     recv-new-string
 14 | ;         used to read responses from SMTP server
 15 | ;     close-socket
 16 | ;         used to free socket at the end
 17 | 
 18 | ; check that string ports are available...
 19 | (if (not (defined? 'open-output-string))
 20 |   (begin
 21 |     (display "We need string ports!! Recompile TinyScheme with string ports, if you want to run this sample...")
 22 |     (quit)))
 23 | 
 24 | ; check that extensions are enabled
 25 | (if (not (defined? 'load-extension))
 26 |     (begin
 27 |       (display "TinyScheme has extensions disabled. Enable them!!")
 28 |       (newline)
 29 |       (quit)))
 30 | 
 31 | ; load TinyScheme Extensions
 32 | (load-extension "tsx-1.0/tsx")
 33 | 
 34 | ; check that the necessary functions are available (the user
 35 | ; might have removed some functionality...)
 36 | (if (or
 37 |       (not (defined? 'getenv))
 38 |       (not (defined? 'make-client-socket))
 39 |       (not (defined? 'send))
 40 |       (not (defined? 'close-socket))
 41 |       (not (defined? 'recv-new-string)))
 42 |     (begin
 43 |       (display "Some necessary functions are not available. Exiting!")
 44 |       (newline)
 45 |       (quit)))
 46 | 
 47 | ; get current user name
 48 | (define user-name (getenv "USER"))
 49 | 
 50 | ; if unable to get user name, use "nobody"
 51 | (if (not user-name)
 52 |   (set! user-name "nobody"))
 53 | 
 54 | ; create client socket to SMTP port (25)
 55 | (define sock (make-client-socket "localhost" 25))
 56 | (display "Socket: ") (display sock) (newline)
 57 | 
 58 | ; if unable to open socket, exit TinyScheme
 59 | (if (not sock)
 60 |   (begin
 61 |     (display "Unable to open socket! Is SMTP enabled on this machine?")
 62 |     (quit)))
 63 | 
 64 | ; define string buffers to send and receive
 65 | (define recv-buf '())
 66 | 
 67 | ; receive SMTP welcome message onto recv-buf var
 68 | (set! recv-buf (recv-new-string sock))
 69 | 
 70 | (display "Received:") (display recv-buf) (newline)
 71 | 
 72 | (define helo "HELO localhost\n")
 73 | (display "Sending HELO...") (newline)
 74 | (send sock helo)
 75 | 
 76 | ; receive response from server
 77 | (set! recv-buf (recv-new-string sock))
 78 | (display "Received:") (display recv-buf) (newline)
 79 | 
 80 | (define mailfrom (make-string (+ 20 (string-length user-name))))
 81 | (define mailfromport (open-output-string mailfrom))
 82 | (display "MAIL FROM: " mailfromport)
 83 | (display user-name mailfromport)
 84 | (display "\n" mailfromport)
 85 | (close-output-port mailfromport)
 86 | 
 87 | ; send MAIL FROM: command and receive response
 88 | (display "Sending MAIL FROM:...") (newline)
 89 | (send sock mailfrom)
 90 | (set! recv-buf (recv-new-string sock))
 91 | (display "Received:") (display recv-buf) (newline)
 92 | 
 93 | ; send RCPT TO: command and receive response
 94 | (display "Sending RCPT TO:...") (newline)
 95 | (define rcptto (make-string (+ 20 (string-length user-name))))
 96 | (define rcpttoport (open-output-string rcptto))
 97 | (display "RCPT TO: " rcpttoport)
 98 | (display user-name rcpttoport)
 99 | (display "\n" rcpttoport)
100 | (close-output-port rcpttoport)
101 | (send sock rcptto)
102 | (set! recv-buf (recv-new-string sock))
103 | (display "Received:") (display recv-buf) (newline)
104 | 
105 | 
106 | ; send DATA command
107 | (display "Sending DATA...") (newline)
108 | (define data "DATA\n")
109 | (send sock data)
110 | (set! recv-buf (recv-new-string sock))
111 | (display "Received:") (display recv-buf) (newline)
112 | 
113 | ; send message
114 | (display "Sending message...") (newline)
115 | (define message "Hello!\nThis is a sample message sent from TinyScheme!\n\n.\n")
116 | (send sock message)
117 | (set! recv-buf (recv-new-string sock))
118 | (display "Received:") (display recv-buf) (newline)
119 | 
120 | ; send QUIT command
121 | (display "Sending QUIT command...") (newline)
122 | (define quit "QUIT\n")
123 | (send sock quit)
124 | (set! recv-buf (recv-new-string sock))
125 | (display "Received:") (display recv-buf) (newline)
126 | 
127 | ; close socket
128 | (close-socket sock)
129 | 
130 | 


--------------------------------------------------------------------------------
/ext/tsx/srepl.scm:
--------------------------------------------------------------------------------
 1 | ; srepl.scm
 2 | ; Sample usage of TinyScheme Extensions
 3 | ; This program provides a socket-based read-eval-print-loop.
 4 | 
 5 | ; It uses the following TinyScheme Extension functions:
 6 | ;     make-server-socket
 7 | ;         used to create server socket on port 9000
 8 | ;     accept
 9 | ;         used to accept client requests for connection
10 | ;     recv-new-string
11 | ;         used to receive user's requests
12 | ;     send
13 | ;         used to send evaluation results
14 | ;     close-socket
15 | ;         used to free socket at the end
16 | 
17 | ; check that string ports are available...
18 | (if (not (defined? 'open-output-string))
19 |   (begin
20 |     (display "We need string ports!! Recompile TinyScheme with string ports,")
21 |     (display " if you want to run this sample...")
22 |     (quit)))
23 | 
24 | ; check that extensions are enabled
25 | (if (not (defined? 'load-extension))
26 |     (begin
27 |       (display "TinyScheme has extensions disabled. Enable them!!")
28 |       (newline)
29 |       (quit)))
30 | 
31 | ; load TinyScheme Extensions
32 | (load-extension "tsx-1.0/tsx")
33 | 
34 | ; check that the necessary functions are available (the user
35 | ; might have removed some functionality...)
36 | (if (or
37 |       (not (defined? 'make-server-socket))
38 |       (not (defined? 'accept))
39 |       (not (defined? 'send))
40 |       (not (defined? 'close-socket))
41 |       (not (defined? 'recv-new-string)))
42 |     (begin
43 |       (display "Some necessary functions are not available. Exiting!")
44 |       (newline)
45 |       (quit)))
46 | 
47 | ; create server socket on port 9000
48 | (define server-socket (make-server-socket 9000))
49 | 
50 | ; wait for client requests
51 | (define connected-socket (accept server-socket))
52 | 
53 | ; send welcome message
54 | (send connected-socket "Welcome to TinyScheme Extensions socket-REPL!\n")
55 | 
56 | ; define auxiliary variables
57 | (define command '())
58 | (define command-port '())
59 | (define result '())
60 | (define result-port '())
61 | (define to-eval '())
62 | 
63 | (define extenv (current-environment))
64 | (let repl ()
65 |   (send connected-socket "> ")
66 |   (set! command (recv-new-string connected-socket))
67 |   (set! command-port (open-input-string command))
68 |   (set! to-eval (read command-port))
69 |   (set! result (make-string 250))
70 |   (set! result-port (open-output-string result))
71 |   (display (eval to-eval) result-port)
72 |   (send connected-socket result)
73 |   (send connected-socket "\n")
74 |   (close-input-port command-port)
75 |   (close-output-port result-port)
76 |   (repl)
77 | )
78 | 


--------------------------------------------------------------------------------
/ext/tsx/tsx-functions.txt:
--------------------------------------------------------------------------------
  1 | TinyScheme Extensions (TSX) 1.1  [September, 2002]
  2 | (c) 2002 Manuel Heras-Gilsanz (manuel@heras-gilsanz.com)
  3 | 
  4 | This software is subject to the license terms contained in the
  5 | LICENSE file.
  6 | 
  7 | 
  8 | TSX FUNCTIONS
  9 | 
 10 | TSX incorporates the following functions:
 11 | 
 12 | *Sockets (included if HAVE_SOCKETS is defined in tsx.h)
 13 | 
 14 | (make-client-socket host port)
 15 |         host: string (IP address or host name)
 16 |         port: integer number
 17 | 
 18 |         Returns a socket which is already connected to the
 19 |         specified host and port, or #f if the connection could
 20 |         not be performed.
 21 | 
 22 | (make-server-socket port)
 23 |         port: integer number
 24 | 
 25 |         Returns a socket which is bound to the specified port on
 26 |         the local machine, and ready to accept connections. If the
 27 |         socket could not be created (e.g., because the port is
 28 |         already in use, or it is a privileged port and the user has
 29 |         no permissions on it), #f is returned.
 30 | 
 31 | (recv! sock buff)
 32 |         sock: socket obtained with make-client-socket or accept
 33 |         buff: string
 34 | 
 35 |         Waits for received data through the specified socket, and
 36 |         stores it on the buffer. The return value indicates the
 37 |         number of received bytes. This call blocks until some data
 38 |         is received, but does not guarantee that buff gets
 39 |         completely filled. If an error occurs (e.g., the other
 40 |         peer disconnects) then #f is returned.
 41 | 
 42 | (recv-new-string sock)
 43 |         sock: socket obtained with make-client-socket or accept
 44 | 
 45 |         Waits for received data through the specified socket, and
 46 |         returns it in a new string. This call blocks until some
 47 |         data is received. If an error occurs, then #f is returned.
 48 | 
 49 | (send sock buff)
 50 |         sock: socket obtained with make-client-socket or accept
 51 |         buff: string
 52 | 
 53 |         Sends the data contained in the string through the socket.
 54 |         It returns the number of transmitted bytes (could be
 55 |         different than the size of the string!), or #f if an error
 56 |         occured (e.g., the other peer disconnected).
 57 | 
 58 | (accept server-sock)
 59 |         server-sock: socket obtained with make-server-socket
 60 | 
 61 |         Waits until a connection is received on the specified
 62 |         server socket, and returns the connected socket. If an
 63 |         error occurs (e.g., the network interface shuts down), it
 64 |         returns #f instead.
 65 | 
 66 | (close-socket sock)
 67 |         sock: socket obtained with make-server-socket,
 68 |               make-client-socket or accept
 69 | 
 70 | 	The socket is closed. No further calls should be performed
 71 |         on this socket.
 72 | 
 73 | (sock-is-data-ready? sock)
 74 |         sock: socket obtained with make-server-socket,
 75 |               make-client-socket or accept
 76 | 
 77 | 	This function allows non-blocking operation with sockets.
 78 | 	It returns #t if data is available for reception on this
 79 | 	socket, and #f if no data has been received.
 80 | 
 81 | (sock-peek sock)
 82 |         sock: socket obtained with make-server-socket,
 83 |               make-client-socket or accept
 84 | 
 85 | 	This function returns (as a newly created string) the
 86 | 	data received in this socket. The information is not
 87 | 	removed from the input queue.
 88 | 
 89 | *File system (included if HAVE_FILESYSTEM is defined in tsx.h)
 90 | 
 91 | Scheme already defines functions to read and write files. These
 92 | functions allow access to the filesystem to check if a certain
 93 | file exists, to get its size, etc.
 94 | 
 95 | (file-size filename)
 96 |         filename: string
 97 | 
 98 |         This function returns the size (in bytes) of the
 99 |         indicated file, or #f if the file does not exists or
100 |         is not accessible to the requesting user.
101 | 
102 | (file-exists? filename)
103 |         filename: string
104 | 
105 |         This function returns #t if the indicated file exists, and
106 |         #f if it does not exists or it is not accessible to the
107 |         requesting user.
108 | 
109 | (delete-file filename)
110 |         filename: string
111 | 
112 |         Removes the specified file. It returns #t if the operation
113 |         succeeds, or #f otherwise (e.g., because the file is
114 |         read-only, or because the file does not exist).
115 | 
116 | (open-dir-stream path)
117 |         path: string
118 | 
119 |         Opens a "directory stream" on the provided directory path.
120 |         This stream will provide all the files within the directory,
121 |         using the function read-dir-entry. The stream should be closed
122 |         at the end with close-dir-stream.
123 | 
124 | (read-dir-entry dirstream)
125 |         dirstream: directory stream, obtained with open-dir-stream.
126 | 
127 |         It returns the name of the following directory entry, or eof
128 |         if all the entries were provided. Check the return value with
129 |         with eof-object?.
130 | 
131 | (close-dir-stream dirstream) 
132 |         dirstream: directory stream, obtained with open-dir-stream.
133 | 
134 |         Close directory stream. No further calls to read-dir-entry should
135 |         be performed.
136 | 
137 | 
138 | *Time (available if HAVE_TIME is defined in tsx.h)
139 | 
140 | (time)
141 |         Returns the current local time, as a list of integer
142 |         containing:
143 |           (year month day-of-month hour min sec millisec)
144 |         The year is expressed as an offsett from 1900.
145 | 
146 | (gettimeofday)
147 |         Returns a list containing the number of seconds from
148 |         the beginning of the day, and microseconds within the
149 |         current second.
150 | 
151 | (usleep microsec)
152 |         microsec: integer
153 | 
154 |         Suspends execution of the calling thread during the
155 |         specified number of microseconds.
156 | 
157 | 
158 | *Miscellaneous functions (available if HAVE_MISC is defined)
159 | 
160 | (getenv varname)
161 |         varname: string
162 | 
163 |         Returns a string with the content of the specified
164 |         environment variable, or #f if the variable is not
165 |         defined.
166 | 
167 | (system command)
168 |         command: string
169 | 
170 |         Executes a command on the /bin/sh shell. Returns #f if
171 |         it is unable to run /bin/sh or another error occurs,
172 |         or an integer return code which is the value returned
173 |         by the command to the shell.
174 | 
175 | END
176 | 
177 | 


--------------------------------------------------------------------------------
/ext/tsx/tsx.c:
--------------------------------------------------------------------------------
  1 | /* TinyScheme Extensions
  2 |  * (c) 2002 Visual Tools, S.A.
  3 |  * Manuel Heras-Gilsanz (manuel@heras-gilsanz.com)
  4 |  *
  5 |  * This software is subject to the terms stated in the
  6 |  * LICENSE file.
  7 |  */
  8 | 
  9 | #include "miniscm.h"
 10 | #include <sys/stat.h>
 11 | #ifdef _WIN32
 12 | #include <ws2tcpip.h>
 13 | #include <time.h>
 14 | #include <sys/timeb.h>
 15 | #include <io.h>
 16 | #else
 17 | #include <sys/time.h>
 18 | #include <sys/types.h>
 19 | #include <sys/socket.h>
 20 | #include <unistd.h>
 21 | #include <time.h>
 22 | #include <stdlib.h>
 23 | #include <arpa/inet.h>
 24 | #include <netdb.h>
 25 | #include <netinet/in.h>
 26 | #include <string.h>
 27 | #include <dirent.h>
 28 | #endif
 29 | #include "tsx.h"
 30 | 
 31 | #ifdef HAVE_MISC
 32 | pointer foreign_getenv(pointer args)
 33 | {
 34 |   pointer first_arg;
 35 |   pointer ret;
 36 |   char * varname;
 37 |   char * value;
 38 | 
 39 |   if(args == NIL)
 40 |   {
 41 |     return F;
 42 |   }
 43 | 
 44 |   first_arg = car(args);
 45 | 
 46 |   if(!is_string(first_arg))
 47 |   {
 48 |     return F;
 49 |   }
 50 | 
 51 |   varname = strvalue(first_arg);
 52 |   value = getenv(varname);
 53 |   if (0 == value)
 54 |   {
 55 |     ret = F;
 56 |   }
 57 |   else
 58 |   {
 59 |     ret = mk_string(value);
 60 |   }
 61 |   return ret;
 62 | }
 63 | 
 64 | pointer foreign_system(pointer args)
 65 | {
 66 |   pointer first_arg;
 67 |   char * command;
 68 |   int retcode;
 69 | 
 70 |   if(args == NIL)
 71 |     return F;
 72 | 
 73 |   first_arg = car(args);
 74 |   if(!is_string(first_arg))
 75 |     return F;
 76 | 
 77 |   command = strvalue(first_arg);
 78 |   if(0 == command)
 79 |     return F;
 80 | 
 81 |   retcode = system(command);
 82 |   if( (127 == retcode) || (-1 == retcode) )
 83 |     return F;
 84 | 
 85 |   return (mk_integer(retcode));
 86 | }
 87 | #endif /* defined (HAVE_MISC) */
 88 | 
 89 | #ifdef HAVE_FILESYSTEM
 90 | pointer foreign_filesize(pointer args)
 91 | {
 92 |   pointer first_arg;
 93 |   pointer ret;
 94 |   struct stat buf;
 95 |   char * filename;
 96 |   int retcode;
 97 | 
 98 |   if(args == NIL)
 99 |     return F;
100 | 
101 |   first_arg = car(args);
102 |   if(!is_string(first_arg)) {
103 |     return F;
104 |   }
105 | 
106 |   filename = strvalue(first_arg);
107 |   retcode = stat(filename, &buf);
108 |   if (0 == retcode)
109 |   {
110 |     ret = mk_integer(buf.st_size);
111 |   }
112 |   else
113 |   {
114 |     ret = F;
115 |   }
116 |   return ret;
117 | }
118 | 
119 | pointer foreign_fileexists(pointer args)
120 | {
121 |   pointer first_arg;
122 |   pointer ret;
123 |   struct stat buf;
124 |   char * filename;
125 |   int retcode;
126 | 
127 |   if(args == NIL)
128 |     return F;
129 | 
130 |   first_arg = car(args);
131 |   if(!is_string(first_arg)) {
132 |     return F;
133 |   }
134 | 
135 |   filename = strvalue(first_arg);
136 |   retcode = stat(filename, &buf);
137 |   if (0 == retcode)
138 |   {
139 |     ret = T;
140 |   }
141 |   else
142 |   {
143 |     ret = F;
144 |   }
145 |   return ret;
146 | }
147 | 
148 | pointer foreign_deletefile(pointer args)
149 | {
150 |   pointer first_arg;
151 |   pointer ret;
152 |   char * filename;
153 |   int retcode;
154 | 
155 |   if(args == NIL)
156 |     return F;
157 | 
158 |   first_arg = car(args);
159 |   if(!is_string(first_arg)) {
160 |     return F;
161 |   }
162 | 
163 |   filename = strvalue(first_arg);
164 | #ifdef _WIN32
165 |   retcode = _unlink(filename);
166 | #else
167 |   retcode = unlink(filename);
168 | #endif
169 |   if (0 == retcode) {
170 |     ret = T;
171 |   }
172 |   else {
173 |     ret = F;
174 |   }
175 |   return ret;
176 | }
177 | 
178 | pointer foreign_opendirstream(pointer args)
179 | {
180 |   pointer first_arg;
181 |   char * dirpath;
182 | #ifdef _WIN32
183 |   long dir;
184 |   struct _finddata_t entry;
185 | #else
186 |   DIR * dir;
187 | #endif
188 | 
189 |   if(args == NIL)
190 |     return F;
191 | 
192 |   first_arg = car(args);
193 |   if(!is_string(first_arg))
194 |     return F;
195 | 
196 |   dirpath = strvalue(first_arg);
197 | 
198 | #ifdef _WIN32
199 |   dir = _findfirst(dirpath, &entry);
200 |   if(-1L == dir)
201 | #else
202 |   dir = opendir(dirpath);
203 |   if(0 == dir)
204 | #endif
205 |     return F;
206 | 
207 |   return (mk_integer((int) dir));
208 | }
209 | 
210 | pointer foreign_readdirentry(pointer args)
211 | {
212 |   pointer first_arg;
213 | #ifdef _WIN32
214 |   long dir;
215 |   struct _finddata_t entry;
216 | #else
217 |   DIR * dir;
218 |   struct dirent * entry;
219 | #endif
220 | 
221 |   if(args == NIL)
222 |     return F;
223 | 
224 |   first_arg = car(args);
225 |   if(!is_number(first_arg))
226 |     return F;
227 | 
228 | #ifdef _WIN32
229 |   dir = (long) ivalue(first_arg);
230 |   if(-1L == dir)
231 | #else
232 |   dir = (DIR *) ivalue(first_arg);
233 |   if(0 == dir)
234 | #endif
235 |     return F;
236 | 
237 | #ifdef _WIN32
238 |   dir = _findnext(dir, &entry);
239 |   if(-1L == dir)
240 | #else
241 |   entry = readdir(dir);
242 |   if(0 == entry)
243 | #endif
244 |     return EOF_OBJ;
245 | 
246 | #ifdef _WIN32
247 |   return (mk_string(entry.name));
248 | #else
249 |   return (mk_string(entry->d_name));
250 | #endif
251 | }
252 | 
253 | pointer foreign_closedirstream(pointer args)
254 | {
255 |   pointer first_arg;
256 | #ifdef _WIN32
257 |   long dir;
258 | #else
259 |   DIR * dir;
260 | #endif
261 | 
262 |   if(args == NIL)
263 |     return F;
264 | 
265 |   first_arg = car(args);
266 |   if(!is_number(first_arg))
267 |     return F;
268 | 
269 | #ifdef _WIN32
270 |   dir = (long) ivalue(first_arg);
271 |   if(-1L == dir)
272 | #else
273 |   dir = (DIR *) ivalue(first_arg);
274 |   if(0 == dir)
275 | #endif
276 |     return F;
277 | 
278 | #ifdef _WIN32
279 |   _findclose(dir);
280 | #else
281 |   closedir(dir);
282 | #endif
283 |   return T;
284 | }
285 | #endif /* defined (HAVE_FILESYSTEM) */
286 | 
287 | #ifdef HAVE_TIME
288 | pointer foreign_time(pointer args)
289 | {
290 |   time_t now;
291 |   struct tm * now_tm;
292 |   pointer ret;
293 | 
294 |   if(args != NIL)
295 |   {
296 |     return F;
297 |   }
298 | 
299 |   time(&now);
300 |   now_tm = localtime(&now);
301 | 
302 |   ret = cons(mk_integer((long) now_tm->tm_year),
303 |          cons(mk_integer((long) now_tm->tm_mon),
304 |           cons(mk_integer((long) now_tm->tm_mday),
305 |            cons(mk_integer((long) now_tm->tm_hour),
306 |             cons(mk_integer((long) now_tm->tm_min),
307 |              cons(mk_integer((long) now_tm->tm_sec),NIL))))));
308 | 
309 |   return ret;
310 | }
311 | 
312 | pointer foreign_gettimeofday(pointer args)
313 | {
314 |   struct timeval tv;
315 |   pointer ret;
316 | 
317 | #ifdef _WIN32
318 |   struct _timeb tb;
319 | 
320 |   _ftime(&tb);
321 |   tv.tv_sec = (long) tb.time;
322 |   tv.tv_usec = tb.millitm * 1000;
323 | #else
324 |   gettimeofday(&tv, 0);
325 | #endif
326 | 
327 |   ret = cons(mk_integer((long) tv.tv_sec),
328 |          cons(mk_integer((long) tv.tv_usec),
329 |           NIL));
330 | 
331 |   return ret;
332 | }
333 | 
334 | pointer foreign_usleep(pointer args)
335 | {
336 |   pointer first_arg;
337 |   long usec;
338 | 
339 |   if(args == NIL)
340 |     return F;
341 | 
342 |   first_arg = car(args);
343 |   if(!is_integer(first_arg)) {
344 |     return F;
345 |   }
346 | 
347 |   usec = ivalue(first_arg);
348 | #ifdef _WIN32
349 |   if (usec > 0) {
350 |     HANDLE hTimer;
351 |     LARGE_INTEGER dueTime;
352 | 
353 |     dueTime.QuadPart = -10LL * usec;
354 | 
355 |     hTimer = CreateWaitableTimer(NULL, TRUE, NULL);
356 |     SetWaitableTimer(hTimer, &dueTime, 0, NULL, NULL, 0);
357 |     WaitForSingleObject(hTimer, INFINITE);
358 |     CloseHandle(hTimer);
359 |   }
360 | #else
361 |   usleep(usec);
362 | #endif
363 | 
364 |   return T;
365 | }
366 | #endif /* defined (HAVE_TIME) */
367 | 
368 | #ifdef HAVE_SOCKETS
369 | pointer foreign_makeclientsocket(pointer args)
370 | {
371 |   pointer first_arg;
372 |   pointer second_arg;
373 |   pointer ret;
374 |   struct sockaddr_in address;
375 |   struct in_addr inaddr;
376 |   struct hostent * host;
377 |   char * hostname;
378 |   int retcode;
379 |   long port;
380 |   int sock;
381 | #ifdef _WIN32
382 |   int size = sizeof(address);
383 | #endif
384 | 
385 |   if(args == NIL)
386 |     return F;
387 | 
388 |   first_arg = car(args);
389 |   if(!is_string(first_arg)) {
390 |     return F;
391 |   }
392 |   args = cdr(args);
393 |   second_arg = car(args);
394 |   if(!is_number(second_arg)) {
395 |     return F;
396 |   }
397 | 
398 |   hostname = strvalue(first_arg);
399 |   port = ivalue(second_arg);
400 | 
401 | #ifdef _WIN32
402 |   /* inet_pton() is not implemented in Windows XP, 2003 */
403 |   if (WSAStringToAddress(hostname, AF_INET, NULL, (struct sockaddr *)&address, &size) == 0) {
404 |     inaddr = address.sin_addr;
405 | #else
406 |   if(inet_aton(hostname, &inaddr)) {
407 | #endif
408 |     host = gethostbyaddr((char *) &inaddr, sizeof(inaddr), AF_INET);
409 |   } else
410 |     host = gethostbyname(hostname);
411 | 
412 |   if(0 == host) {
413 |     return F;
414 |   }
415 | 
416 |   sock = socket(PF_INET, SOCK_STREAM, 0);
417 |   if(-1==sock) {
418 |     return F;
419 |   }
420 | 
421 |   address.sin_family = AF_INET;
422 |   address.sin_port   = htons((u_short) port);
423 |   memcpy(&address.sin_addr, host->h_addr_list[0], sizeof(address.sin_addr));
424 | 
425 |   retcode = connect(sock, (struct sockaddr *)&address, sizeof(address));
426 |   if (0 == retcode) {
427 |     ret = mk_integer(sock);
428 |   }
429 |   else {
430 |     ret = F;
431 |   }
432 |   return ret;
433 | }
434 | 
435 | pointer foreign_makeserversocket(pointer args)
436 | {
437 |   pointer first_arg;
438 |   struct sockaddr_in address;
439 |   long port;
440 | #ifdef _WIN32
441 |   const char one = 1;
442 | #else
443 |   int one = 1;
444 | #endif
445 |   int sock;
446 | 
447 |   if(args == NIL)
448 |     return F;
449 | 
450 |   first_arg = car(args);
451 |   if(!is_number(first_arg)) {
452 |     return F;
453 |   }
454 | 
455 |   port = ivalue(first_arg);
456 | 
457 |   sock = socket(PF_INET, SOCK_STREAM, 0);
458 |   if(-1==sock) {
459 |     return F;
460 |   }
461 | 
462 |   setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
463 | 
464 |   address.sin_family = AF_INET;
465 |   address.sin_port   = htons((u_short) port);
466 |   memset(&address.sin_addr, 0, sizeof(address.sin_addr));
467 | 
468 |   if(bind(sock, (struct sockaddr *) &address, sizeof(address))) {
469 |     return F;
470 |   }
471 | 
472 |   if(listen(sock, 1)) {
473 |     return F;
474 |   }
475 | 
476 |   return (mk_integer(sock));
477 | }
478 | 
479 | pointer foreign_recv(pointer args)
480 | {
481 |   pointer first_arg;
482 |   pointer second_arg;
483 |   int sock;
484 |   char * buf;
485 |   pointer ret;
486 |   int retcode;
487 | 
488 |   if(args == NIL)
489 |     return F;
490 | 
491 |   first_arg = car(args);
492 |   if(!is_number(first_arg)) {
493 |     return F;
494 |   }
495 |   args = cdr(args);
496 |   second_arg = car(args);
497 |   if(!is_string(second_arg)) {
498 |     return F;
499 |   }
500 | 
501 |   sock = ivalue(first_arg);
502 |   buf  = strvalue(second_arg);
503 | 
504 |   retcode = recv(sock, buf, strlen(buf), 0);
505 |   if (-1 == retcode) {
506 |     ret = F;
507 |   }
508 |   else {
509 |     ret = mk_integer(retcode);
510 |   }
511 | 
512 |   return ret;
513 | }
514 | 
515 | pointer foreign_recvnewbuf(pointer args)
516 | {
517 |   pointer first_arg;
518 |   int sock;
519 |   pointer ret;
520 |   int lenreceived;
521 |   char buf[2500];
522 | 
523 |   if(args == NIL) return F;
524 | 
525 |   first_arg = car(args);
526 |   if(!is_number(first_arg)) return F;
527 | 
528 |   sock = ivalue(first_arg);
529 | 
530 |   lenreceived = recv(sock, buf, sizeof(buf) - 1, 0);
531 |   if (-1 == lenreceived) return F;
532 | 
533 |   buf[lenreceived] = 0;
534 |   ret = mk_string(buf);
535 | 
536 |   return ret;
537 | }
538 | 
539 | pointer foreign_isdataready(pointer args)
540 | {
541 |   pointer first_arg;
542 |   int sock;
543 |   struct timeval tv;
544 |   fd_set fds;
545 |   fd_set fdsin;
546 | 
547 |   if(args == NIL) return F;
548 | 
549 |   first_arg = car(args);
550 |   if(!is_number(first_arg)) return F;
551 | 
552 |   sock = ivalue(first_arg);
553 | 
554 |   tv.tv_sec = 0;
555 |   tv.tv_usec = 0;
556 | 
557 |   FD_ZERO(&fds);
558 |   FD_SET(sock, &fds);
559 |   fdsin = fds;
560 |   if (select(1+sock, &fdsin, NULL, NULL, &tv) < 0)
561 |     {
562 |       return F;
563 |     }
564 |   if (FD_ISSET(sock, &fdsin))
565 |     return T;
566 |   return F;
567 | }
568 | 
569 | pointer foreign_sockpeek(pointer args)
570 | {
571 |   pointer first_arg;
572 |   int sock;
573 |   pointer ret;
574 |   int lenreceived;
575 |   char buf[2500];
576 | 
577 |   if(args == NIL) return F;
578 | 
579 |   first_arg = car(args);
580 |   if(!is_number(first_arg)) return F;
581 | 
582 |   sock = ivalue(first_arg);
583 | 
584 |   lenreceived = recv(sock, buf, sizeof(buf) - 1, MSG_PEEK);
585 |   if (-1 == lenreceived) return F;
586 | 
587 |   buf[lenreceived] = 0;
588 |   ret = mk_string(buf);
589 | 
590 |   return ret;
591 | }
592 | 
593 | pointer foreign_send(pointer args)
594 | {
595 |   pointer first_arg;
596 |   pointer second_arg;
597 |   int sock;
598 |   char * buf;
599 |   pointer ret;
600 |   int retcode;
601 | 
602 |   if(args == NIL)
603 |     return F;
604 | 
605 |   first_arg = car(args);
606 |   if(!is_number(first_arg)) {
607 |     return F;
608 |   }
609 |   args = cdr(args);
610 |   second_arg = car(args);
611 |   if(!is_string(second_arg)) {
612 |     return F;
613 |   }
614 | 
615 |   sock = ivalue(first_arg);
616 |   buf  = strvalue(second_arg);
617 | 
618 |   retcode = send(sock, buf, strlen(buf), 0);
619 |   if (-1 == retcode) {
620 |     ret = F;
621 |   }
622 |   else {
623 |     ret = mk_integer(retcode);
624 |   }
625 | 
626 |   return ret;
627 | }
628 | 
629 | pointer foreign_accept(pointer args)
630 | {
631 |   pointer first_arg;
632 |   int sock;
633 |   struct sockaddr_in addr;
634 |   pointer ret;
635 |   socklen_t addr_len = sizeof(struct sockaddr_in);
636 |   int retcode;
637 | 
638 |   if(args == NIL)
639 |     return F;
640 | 
641 |   first_arg = car(args);
642 |   if(!is_number(first_arg)) {
643 |     return F;
644 |   }
645 | 
646 |   sock = ivalue(first_arg);
647 | 
648 |   retcode = accept(sock, (struct sockaddr *)&addr, &addr_len);
649 |   if (-1 == retcode) {
650 |     ret = F;
651 |   }
652 |   else {
653 |     ret = mk_integer(retcode);
654 |   }
655 | 
656 |   return ret;
657 | }
658 | 
659 | pointer foreign_closesocket(pointer args)
660 | {
661 |   pointer first_arg;
662 |   int sock;
663 |   int retcode;
664 | 
665 |   if(args == NIL)
666 |     return F;
667 | 
668 |   first_arg = car(args);
669 |   if(!is_number(first_arg))
670 |     return F;
671 | 
672 |   sock = ivalue(first_arg);
673 | 
674 | #ifdef _WIN32
675 |   retcode = _close(sock);
676 | #else
677 |   retcode = close(sock);
678 | #endif
679 |   if (-1 == retcode)
680 |     return F;
681 | 
682 |   return T;
683 | }
684 | #endif /* defined (HAVE_SOCKETS) */
685 | 
686 | 
687 | /* This function gets called when MiniScheme is loading the extension */
688 | void init_tsx(void)
689 | {
690 | #ifdef HAVE_MISC
691 |   scheme_register_foreign_func("getenv", foreign_getenv);
692 |   scheme_register_foreign_func("system", foreign_system);
693 | #endif /* defined (HAVE_MISC) */
694 | #ifdef HAVE_TIME
695 |   scheme_register_foreign_func("time", foreign_time);
696 |   scheme_register_foreign_func("gettimeofday", foreign_gettimeofday);
697 |   scheme_register_foreign_func("usleep", foreign_usleep);
698 | #endif /* defined (HAVE_TIME) */
699 | #ifdef HAVE_FILESYSTEM
700 |   scheme_register_foreign_func("file-size", foreign_filesize);
701 |   scheme_register_foreign_func("file-exists?", foreign_fileexists);
702 |   scheme_register_foreign_func("delete-file", foreign_deletefile);
703 |   scheme_register_foreign_func("open-dir-stream", foreign_opendirstream);
704 |   scheme_register_foreign_func("read-dir-entry", foreign_readdirentry);
705 |   scheme_register_foreign_func("close-dir-stream", foreign_closedirstream);
706 | #endif /* defined (HAVE_FILESYSTEM) */
707 | #ifdef HAVE_SOCKETS
708 |   scheme_register_foreign_func("make-client-socket", foreign_makeclientsocket);
709 |   scheme_register_foreign_func("make-server-socket", foreign_makeserversocket);
710 |   scheme_register_foreign_func("recv!", foreign_recv);
711 |   scheme_register_foreign_func("recv-new-string", foreign_recvnewbuf);
712 |   scheme_register_foreign_func("sock-peek", foreign_sockpeek);
713 |   scheme_register_foreign_func("sock-is-data-ready?", foreign_isdataready);
714 |   scheme_register_foreign_func("send", foreign_send);
715 |   scheme_register_foreign_func("accept", foreign_accept);
716 |   scheme_register_foreign_func("close-socket", foreign_closesocket);
717 | #endif /* defined (HAVE_SOCKETS) */
718 | }
719 | 


--------------------------------------------------------------------------------
/ext/tsx/tsx.h:
--------------------------------------------------------------------------------
 1 | /* TinyScheme Extensions
 2 |  * (c) 2001 Manuel Heras-Gilsanz
 3 |  *
 4 |  * This software is subject to the terms stated in the
 5 |  * LICENSE file.
 6 |  */
 7 | 
 8 | /* Comment those #defines whose functionality you don't
 9 |  * want to include. 
10 |  */
11 | 
12 | /* Comment the following line if you don't need sockets */
13 | #define HAVE_SOCKETS
14 | 
15 | /* Comment the following line if you don't need filesystem
16 |  * functionality (file-size, file-exists?, etc).
17 |  */
18 | #define HAVE_FILESYSTEM
19 | 
20 | /* Comment the following line if you don't need time functions */
21 | #define HAVE_TIME
22 | 
23 | /* Comment the following line if you don't need getenv and system */
24 | #define HAVE_MISC
25 | 


--------------------------------------------------------------------------------
/src/README:
--------------------------------------------------------------------------------
  1 |  =====================================================================
  2 | 
  3 |       ---------- Mini-Scheme Interpreter Version 0.85 ----------
  4 | 
  5 |                 coded by Atsushi Moriwaki (11/5/1989)
  6 | 
  7 |             E-MAIL :  moriwaki@kurims.kurims.kyoto-u.ac.jp
  8 |             MIX    :  riemann
  9 |             NIFTY  :  PBB01074
 10 | 	    (Note that these addresses are now obsolete, see below)
 11 | 
 12 |  =====================================================================
 13 | 
 14 | 		Revised by Akira KIDA
 15 | 
 16 | 		Version 0.85k4  (17 May 1994)
 17 | 		Version 0.85k3  (30 Nov 1989)
 18 | 		Version 0.85k2  (28 Nov 1989)
 19 | 		Version 0.85k1  (14 Nov 1989)
 20 | 
 21 | 	Mini-Scheme is now maintained by Akira KIDA.
 22 | 
 23 | 	E-Mail	: SDI00379@niftyserve.or.jp
 24 | 
 25 | 	Most part of this document is written by Akira KIDA.
 26 | 	Send comments/requests/bug reports to Akira KIDA at the above
 27 | 	email address.
 28 | 	
 29 |  =====================================================================
 30 | 
 31 |  This Mini-Scheme Interpreter is based on "SCHEME Interpreter in
 32 |  Common Lisp" in Appendix of T.Matsuda & K.Saigo, Programming of LISP,
 33 |  archive No5 (1987) p6 - p42 (published in Japan).
 34 | 
 35 | 
 36 |  Copyright Notice:
 37 | 	THIS SOFTWARE IS PLACED IN THE PUBLIC DOMAIN BY THE AUTHOR.
 38 | 
 39 |  This software is completely free to copy, modify and/or re-distribute.
 40 |  But I (Atsushi Moriwaki) would appreciate it if you left my name on the
 41 |  code as the author.
 42 | 
 43 |  DISCLAIMER:
 44 |     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 45 |     IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 46 |     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 47 |     PURPOSE.
 48 | 
 49 | 
 50 |  Supported features (or, NOT supported features :-)
 51 | 	1) Lists, symbols, strings.
 52 | 	   However, strings have very limited capability.
 53 | 	   For instance, there is *NO* string-ref, string-set!, ... etc.
 54 | 	2) Numbers are limited to FIXNUM only.
 55 | 	   There is *NO* complex, real, rational and even bignum.
 56 | 	3) Macro feature is supported, though not the one defined in R4RS.
 57 | 
 58 |  Known problems:
 59 | 	1) Poor error recovery from illegal use of syntax and procedure.
 60 | 	2) Certain procedures do not check its argument type.
 61 | 
 62 |  Installation:
 63 | 	1) Select system declaration and configuration options by editing
 64 | 	   source file.
 65 | 
 66 | 	   You may choose one of the following systems by #define'ing
 67 | 	   the preprocessor symbol.
 68 | 
 69 | 	   Supported systems are:
 70 | 	     Macintosh:
 71 | 		LSC	-- LightSpeed C (3.0) for Macintosh
 72 | 		LSC4	-- LightSpeed C (4.0) for Macintosh
 73 | 			   They are different in #include header only.
 74 | 			   I (kida) think THINK C 5.0, 6.0, 7.0 may be OK
 75 | 			   with LSC4 configuration, though not tested.
 76 | 		MPW2	-- Macintosh Programmer's Workshop v2.0x
 77 | 			   I don't tested v3.x or later.
 78 | 	     DOS:
 79 | 		MSC4	-- Microsoft C v4.0 (use /AL)
 80 | 			   MSC v5.1, v6.0, v7.0 are all OK.
 81 | 		TURBO2	-- Bolarnd's Turbo C v2.0 (use -ml)
 82 | 			   Turbo C++ 1.0 is OK.
 83 | 	     UNIX:
 84 | 		BSD	-- UNIX of BSD flavor, ex. SuOS 4.x
 85 | 		SYSV	-- UNIX of System-V flavor, ex. Sun/Solaris 2.x
 86 | 
 87 | 	     VAX/VMS:
 88 | 		VAXC	-- VAX-C v3.x (this symbol may be defined by the 
 89 |                            compiler automatically).
 90 | 
 91 | 	2) Configure some preprocessor symbols by editing source files.
 92 | 
 93 | 	   Configurable #define's are:
 94 | 
 95 | 	#define VERBOSE
 96 | 		-- if defined, GC messages is verbose on default.
 97 | 
 98 | 	#define AVOID_HACK_LOOP
 99 | 		-- if defined, do _NOT_ use loop construction in the
100 | 		   form
101 | 			do { ... } while (0)
102 | 		   This form is used in macro expansion, since this is
103 | 		   the best "safety" blocking construct when used in
104 | 		   macro definition.
105 | 		   However, some compiler (including SunPRO CC 2.0.1)
106 | 		   is silly enough to warning this construct, like
107 | 		   "warning: end-of-loop code not reached", etc.
108 | 		   If you dislike such warning, please define this symbol.
109 | 		   NOTE: You may get some "statement not reached" warning
110 | 		   even if you have define this symbol. Please be patient,
111 | 		   or, use more smart compiler.
112 | 		   In short if you use GCC, undefine this and forget it
113 | 		   at all.
114 | 
115 | 	#define USE_SETJMP
116 | 		-- if defined, use setjmp to global jump on error.
117 | 		   if not defined, avoid to use it. Compiled with
118 | 		   this symbol defined, the interpreter issue fatal
119 | 		   error and return to the operating system immediately
120 | 		   when we run out of free cells. By default, i.e.,
121 | 		   compiled with this symbol is not defined, the
122 | 		   interpreter will just return to the top level in
123 | 		   such a case.
124 | 		   May not be used except for compiling as Mac DA.
125 | 
126 | 	#define USE_MACRO
127 | 		-- if defined, macro features are enabled.
128 | 
129 | 	#define	USE_QQUOTE
130 | 		-- if defined, you can use quasi-quote "`" in macro.
131 | 		   You can use macro even if this symbol is undefined.
132 | 
133 | 	3) Compile!
134 | 
135 | 	  I think there is virtually no problem about how to compile.
136 | 	  Since there is exactly one C source file. :-)
137 | 	  If you are on UNIX boxes with some BSD flavors, instead of
138 | 	  using make command, it's enough to type:
139 | 
140 | 		cc -DBSD -O -o miniscm miniscm.c
141 | 
142 | 	  You may have additional warnings like 'function should
143 | 	  return value'. This is due to omitting 'void' keyword
144 | 	  from the source in order to get pre-ANSI compatibility.
145 | 
146 | 
147 |  Usage : miniscm
148 | 
149 | 	Sorry, no command line argnumet is allowed.
150 | 
151 | 
152 |  Special procedures of this system:
153 | 
154 |    gc             : (gc) -- force garbage collection
155 | 
156 |    gc-verbose     : (gc-verbose bool) -- GC verbose on/off
157 | 			Argument #f turnes off the GC message.
158 | 			Enything else turn on the GC message.
159 | 
160 |    quit           : (quit) -- quit to the operating system
161 | 
162 |    put            : (put sym prop expr)
163 |                      -- set the value of a property of a symbol.
164 |    get            : (get sym prop)
165 |                      -- get the value of a property of a symbol.
166 | 
167 |    new-segment    : (new-segment n)
168 |                      -- allocate n new cell segments.
169 | 
170 |    print-width	  : (print-width list)
171 | 		     -- returns 'printed' width of list.
172 | 
173 |    closure?	  : (closure? obj)
174 | 		     -- test if obj is a closure or not.
175 | 
176 |    macro?	  : (macro? obj)
177 | 		     -- test if obj is a macro or not.
178 | 		        note that a macro is also a closure.
179 | 
180 |   get-closure-code
181 | 		 : (get-closure-code closure-obj)
182 | 		     -- extract S-expr from closure-obj.
183 | 
184 |  Scheme files:
185 |     init.scm	-- Automatically loaded at invocation.
186 | 		   Default setting assumes that this file is in the current
187 | 		   working directory.
188 | 		   Change #define InitFile if you don't like it.
189 | 
190 |     tools.scm	-- This is a sample file. Contains very tiny pretty-print
191 | 		   procedure.
192 | 		   After invoking miniscm, type:
193 | 				(load "tools.scm")
194 | 		   and try
195 | 				(pp getd)
196 | 				(pp do)
197 | 
198 |  Documents?:
199 | 
200 |   Sorry, there is no other documents.
201 |   Do not ask one for me, please see the source instead. :-)
202 | 
203 |   But if you _absolutely_ need help, please email to me at:
204 | 	<SDI00379@niftyserve.or.jp>
205 | 
206 |   Enjoy!
207 | 
208 |  -- Akira KIDA
209 | 	Sysop for FPL in NIFTY-Serve in JAPAN.
210 | 	(FPL stands for 'Forum for Program-Language')
211 | 
212 | 


--------------------------------------------------------------------------------
/src/bignum.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This software is released under the MIT License, see the LICENSE file.
  3 |  */
  4 | 
  5 | #include "bignum.h"
  6 | 
  7 | #include <ctype.h>
  8 | #include <stdlib.h>
  9 | #include <string.h>
 10 | 
 11 | /* greatest common divisor */
 12 | int64_t gcd(int32_t x, int32_t y)
 13 | {
 14 | 	uint32_t ux = (uint32_t)llabs(x), uy = (uint32_t)llabs(y), uz;
 15 | 	while (ux != 0) {
 16 | 		uz = ux;
 17 | 		ux = uy % ux;
 18 | 		uy = uz;
 19 | 	}
 20 | 	return uy;
 21 | }
 22 | 
 23 | /* least common multiple */
 24 | int64_t lcm(int32_t x, int32_t y)
 25 | {
 26 | 	if (x == 0 || y == 0) {
 27 | 		return 0;
 28 | 	}
 29 | 	return llabs(x / gcd(x, y) * y);
 30 | }
 31 | 
 32 | /* the first bit1 position */
 33 | int32_t find1_32(uint32_t val)
 34 | {
 35 | 	static const int32_t clz_table_4bit[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
 36 | 	int32_t n = 32;
 37 | 
 38 | 	if ((val & 0xFFFF0000) == 0) { n -= 16; val <<= 16; }
 39 | 	if ((val & 0xFF000000) == 0) { n -= 8; val <<= 8; }
 40 | 	if ((val & 0xF0000000) == 0) { n -= 4; val <<= 4; }
 41 | 
 42 | 	return n - clz_table_4bit[val >> (32 - 4)];
 43 | }
 44 | 
 45 | /* if x == y */
 46 | int32_t bn_eq(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
 47 | {
 48 | 	int32_t i;
 49 | 	if (colx != coly) {
 50 | 		return  0;
 51 | 	}
 52 | 	for (i = colx - 1; i >= 0; i--) {
 53 | 		if (x[i] != y[i]) {
 54 | 			return 0;
 55 | 		}
 56 | 	}
 57 | 	return 1;
 58 | }
 59 | 
 60 | /* if x > y */
 61 | int32_t bn_gt(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
 62 | {
 63 | 	int32_t i;
 64 | 	if (colx > coly) {
 65 | 		return  1;
 66 | 	} else if (colx < coly) {
 67 | 		return 0;
 68 | 	}
 69 | 	for (i = colx - 1; i >= 0; i--) {
 70 | 		if (x[i] > y[i]) {
 71 | 			return 1;
 72 | 		} else if (x[i] < y[i]) {
 73 | 			return 0;
 74 | 		}
 75 | 	}
 76 | 	return 0;
 77 | }
 78 | 
 79 | /* if x >= y */
 80 | int32_t bn_ge(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
 81 | {
 82 | 	int32_t i;
 83 | 	if (colx > coly) {
 84 | 		return  1;
 85 | 	} else if (colx < coly) {
 86 | 		return 0;
 87 | 	}
 88 | 	for (i = colx - 1; i >= 0; i--) {
 89 | 		if (x[i] > y[i]) {
 90 | 			return 1;
 91 | 		} else if (x[i] < y[i]) {
 92 | 			return 0;
 93 | 		}
 94 | 	}
 95 | 	return 1;
 96 | }
 97 | 
 98 | /* z = x + y */
 99 | void bn_add(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
100 | {
101 | 	int32_t i, col = (colx < coly) ? colx : coly;
102 | 	uint64_t t = 0;
103 | 	for (i = 0; i < col; i++) {
104 | 		t = (uint64_t)x[i] + y[i] + (t >> 32);
105 | 		z[i] = (uint32_t)t;
106 | 	}
107 | 	if (colx > coly) {
108 | 		*colz = colx;
109 | 		for (; i < colx; i++) {
110 | 			t = x[i] + (t >> 32);
111 | 			z[i] = (uint32_t)t;
112 | 		}
113 | 	} else {
114 | 		*colz = coly;
115 | 		for (; i < coly; i++) {
116 | 			t = y[i] + (t >> 32);
117 | 			z[i] = (uint32_t)t;
118 | 		}
119 | 	}
120 | 	if (t >>= 32) {
121 | 		z[(*colz)++] = (uint32_t)t;
122 | 	}
123 | }
124 | 
125 | /* z = x - y */
126 | void bn_sub(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
127 | {
128 | 	int32_t i;
129 | 	uint32_t carry = 0;
130 | 	for (i = 0; i < coly; i++) {
131 | 		uint64_t t = (uint64_t)y[i] + carry;
132 | 		if (x[i] >= t) {
133 | 			z[i] = (uint32_t)(x[i] - t);
134 | 			carry = 0;
135 | 		} else {
136 | 			z[i] = (uint32_t)(((uint64_t)1 << 32) + x[i] - t);
137 | 			carry = 1;
138 | 		}
139 | 	}
140 | 	*colz = colx;
141 | 	for (; i < colx; i++) {
142 | 		if (x[i] >= carry) {
143 | 			z[i] = (uint32_t)(x[i] - carry);
144 | 			carry = 0;
145 | 		} else {
146 | 			z[i] = (uint32_t)(((uint64_t)1 << 32) + x[i] - carry);
147 | 			carry = 1;
148 | 		}
149 | 	}
150 | 	while (*colz > 0) {
151 | 		if (z[*colz - 1] > 0) {
152 | 			break;
153 | 		}
154 | 		(*colz)--;
155 | 	}
156 | }
157 | 
158 | /* z = x * y */
159 | void bn_mul(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
160 | {
161 | 	int32_t i, j;
162 | 	memset(z, 0, sizeof(uint32_t) * (colx + coly));
163 | 	for (i = 0; i < colx; i++) {
164 | 		uint64_t t = 0;
165 | 		for (j = 0; j < coly; j++) {
166 | 			t = z[i + j] + (uint64_t)x[i] * y[j] + (t >> 32);
167 | 			z[i + j] = (uint32_t)t;
168 | 		}
169 | 		if (t >>= 32) {
170 | 			z[i + j] = (uint32_t)t;
171 | 		}
172 | 	}
173 | 	*colz = colx + coly;
174 | 	while (*colz > 0) {
175 | 		if (z[*colz - 1] > 0) {
176 | 			break;
177 | 		}
178 | 		(*colz)--;
179 | 	}
180 | }
181 | 
182 | /* z = x^2 */
183 | void bn_sqr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx)
184 | {
185 | 	int32_t i, j = 0;
186 | 	uint64_t t;
187 | 	*colz = 2 * colx;
188 | 	memset(z, 0, sizeof(uint32_t) * *colz);
189 | 	for (i = 0; i < colx - 1; i++) {
190 | 		t = 0;
191 | 		for (j = i + 1; j < colx; j++) {
192 | 			t = z[i + j] + (uint64_t)x[i] * x[j] + (t >> 32);
193 | 			z[i + j] = (uint32_t)t;
194 | 		}
195 | 		if (t >>= 32) {
196 | 			z[i + j] = (uint32_t)t;
197 | 		}
198 | 	}
199 | 	for (i = i + j - 1; i >= 0; i--) {
200 | 		z[i + 1] |= z[i] >> 31;
201 | 		z[i] = z[i] << 1;
202 | 	}
203 | 	t = 0;
204 | 	for (i = 0; i < colx * 2; i++) {
205 | 		t = z[i] + (uint64_t)x[i >> 1] * x[i >> 1] + (t >> 32);
206 | 		z[i++] = (uint32_t)t;
207 | 		t = z[i] + (t >> 32);
208 | 		z[i] = (uint32_t)t;
209 | 	}
210 | 	while (*colz > 0) {
211 | 		if (z[*colz - 1] > 0) {
212 | 			break;
213 | 		}
214 | 		(*colz)--;
215 | 	}
216 | }
217 | 
218 | /* z = x << n */
219 | void bn_sftl(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n)
220 | {
221 | 	int32_t i, q = n / 32, r = n & 0x1F;
222 | 
223 | 	if (r == 0) {
224 | 		for (i = colx - 1; i >= 0; --i) {
225 | 			z[i + q] = x[i];
226 | 		}
227 | 		memset(z, 0, sizeof(int32_t) * q);
228 | 		*colz = colx + q;
229 | 	} else {
230 | 		int32_t col = 0;
231 | 		if (x[colx - 1] >> (32 - r)) {
232 | 			col++;
233 | 			z[colx + q] = 0;
234 | 		}
235 | 		for (i = colx - 1; i >= 0; --i) {
236 | 			z[i + q + 1] |= x[i] >> (32 - r);
237 | 			z[i + q] = x[i] << r;
238 | 		}
239 | 		memset(z, 0, sizeof(int32_t) * q);
240 | 		*colz = colx + q + col;
241 | 	}
242 | 	while (*colz > 0) {
243 | 		if (z[*colz - 1] > 0) {
244 | 			break;
245 | 		}
246 | 		(*colz)--;
247 | 	}
248 | }
249 | 
250 | /* z = x >> n */
251 | void bn_sftr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n)
252 | {
253 | 	int32_t i, q = n / 32, r = n & 0x1F;
254 | 
255 | 	if (r == 0) {
256 | 		if (colx - q <= 0) {
257 | 			*colz = 0;
258 | 		} else {
259 | 			for (i = q; i < colx; i++) {
260 | 				z[i - q] = x[i];
261 | 			}
262 | 			*colz = colx - q;
263 | 		}
264 | 	} else {
265 | 		if (colx - q <= 0) {
266 | 			*colz = 0;
267 | 		} else {
268 | 			int32_t col = 0;
269 | 			if ((x[colx - 1] >> r) == 0) {
270 | 				col--;
271 | 			}
272 | 			z[0] = x[q] >> r;
273 | 			for (i = q + 1; i < colx; i++) {
274 | 				z[i - q - 1] |= x[i] << (32 - r);
275 | 				z[i - q] = x[i] >> r;
276 | 			}
277 | 			*colz = colx - q + col;
278 | 		}
279 | 	}
280 | 	while (*colz > 0) {
281 | 		if (z[*colz - 1] > 0) {
282 | 			break;
283 | 		}
284 | 		(*colz)--;
285 | 	}
286 | }
287 | 
288 | /* q = x / y + r */
289 | void bn_div(uint32_t q[], int32_t *colq, uint32_t r[], int32_t *colr, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly)
290 | {
291 | 	int32_t i;
292 | 	if (coly == 1) {
293 | 		uint64_t t = 0;
294 | 		for (i = colx - 1; i >= 0; i--) {
295 | 			t = t << 32 | x[i];
296 | 			q[i] = (uint32_t)(t / y[0]);
297 | 			t = t % y[0];
298 | 		}
299 | 		*colq = colx;
300 | 		while (*colq > 0) {
301 | 			if (q[*colq - 1] > 0) {
302 | 				break;
303 | 			}
304 | 			(*colq)--;
305 | 		}
306 | 		r[0] = (int32_t)t;
307 | 		*colr = 1;
308 | 	} else {
309 | 		uint32_t *t_a = r + *colr, *t_b = t_a + 1 + colx, *t_x = t_b + 1 + coly;
310 | 		int32_t cola, colb, d = 32 - find1_32(y[coly - 1]);
311 | 		if (d == 0) {
312 | 			memcpy(t_a, x, sizeof(uint32_t) * colx);
313 | 			memcpy(t_b, y, sizeof(uint32_t) * coly);
314 | 			cola = colx;
315 | 			colb = coly;
316 | 		} else {
317 | 			bn_sftl(t_a, &cola, x, colx, d);
318 | 			bn_sftl(t_b, &colb, y, coly, d);
319 | 		}
320 | 		i = cola - colb;
321 | 		if (i > 0) {
322 | 			*colq = i;
323 | 		} else if (i == 0) {
324 | 			if (bn_ge(t_a, cola, t_b, colb)) {
325 | 				q[0] = 1;
326 | 				*colq = 1;
327 | 				bn_sub(t_a, &cola, t_a, cola, t_b, colb);
328 | 			} else {
329 | 				*colq = 0;
330 | 			}
331 | 		} else {
332 | 			*colq = 0;
333 | 		}
334 | 		while (--i >= 0) {
335 | 			if (cola > 1 && bn_gt(t_a, cola, t_b, colb)) {
336 | 				uint64_t aa = (uint64_t)t_a[cola - 1] << 32 | t_a[cola - 2];
337 | 				uint64_t qq = aa / t_b[colb - 1];
338 | 				uint32_t q2[2];
339 | 				if (cola > 2 && colb > 1) {
340 | 					uint64_t rr = aa % t_b[colb - 1];
341 | 					while ((rr << 32 | t_a[cola - 3]) < qq * t_b[colb - 2]) {
342 | 						qq--;
343 | 						rr += t_b[colb - 2];
344 | 						if (rr > UINT32_MAX) break;
345 | 					}
346 | 				}
347 | 				do {
348 | 					q2[0] = (uint32_t)qq;
349 | 					q2[1] = (uint32_t)(qq >> 32);
350 | 					bn_mul(t_x, &colx, t_b, colb, q2, q2[1] > 0 ? 2 : 1);
351 | 					--qq;
352 | 					bn_sftl(t_x, &colx, t_x, colx, i * 32);
353 | 				} while (bn_gt(t_x, colx, t_a, cola));
354 | 				q[i] = (uint32_t)(qq + 1);
355 | 				if (qq >= UINT32_MAX) {
356 | 					q2[0] = 1;
357 | 					bn_add(&q[i + 1], colq, &q[i + 1], *colq - i - 1, q2, 1);
358 | 					*colq += i + 1;
359 | 				}
360 | 				bn_sub(t_a, &cola, t_a, cola, t_x, colx);
361 | 			} else if (bn_eq(t_a, cola, t_b, colb)) {
362 | 				q[i] = 1;
363 | 				while (i > 0) q[--i] = 0;
364 | 				while (cola > 0) t_a[--cola] = 0;
365 | 			} else {
366 | 				q[i] = 0;
367 | 			}
368 | 		}
369 | 		if (d > 0) {
370 | 			bn_sftr(r, colr, t_a, cola, d);
371 | 		} else {
372 | 			memcpy(r, t_a, cola * sizeof(uint32_t));
373 | 			*colr = cola;
374 | 			while (*colr > 0) {
375 | 				if (r[*colr - 1] > 0) {
376 | 					break;
377 | 				}
378 | 				(*colr)--;
379 | 			}
380 | 		}
381 | 	}
382 | }
383 | 
384 | int32_t bn_str2num_base2(const char s[], int32_t len, uint32_t x[], int32_t col)
385 | {
386 | 	int32_t i, j;
387 | 	memset(x, 0, col * sizeof(uint32_t));
388 | 	for (i = 0; s[i]; i++) {
389 | 		if (s[i] < '0' || '1' < s[i]) {
390 | 			return -1;
391 | 		}
392 | 		j = len - i - 1;
393 | 		x[j / 32] |= (s[i] - '0') << j % 32;
394 | 	}
395 | 	while (col > 0) {
396 | 		if (x[col - 1] > 0) {
397 | 			break;
398 | 		}
399 | 		col--;
400 | 	}
401 | 	return col;
402 | }
403 | 
404 | int32_t bn_str2num_base8(const char s[], int32_t len, uint32_t x[], int32_t col)
405 | {
406 | 	int32_t i, j, k;
407 | 	memset(x, 0, col * sizeof(uint32_t));
408 | 	for (i = 0; s[i]; i++) {
409 | 		if (s[i] < '0' || '7' < s[i]) {
410 | 			return -1;
411 | 		}
412 | 		k = (len - i - 1) / 32;
413 | 		j = (len - i - 1) % 32;
414 | 		if (j == 10) {
415 | 			x[k * 3 + 1] |= (uint32_t)(s[i] - '0') >> 2;
416 | 		} else if (j == 21) {
417 | 			x[k * 3 + 2] |= (uint32_t)(s[i] - '0') >> 1;
418 | 		}
419 | 		x[k * 3 + j / 11] |= (uint32_t)(s[i] - '0') << j % 11 * 3 << j / 11 % 3;
420 | 	}
421 | 	while (col > 0) {
422 | 		if (x[col - 1] > 0) {
423 | 			break;
424 | 		}
425 | 		col--;
426 | 	}
427 | 	return col;
428 | }
429 | 
430 | int32_t bn_str2num_base10(const char s[], int32_t len, uint32_t x[], int32_t col)
431 | {
432 | 	uint64_t t = 0;
433 | 	int32_t i, j, k = (len - 1) % 9 + 1;
434 | 	memset(x, 0, col * sizeof(uint32_t));
435 | 	for (col = 1, i = 0; s[i]; i++) {
436 | 		if (s[i] < '0' || '9' < s[i]) {
437 | 			return -1;
438 | 		}
439 | 		t = t * 10 + (uint64_t)(s[i] - '0');
440 | 		if (--k > 0) continue;
441 | 		t <<= 32;
442 | 		for (j = 0; j < col; j++) {
443 | 			t = (uint64_t)x[j] * 1000000000 + (t >> 32);
444 | 			x[j] = (uint32_t)t;
445 | 		}
446 | 		if (t >> 32) x[col++] = (uint32_t)(t >> 32);
447 | 		t = 0;
448 | 		k = 9;
449 | 	}
450 | 	while (col > 0) {
451 | 		if (x[col - 1] > 0) {
452 | 			break;
453 | 		}
454 | 		col--;
455 | 	}
456 | 	return col;
457 | }
458 | 
459 | int32_t bn_str2num_base16(const char s[], int32_t len, uint32_t x[], int32_t col)
460 | {
461 | 	int32_t i, j;
462 | 	memset(x, 0, col * sizeof(uint32_t));
463 | 	for (i = 0; s[i]; i++) {
464 | 		int c = toupper(s[i]);
465 | 		if ('0' <= c && c <= '9') {
466 | 			c -= '0';
467 | 		} else if ('A' <= c && c <= 'F') {
468 | 			c += 10 - 'A';
469 | 		} else {
470 | 			return -1;
471 | 		}
472 | 		j = len - i - 1;
473 | 		x[j / 8] |= c << j % 8 * 4;
474 | 	}
475 | 	while (col > 0) {
476 | 		if (x[col - 1] > 0) {
477 | 			break;
478 | 		}
479 | 		col--;
480 | 	}
481 | 	return col;
482 | }
483 | 
484 | char *bn_num2str_base2(char *p, uint32_t x[], int32_t col)
485 | {
486 | 	int32_t i, j;
487 | 	*p = 0;
488 | 	for (i = 0; i < col; i++) {
489 | 		for (j = 0; j < 32; j++) {
490 | 			uint32_t n = x[i] >> j;
491 | 			if (i < col - 1 || n != 0) {
492 | 				*--p = (n & 0x1) + '0';
493 | 			}
494 | 		}
495 | 	}
496 | 	return p;
497 | }
498 | 
499 | char *bn_num2str_base8(char *p, uint32_t x[], int32_t col)
500 | {
501 | 	int32_t i;
502 | 	*p = 0;
503 | 	for (i = 0; i < col; i++) {
504 | 		uint32_t j, k, m = x[i];
505 | 		if (i % 3 == 0) {
506 | 			k = 11;
507 | 		} else if (i % 3 == 1) {
508 | 			*p += (m & 0x1) << 2;
509 | 			k = 11;
510 | 		} else {
511 | 			*p += (m & 0x3) << 1;
512 | 			k = 10;
513 | 		}
514 | 		for (j = 0; j < k; j++) {
515 | 			uint32_t n = m >> (3 * j + i % 3);
516 | 			if (i < col - 1 || n != 0) {
517 | 				*--p = (n & 0x7) + '0';
518 | 			}
519 | 		}
520 | 	}
521 | 	return p;
522 | }
523 | 
524 | char *bn_num2str_base10(char *p, uint32_t x[], int32_t col)
525 | {
526 | 	*p = 0;
527 | 	while (col > 0) {
528 | 		int32_t i;
529 | 		uint64_t t = 0;
530 | 		for (i = col - 1; i >= 0; i--) {
531 | 			t = t << 32 | x[i];
532 | 			x[i] = (uint32_t)(t / 1000000000);
533 | 			t = t % 1000000000;
534 | 		}
535 | 		while (col > 0) {
536 | 			if (x[col - 1] > 0) {
537 | 				break;
538 | 			}
539 | 			col--;
540 | 		}
541 | 		for (i = 0; i < 9 && (col > 0 || t > 0); i++, t /= 10) {
542 | 			*--p = (char)(t % 10) + '0';
543 | 		}
544 | 	}
545 | 	return p;
546 | }
547 | 
548 | char *bn_num2str_base16(char *p, uint32_t x[], int32_t col)
549 | {
550 | 	int32_t i, j;
551 | 	*p = 0;
552 | 	for (i = 0; i < col; i++) {
553 | 		for (j = 0; j < 8; j++) {
554 | 			uint32_t n = x[i] >> (4 * j);
555 | 			if (i < col - 1 || n != 0) {
556 | 				char c = n & 0xf;
557 | 				*--p = (c < 10) ? c + '0' : c - 10 + 'a';
558 | 			}
559 | 		}
560 | 	}
561 | 	return p;
562 | }
563 | 


--------------------------------------------------------------------------------
/src/bignum.h:
--------------------------------------------------------------------------------
 1 | #ifndef BIGNUM_H
 2 | #define BIGNUM_H
 3 | 
 4 | #include <inttypes.h>
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | int64_t gcd(int32_t x, int32_t y);
11 | int64_t lcm(int32_t x, int32_t y);
12 | int32_t find1_32(uint32_t val);
13 | int32_t bn_eq(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
14 | int32_t bn_gt(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
15 | int32_t bn_ge(uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
16 | void bn_add(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
17 | void bn_sub(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
18 | void bn_mul(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
19 | void bn_sqr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx);
20 | void bn_sftl(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n);
21 | void bn_sftr(uint32_t z[], int32_t *colz, uint32_t x[], int32_t colx, int32_t n);
22 | void bn_div(uint32_t q[], int32_t *colq, uint32_t r[], int32_t *colr, uint32_t x[], int32_t colx, uint32_t y[], int32_t coly);
23 | int32_t bn_str2num_base2(const char s[], int32_t len, uint32_t x[], int32_t col);
24 | int32_t bn_str2num_base8(const char s[], int32_t len, uint32_t x[], int32_t col);
25 | int32_t bn_str2num_base10(const char s[], int32_t len, uint32_t x[], int32_t col);
26 | int32_t bn_str2num_base16(const char s[], int32_t len, uint32_t x[], int32_t col);
27 | char *bn_num2str_base2(char *p, uint32_t x[], int32_t col);
28 | char *bn_num2str_base8(char *p, uint32_t x[], int32_t col);
29 | char *bn_num2str_base10(char *p, uint32_t x[], int32_t col);
30 | char *bn_num2str_base16(char *p, uint32_t x[], int32_t col);
31 | 
32 | #ifdef __cplusplus
33 | }
34 | #endif
35 | 
36 | #endif /* BIGNUM_H */
37 | 


--------------------------------------------------------------------------------
/src/init.scm:
--------------------------------------------------------------------------------
1 | ; This is a init file for Mini-Scheme.
2 | 
3 | (define call/cc call-with-current-continuation)
4 | 


--------------------------------------------------------------------------------
/src/makefile:
--------------------------------------------------------------------------------
 1 | #  Makefile for System-V flavoured UNIX
 2 | #
 3 | #CC = gcc  # you may use both ANSI and pre-ANSI
 4 | 
 5 | #
 6 | #  Please see source and/or README  for system defition
 7 | #
 8 | #CFLAGS = -g -DSYSV -traditional -traditional-cpp -Wid-clash-8
 9 | CFLAGS = -O
10 | 
11 | all :	miniscm
12 | 
13 | miniscm : miniscm.o bignum.o
14 | 	$(CC) $(CFLAGS) -o miniscm miniscm.o bignum.o -lm
15 | 
16 | miniscm.o : miniscm.c miniscm.h
17 | 	$(CC) $(CFLAGS) -c miniscm.c
18 | 
19 | bignum.o : bignum.c bignum.h
20 | 	$(CC) $(CFLAGS) -c bignum.c
21 | 
22 | clean :
23 | 	rm -f core *.o miniscm *~
24 | 
25 | 


--------------------------------------------------------------------------------
/src/miniscm.h:
--------------------------------------------------------------------------------
  1 | #ifndef MINISCHEME_H
  2 | #define MINISCHEME_H
  3 | 
  4 | #include <stdio.h>
  5 | #include <inttypes.h>
  6 | #include <setjmp.h>
  7 | 
  8 | #ifdef __cplusplus
  9 | extern "C" {
 10 | #endif
 11 | 
 12 | /*
 13 |  * Define or undefine following symbols as you need.
 14 |  */
 15 | #ifndef STANDALONE
 16 | #define STANDALONE 1	/* define 0 if you want to build as a Library */
 17 | #endif
 18 | /* #define USE_SCHEME_STACK */	/* define this if you want original-Stack */
 19 | #define USE_COPYING_GC	/* undef this if you do not want to use Copying GC */
 20 | 
 21 | /*
 22 |  *  Basic memory allocation units
 23 |  */
 24 | #define CELL_SEGSIZE 500000	/* # of cells in one segment */
 25 | 
 26 | #define MAXFIL 64	/* stack size of loading files */
 27 | 
 28 | typedef struct cell *pointer;
 29 | typedef pointer (*foreign_func)(pointer);
 30 | 
 31 | /* cell structure */
 32 | struct cell {
 33 | 	unsigned short _flag;
 34 | 	unsigned char  _extflag;
 35 | 	unsigned char  _isfixnum;
 36 | 	union {
 37 | 		struct {
 38 | 			char   *_svalue;
 39 | 			size_t  _length;
 40 | 		} _string;
 41 | 		union {
 42 | 			struct {
 43 | 				int32_t _ivalue;
 44 | 				struct cell *_bignum;
 45 | 			} _integer;
 46 | 			double  _rvalue;
 47 | 		} _number;
 48 | 		struct {
 49 | 			FILE   *_file;
 50 | 			char   *_curr;
 51 | 		} _port;
 52 | 		foreign_func _ff;
 53 | 		struct {
 54 | 			struct cell *_car;
 55 | 			struct cell *_cdr;
 56 | 		} _cons;
 57 | #ifdef USE_COPYING_GC
 58 | 		struct cell *_forwarded;
 59 | #endif
 60 | 	} _object;
 61 | };
 62 | 
 63 | #define T_STRING         1	/* 0000000000000001 */
 64 | #define T_NUMBER         2	/* 0000000000000010 */
 65 | #define T_SYMBOL         4	/* 0000000000000100 */
 66 | #define T_SYNTAX         8	/* 0000000000001000 */
 67 | #define T_PROC          16	/* 0000000000010000 */
 68 | #define T_PAIR          32	/* 0000000000100000 */
 69 | #define T_CLOSURE       64	/* 0000000001000000 */
 70 | #define T_CONTINUATION 128	/* 0000000010000000 */
 71 | #define T_CHARACTER    256	/* 0000000100000000 */
 72 | #define T_PORT         512	/* 0000001000000000 */
 73 | #define T_VECTOR      1024	/* 0000010000000000 */
 74 | #define T_FOREIGN     2048	/* 0000100000000000 */
 75 | #define T_MEMBLOCK    4096	/* 0001000000000000 */
 76 | #define T_ENVIRONMENT 8192	/* 0010000000000000 */
 77 | #define T_ATOM       16384	/* 0100000000000000 */	/* only for gc */
 78 | #define CLRATOM      49151	/* 1011111111111111 */	/* only for gc */
 79 | #define MARK         32768	/* 1000000000000000 */
 80 | #define UNMARK       32767	/* 0111111111111111 */
 81 | #ifdef USE_COPYING_GC
 82 | # define T_FORWARDED 32768	/* 1000000000000000 */	/* only for gc */
 83 | #endif
 84 | 
 85 | #define T_VALUES         1	/* 0000000000000001 */	/* for call-with-values */
 86 | 
 87 | #define T_PROMISE        1	/* 00000001 */
 88 | #define T_RESULTREADY    2	/* 00000010 */
 89 | #define T_MACRO          4	/* 00000100 */
 90 | #define T_DEFMACRO       8	/* 00001000 */	/* for define-macro */
 91 | #define T_DEFSYNTAX  32768	/* 1000000000000000 */	/* for define-syntax */
 92 | #define T_SYNTAXNUM  32767	/* 0111111111111111 */	/* for define-syntax */
 93 | 
 94 | /* macros for cell operations */
 95 | #define type(p)         ((p)->_flag)
 96 | #define exttype(p)      ((p)->_extflag)
 97 | 
 98 | #define is_string(p)    (type(p)&T_STRING)
 99 | #define strvalue(p)     ((p)->_object._string._svalue)
100 | #define strlength(p)    ((p)->_object._string._length)
101 | 
102 | #define is_number(p)    (type(p)&T_NUMBER)
103 | #define ivalue(p)       ((p)->_object._number._integer._ivalue)
104 | #define bignum(p)       ((p)->_object._number._integer._bignum)
105 | #define rvalue(p)       ((p)->_object._number._rvalue)
106 | #define nvalue(p)       ((p)->_isfixnum ? ivalue(p) : rvalue(p))
107 | #define is_integer(p)   (is_number(p) && ((p)->_isfixnum || floor(rvalue(p) + 0.5) == rvalue(p)))
108 | #define set_num_integer(p)   ((p)->_isfixnum = 1)
109 | #define set_num_real(p)      ((p)->_isfixnum = 0)
110 | 
111 | #define is_pair(p)      (type(p)&T_PAIR)
112 | #define car(p)          ((p)->_object._cons._car)
113 | #define cdr(p)          ((p)->_object._cons._cdr)
114 | 
115 | #define is_symbol(p)    (type(p)&T_SYMBOL)
116 | #define symname(p)      strvalue(p)
117 | 
118 | #define is_syntax(p)    (type(p)&T_SYNTAX)
119 | #define is_proc(p)      (type(p)&T_PROC)
120 | #define syntaxnum(p)    (*(short *)&(p)->_extflag)
121 | #define procnum(p)      (int)ivalue(p)
122 | 
123 | #define is_closure(p)   (type(p)&T_CLOSURE)
124 | #define is_macro(p)     (exttype(p)&T_MACRO)
125 | #define closure_code(p) car(p)
126 | #define closure_env(p)  cdr(p)
127 | 
128 | #define is_continuation(p) (type(p)&T_CONTINUATION)
129 | #define cont_dump(p)    cdr(p)
130 | 
131 | #define is_character(p) (type(p)&T_CHARACTER)
132 | 
133 | enum {
134 | 	port_input = 1,
135 | 	port_output = 2,
136 | 	port_file = 4,
137 | 	port_string = 8,
138 | 	port_eof = 16,
139 | };
140 | #define is_port(p)      (type(p) & T_PORT)
141 | #define is_inport(p)    (is_port(p) && ((p)->_isfixnum & port_input))
142 | #define is_outport(p)   (is_port(p) && ((p)->_isfixnum & port_output))
143 | #define is_fileport(p)  (is_port(p) && ((p)->_isfixnum & port_file))
144 | #define is_strport(p)   (is_port(p) && ((p)->_isfixnum & port_string))
145 | #define is_eofport(p)   (is_port(p) && ((p)->_isfixnum & port_eof))
146 | #define port_file(p)    ((p)->_object._port._file)
147 | #define port_curr(p)    ((p)->_object._port._curr)
148 | 
149 | #define is_vector(p)    (type(p) & T_VECTOR)
150 | 
151 | #define is_foreign(p)   (type(p) & T_FOREIGN)
152 | #define foreignfnc(p)   ((p)->_object._ff)
153 | 
154 | #define is_memblock(p)  (type(p) & T_MEMBLOCK)
155 | 
156 | #define is_environment(p) (type(p) & T_ENVIRONMENT)
157 | #define setenvironment(p) type(p) |= T_ENVIRONMENT
158 | 
159 | #define is_promise(p)   (exttype(p) & T_PROMISE)
160 | #define setpromise(p)   exttype(p) |= T_PROMISE
161 | #define is_resultready(p) (exttype(p) & T_RESULTREADY)
162 | #define setresultready(p) exttype(p) |= T_RESULTREADY
163 | 
164 | #define is_atom(p)      (type(p)&T_ATOM)
165 | #define setatom(p)      type(p) |= T_ATOM
166 | #define clratom(p)      type(p) &= CLRATOM
167 | 
168 | #define is_mark(p)      (type(p)&MARK)
169 | #define setmark(p)      type(p) |= MARK
170 | #define clrmark(p)      type(p) &= UNMARK
171 | 
172 | #define caar(p)         car(car(p))
173 | #define cadr(p)         car(cdr(p))
174 | #define cdar(p)         cdr(car(p))
175 | #define cddr(p)         cdr(cdr(p))
176 | #define cadar(p)        car(cdr(car(p)))
177 | #define caddr(p)        car(cdr(cdr(p)))
178 | #define cadaar(p)       car(cdr(car(car(p))))
179 | #define cadddr(p)       car(cdr(cdr(cdr(p))))
180 | #define cddddr(p)       cdr(cdr(cdr(cdr(p))))
181 | 
182 | extern pointer NIL;
183 | extern pointer T;
184 | extern pointer F;
185 | extern pointer EOF_OBJ;
186 | extern pointer mark_x;
187 | extern pointer mark_y;
188 | extern jmp_buf error_jmp;
189 | 
190 | pointer cons(pointer a, pointer b);
191 | pointer mk_character(int c);
192 | pointer mk_integer(int32_t num);
193 | pointer mk_real(double num);
194 | pointer mk_number(pointer v);
195 | pointer mk_string(const char *str);
196 | pointer mk_empty_string(size_t len, int fill);
197 | pointer mk_symbol(const char *name);
198 | pointer mk_uninterned_symbol(const char *name);
199 | pointer gensym(void);
200 | pointer mk_atom(const char *q);
201 | pointer mk_const(const char *name);
202 | pointer mk_port(FILE *fp, int prop);
203 | pointer mk_port_string(pointer p, int prop);
204 | pointer mk_vector(int len);
205 | pointer vector_elem(pointer v, int i);
206 | pointer set_vector_elem(pointer v, int i, pointer a);
207 | int list_length(pointer a);
208 | 
209 | void scheme_init(void);
210 | void scheme_deinit(void);
211 | int scheme_load_file(FILE *fin);
212 | int scheme_load_string(const char *cmd);
213 | void scheme_register_foreign_func(const char *name, foreign_func ff);
214 | pointer scheme_apply0(const char *procname);
215 | pointer scheme_apply1(const char *procname, pointer argslist);
216 | 
217 | #ifdef __cplusplus
218 | }
219 | #endif
220 | 
221 | #endif /* MINISCHEME_H */
222 | 


--------------------------------------------------------------------------------
/src/msvcbuild.bat:
--------------------------------------------------------------------------------
 1 | @setlocal
 2 | @set MSCOMPILE=cl /nologo /O2 /W3 /c /D_CRT_SECURE_NO_WARNINGS
 3 | @set MSLINK=link /nologo
 4 | @set MSLIB=lib /nologo
 5 | 
 6 | @if "%1" == "static" goto STATIC
 7 | @if not exist bin\ (
 8 |   mkdir bin
 9 | )
10 | %MSCOMPILE% /MT /DSTANDALONE=1 miniscm.c
11 | %MSCOMPILE% /MT bignum.c
12 | %MSLINK% /out:bin\miniscm.exe miniscm.obj bignum.obj
13 | 
14 | @goto END
15 | 
16 | :STATIC
17 | @if not exist lib\ (
18 |   mkdir lib
19 | )
20 | %MSCOMPILE% /MT /DSTANDALONE=0 miniscm.c
21 | %MSCOMPILE% /MT bignum.c
22 | %MSLIB% /out:lib\miniscm.lib miniscm.obj bignum.obj
23 | 
24 | :END
25 | del miniscm.obj bignum.obj
26 | 


--------------------------------------------------------------------------------