├── CONTACT ├── ChangeLog.beebe ├── INSTALL ├── Makefile.in ├── README ├── REPOSITORY ├── THANKS ├── array.c ├── array.h ├── array.w ├── bi_funct.c ├── bi_funct.h ├── bi_vars.c ├── bi_vars.h ├── cast.c ├── cdoc ├── Makefile ├── array.dvi ├── array.pdf ├── printf.dvi └── printf.pdf ├── code.c ├── code.h ├── config.hin ├── configure ├── configure.ac ├── da.c ├── error.c ├── examples ├── ct_length.awk ├── decl.awk ├── deps.awk ├── eatc.awk ├── gdecl.awk ├── hcal ├── hical ├── nocomment.awk ├── primes.awk └── qsort.awk ├── execute.c ├── fcall.c ├── field.c ├── field.h ├── files.c ├── files.h ├── fin.c ├── fin.h ├── hash.c ├── init.c ├── init.h ├── int.c ├── int.h ├── jmp.c ├── jmp.h ├── kw.c ├── main.c ├── makescan.c ├── man ├── mawk.1 └── mawk.txt ├── mawk.h ├── memory.c ├── memory.h ├── old ├── ACKNOWLEDGMENT └── CHANGES ├── packing.list ├── parse.c ├── parse.h ├── parse.y ├── print.c ├── printf.c ├── printf.h ├── printf.w ├── re_cmpl.c ├── regexp.h ├── repl.h ├── rexp ├── Makefile ├── rexp.c ├── rexp.h ├── rexp0.c ├── rexp1.c ├── rexp2.c ├── rexp3.c ├── rexpdb.c ├── wait.c └── wait.h ├── scan.c ├── scan.h ├── scancode.c ├── sizes.h ├── split.c ├── split.h ├── symtype.h ├── test ├── decl-awk.out ├── fpe_test ├── fpetest1.awk ├── fpetest2.awk ├── fpetest3.awk ├── mawktest ├── mawktest.dat ├── okay │ ├── mawktest.err │ └── mawktest.out ├── pipetest ├── pipetest.out ├── reg-awk.out ├── reg0.awk ├── reg1.awk ├── reg2.awk ├── wc-awk.out ├── wc.awk ├── wfrq-awk.out └── wfrq0.awk ├── types.h ├── version.c ├── zmalloc.c └── zmalloc.h /CONTACT: -------------------------------------------------------------------------------- 1 | Mike Brennan 2 | mawkeddy@gmail.com 3 | -------------------------------------------------------------------------------- /ChangeLog.beebe: -------------------------------------------------------------------------------- 1 | This log documents changes to mawk-1.3.9.1 to prepare 2 | mawk-1.3.9.1-autoconf, with drastically-improved portability, a few bug 
3 | fixes, and removal of a bug on all Solaris systems that prevented the 4 | test suite from passing. 5 | 6 | At the conclusion of the effort on a large collection of about 90 7 | physical and virtual machines at the Department of Mathematics at the 8 | University of Utah, using numerous versions of different C compilers, 9 | including compilers named c89, c99, cc, clang, gcc, icc, lcc, opencc, 10 | nvcc, pcc, pgcc, and suncc, and their companion C++ compilers, mawk now 11 | builds and validates with almost all of them. The remaining failures 12 | can all safely be characterized as compiler bugs or compiler 13 | limitations, none of which mawk source code can address. 14 | 15 | The CPU platforms tested include ARM, IA-64, MIPS, PowerPC, SPARC, x86, 16 | and x86-64. Of those, MIPS and SPARC are big endian, and the remainder 17 | are little-endian. PowerPC can be either: on Mac OS X, it is big 18 | endian, and on GNU/Linux CentOS 7, it is little endian. 19 | 20 | To give an idea of the number of builds, at the time of writing this, 21 | there are 678 logs, of which 630 report that all tests passed. 22 | 23 | Only relatively small changes to most of the mawk code were needed to 24 | ensure that c89 compilers can handle it (except on Solaris, where c89 25 | does not support 64-bit integers): these were just movement of 26 | declaration before prior executable statements, and are documented 27 | below. 28 | 29 | Rather more changes were needed to make the code compilable with C++ 30 | compilers. A few C++ reserved words had to be #define'd to acceptable 31 | ones, and a fair number of old K&R-style function definitions had to be 32 | rewritten into c89 form to be acceptable to C++ compilers. Also, 33 | because C++ does not permit silent casts of void to non-void types, a 34 | number of statements, notably those involving memory allocations, had to 35 | have explicit casts added to function calls. 
The C++ effort was 36 | worthwhile, because it uncovered two bugs that are now fixed, thanks to 37 | the stricter rules that compilers for C++ enforce compared to those for 38 | C. Also, having C++ compilability for C software can double the number 39 | of compilers to which the code can be exposed, and that is always a good 40 | thing in support of portable software. 41 | 42 | Initially, autoconfiguration support was added to make it possible to 43 | choose the compiler at build time in simultaneous builds. As more 44 | machines were tested, it was found necessary to introduce 45 | 46 | #include "config.h" 47 | 48 | to just two files to get configure-time symbol definitions for use in 49 | preprocessor statements; no compiler-supplied symbols could be used for 50 | that job. 51 | 52 | The original Makefile became Makefile.in, which configure then turns 53 | into Makefile. The Makefile.in was then substantially rewritten to 54 | improve the ease of testing of many builds: if a log reports 55 | 56 | SUCCESS: ALL 4 TESTS PASSED! 57 | 58 | then it can be ignored by the developer, apart from knowing that a "make 59 | install" on that system is now possible. A small wrapper script finds 60 | such log files, and applies "xz -9" maximal compression to them, so the 61 | remaining *.log files are the only ones that a human needs to examine. 62 | 63 | Testing found that on all Solaris systems, and their derivatives of 64 | OpenIndiana, Illumos, and Hipster, log(x) for x < 0 returns -Infinity, 65 | instead of a NaN. To remove that test failure, a bit of wrapper code 66 | was introduced in function bi_log() in bi_funct.c to test arguments 67 | before calling the math-library log() function. Now, the Solaris tests 68 | all pass (except for c89 compilation, as noted above).
69 | 70 | All files are under RCS control, and for uniformity, all trailing 71 | whitespace has been removed, so the changes recorded below are based on 72 | "rcsdiff -b -w" listings to ignore spacing differences: 73 | 74 | Here are brief descriptions of the changes from the mawk-1.3.9.1 test 75 | release: 76 | 77 | array.c: 78 | Add typecasts on void*-assignments and/or function arguments. 79 | 80 | Comment out two duplicate function prototypes to remove 81 | compiler warnings. 82 | 83 | bi_funct.c: 84 | Add typecasts on void*-assignments and/or function arguments. 85 | 86 | Convert K&R function definitions to c89 syntax. 87 | 88 | Add simple infty_() and nan_() functions, because infty() and 89 | nan() are not universally available, or defined in C89. 90 | 91 | Define THROW__ macro to attach to end of prototypes for 92 | random() and srandom(), which are not reliably declared in 93 | system header files on all systems, and to which C++ 94 | implementations may, or may not, attach a throw() attribute. 95 | 96 | Make bi_log() test for negative and zero arguments before 97 | calling log() to hide misbehavior of that function on Solaris. 98 | 99 | bi_vars.c: 100 | Add const modifier to declaration of bi_var_names[]. 101 | 102 | cast.c: 103 | Add typecasts on void*-assignments and/or function arguments. 104 | 105 | Convert K&R function definitions to c89 syntax. 106 | 107 | code.c: 108 | Convert K&R function definitions to c89 syntax. 109 | 110 | config.h: 111 | New file generated by configure script from config.hin, which 112 | in turn is generated by autoheader from configure.in. 113 | 114 | da.c: 115 | Add typecasts on void*-assignments and/or function arguments. 116 | 117 | Convert K&R function definitions to c89 syntax. 118 | 119 | Change shadowed name variable to name_ in one code block to 120 | eliminate compiler warnings. 121 | 122 | error.c: 123 | Convert K&R function definitions to c89 syntax.
124 | 125 | execute.c: 126 | Add typecasts on void*-assignments and/or function arguments. 127 | 128 | Convert K&R function definitions to c89 syntax. 129 | 130 | fcall.c: 131 | Convert K&R function definitions to c89 syntax. 132 | 133 | field.c: 134 | Add typecasts on void*-assignments and/or function arguments. 135 | 136 | files.c: 137 | Convert K&R function definitions to c89 syntax. 138 | 139 | fin.c: 140 | Add prototype for isatty(). 141 | 142 | Convert K&R function definitions to c89 syntax. 143 | 144 | fin.h: 145 | Add preprocessor #define to change "setmode" to "setmode_" to 146 | avoid conflict with a declaration in a system header on BitRig BSD. 147 | 148 | fpe_check.c: 149 | Convert K&R function definitions to c89 syntax. 150 | 151 | hash.c: 152 | Add preprocessor #define to rename C++ reserved word "delete" 153 | to "delete_". 154 | 155 | Convert K&R function definitions to c89 syntax. 156 | 157 | init.c: 158 | Convert K&R function definitions to c89 syntax. 159 | 160 | Add preprocessor #define to rename optarg (declared in 161 | a system header on some systems) to optarg_. 162 | 163 | Add print_help() (output largely derived from "man 164 | mawk"), and recognize --help and --version options, with 165 | immediate exit 0, for compatibility with lots of other 166 | free and open-source software. 167 | 168 | Add support for --help and --version options. 169 | 170 | kw.c: 171 | Convert K&R function definitions to c89 syntax. 172 | 173 | main.c: 174 | Convert K&R function definitions to c89 syntax. 175 | 176 | makescan.c: 177 | Convert K&R function definitions to c89 syntax. 178 | 179 | Fix bug caught by C++ compilers: close(fp) should be 180 | fclose(fp). 181 | 182 | matherr.c: 183 | Include config.h for HAVE_SIGINFO symbol. 184 | 185 | Convert K&R function definitions to c89 syntax. 186 | 187 | Define THROW__ appropriately for Solaris (__SunOS), C++, and 188 | other. 189 | 190 | mawk.h: 191 | Include a system header for declarations of extended integer types.
192 | 193 | parse.y: 194 | Add typecasts on void*-assignments and/or function arguments. 195 | 196 | Convert K&R function definitions to c89 syntax. 197 | 198 | print.c: 199 | Convert K&R function definitions to c89 syntax. 200 | 201 | printf.c: 202 | Add typecasts on void*-assignments and/or function arguments. 203 | 204 | Move one declaration before executable code to allow C89 205 | compilation. 206 | 207 | re_cmpl.c: 208 | Add typecasts on void*-assignments and/or function arguments. 209 | 210 | Convert K&R function definitions to c89 syntax. 211 | 212 | scan.c: 213 | Add typecasts on void*-assignments and/or function arguments. 214 | 215 | Convert K&R function definitions to c89 syntax. 216 | 217 | version.c: 218 | Convert K&R function definitions to c89 syntax. 219 | 220 | zmalloc.c: 221 | Add typecasts on void*-assignments and/or function arguments. 222 | 223 | Convert K&R function definitions to c89 syntax. 224 | 225 | rexp/rexp.c: 226 | Convert K&R function definitions to c89 syntax. 227 | 228 | rexp/rexp.h: 229 | Add const to RE_panic() prototype. 230 | 231 | rexp/rexp0.c: 232 | Add typecasts on void*-assignments and/or function arguments. 233 | 234 | Convert K&R function definitions to c89 syntax. 235 | 236 | Add definition of nonstandard (but often available) macro isblank(). 237 | 238 | Add preprocessor #define to rename C++ reserved word "try" to 239 | "try_". 240 | 241 | rexp/rexp1.c: 242 | Add typecasts on void*-assignments and/or function arguments. 243 | 244 | rexp/rexp2.c: 245 | Add typecasts on void*-assignments and/or function arguments. 246 | 247 | Convert K&R function definitions to c89 syntax. 248 | 249 | rexp/rexp3.c: 250 | Comment out duplicate prototype for RE_new_run_stack().
251 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | $ ./configure 3 | $ make 4 | $ make check 5 | 6 | This gives you an executable mawk. 7 | You can copy it to a bin directory or run "make install" to install it in /usr/local/bin. 8 | 9 | Report build problems to Mike Brennan 10 | -------------------------------------------------------------------------------- /Makefile.in: -------------------------------------------------------------------------------- 1 | ### ==================================================================== 2 | ### This section gets transformed by the configure script into Makefile 3 | 4 | prefix = @prefix@ 5 | BINDIR = $(prefix)/bin 6 | CC = @CC@ 7 | CFLAGS = @CFLAGS@ 8 | CPP = @CPP@ 9 | MANDIR = $(prefix)/man/man$(MANEXT) 10 | MANEXT = 1 11 | LIBS = @LIBS@ 12 | SHELL = /bin/sh 13 | YACC = @YACC@ 14 | 15 | ### ==================================================================== 16 | ### The remainder of this file is copied verbatim into Makefile 17 | 18 | ### Some systems and libraries print -nan, while others print nan or 19 | ### NaN: the sign of NaN is undefined, so this filter eliminates that 20 | ### difference. Similarly, the lettercase of Inf may vary.
On SGI 21 | ### IRIX, NaN prints as nan0x10000000, so we also have to filter out 22 | ### the payload. The last two rules run in order: Inf is lowercased first, ### then a spelled-out infinity (any lettercase) is shortened to inf. 23 | 24 | FPE_FILTER = sed \ 25 | -e 's/[Nn][Aa][Nn]/nan/' \ 26 | -e 's/-nan/nan/' \ 27 | -e 's/nan0x[0-9a-fA-F][0-9a-fA-F]*/nan/' \ 28 | -e 's/[Ii][Nn][Ff]/inf/' \ 29 | -e 's/[Ii][Nn][Ff][Ii][Nn][Ii][Tt][Yy]/inf/' 30 | 31 | O = array.o bi_funct.o bi_vars.o cast.o code.o da.o error.o \ 32 | execute.o fcall.o field.o files.o fin.o hash.o init.o int.o \ 33 | jmp.o kw.o main.o memory.o parse.o print.o \ 34 | printf.o re_cmpl.o scan.o scancode.o split.o version.o \ 35 | zmalloc.o 36 | 37 | REXP_O = $(REXP_C:.c=.o) 38 | 39 | REXP_C = rexp/rexp.c rexp/rexp0.c rexp/rexp1.c rexp/rexp2.c \ 40 | rexp/rexp3.c rexp/rexpdb.c rexp/wait.c 41 | 42 | STATUS = check.status 43 | 44 | EXCLUDEFLAGS = --exclude=$$$$b/Makefile \ 45 | --exclude=RCS \ 46 | --exclude=autom4te.cache \ 47 | --exclude=config.h \ 48 | --exclude=config.log \ 49 | --exclude=config.status \ 50 | --exclude='*~' 51 | 52 | ### ==================================================================== 53 | 54 | .SUFFIXES: .i 55 | 56 | .c.i: 57 | $(CPP) $(CFLAGS) $< > $*.i 58 | 59 | ### ==================================================================== 60 | 61 | 62 | all : mkrexp mawk 63 | 64 | check: check_begin mawk_test check_end 65 | 66 | check_begin: 67 | -@rm -f $(STATUS) 68 | 69 | check_end: 70 | @echo ; \ 71 | npass=`grep -c '^PASS' $(STATUS) || true` ; \ 72 | nfail=`grep -c '^FAIL' $(STATUS) || true` ; \ 73 | test $$npass -eq 2 && echo SUCCESS: TEST PASSED || \ 74 | ( cat $(STATUS) ; echo ; echo FAILURE: $$nfail of `expr $$npass + $$nfail` tests failed ) 75 | @for f in mawktest.err mawktest.out ; \ 76 | do \ 77 | cmp -s test/okay/$$f test/$$f || (echo ; echo Differences in $$f ; diff test/okay/$$f test/$$f ) ; \ 78 | done 79 | 80 | mawk : $(O) $(REXP_O) 81 | $(CC) $(CFLAGS) -o mawk $(O) $(REXP_O) $(LIBS) 82 | 83 | mkrexp : 84 | cd rexp ;\ 85 | $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS) -DMAWK -I.."
86 | 87 | 88 | mawk_test : mawk # test that we have a sane mawk 89 | @echo ; echo testing mawk 90 | -cd test ; ./mawktest > mawktest.out 2> mawktest.err 91 | @( cmp -s test/okay/mawktest.err test/mawktest.err && echo PASS: mawktest.err || echo FAIL: mawktest.err ) >> $(STATUS) 92 | @( cmp -s test/okay/mawktest.out test/mawktest.out && echo PASS: mawktest.out || echo FAIL: mawktest.out ) >> $(STATUS) 93 | 94 | fpe_test : mawk # test FPEs are handled OK 95 | @echo ; echo testing floating point exception handling 96 | -cd test ; ./fpe_test 97 | 98 | $(REXP_O) : 99 | cd rexp ;\ 100 | $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS) -DMAWK -I.." 101 | 102 | parse.c parse.h : parse.y 103 | @echo expect 6 shift/reduce conflicts 104 | $(YACC) -d parse.y 105 | mv y.tab.c parse.c 106 | mv y.tab.h parse.h 107 | 108 | array.c : array.w 109 | notangle -R'"array.c"' array.w | cpif array.c 110 | 111 | array.h : array.w 112 | notangle -R'"array.h"' array.w | cpif array.h 113 | 114 | printf.c : printf.w 115 | notangle -R'"printf.c"' printf.w | cpif printf.c 116 | 117 | printf.h : printf.w 118 | notangle -R'"printf.h"' printf.w | cpif printf.h 119 | 120 | scancode.c : makescan.c scan.h 121 | $(CC) -o makescan.exe makescan.c 122 | rm -f scancode.c 123 | ./makescan.exe > scancode.c 124 | rm makescan.exe 125 | 126 | MAWKMAN = $(MANDIR)/mawk.$(MANEXT) 127 | install : mawk 128 | cp mawk $(BINDIR) 129 | chmod 0755 $(BINDIR)/mawk 130 | cp man/mawk.1 $(MAWKMAN) 131 | chmod 0644 $(MAWKMAN) 132 | 133 | clean : 134 | -rm -f *.i *.o rexp/*.o test/mawk mawk $(STATUS) 135 | 136 | dist : 137 | -d=`pwd` ; \ 138 | b=`basename $$d` ; \ 139 | $(MAKE) clean ; \ 140 | (cd ..; env GZIP=-9 tar -c -z ${EXCLUDEFLAGS} -f $$b.tar.gz $$b 2> /dev/null || true) ; \ 141 | (cd ..; env BZIP2=-9 tar -c -j ${EXCLUDEFLAGS} -f $$b.tar.bz2 $$b 2> /dev/null || true) ; \ 142 | (cd ..; env XZ=-9 tar -c -J ${EXCLUDEFLAGS} -f $$b.tar.xz $$b 2> /dev/null || true) ; \ 143 | ls -l ../$$b.tar.* 144 | 145 | distclean : clean 146 | -rm -f 
config.h Makefile \ 147 | config.status config.user config.log config.cache 148 | -rm -rf autom4te.cache 149 | 150 | configure config.hin : configure.ac 151 | autoconf 152 | autoheader 153 | 154 | doc : 155 | cd cdoc ; $(MAKE) 156 | 157 | 158 | ### ==================================================================== 159 | 160 | # dependencies computed by compiling with -MMD and makedeps *.d 161 | array.o: array.c mawk.h types.h sizes.h int.h symtype.h array.h memory.h \ 162 | zmalloc.h split.h field.h bi_vars.h 163 | bi_funct.o: bi_funct.c mawk.h types.h sizes.h bi_funct.h symtype.h \ 164 | array.h bi_vars.h memory.h zmalloc.h init.h int.h files.h fin.h field.h \ 165 | regexp.h repl.h 166 | bi_vars.o: bi_vars.c mawk.h types.h sizes.h symtype.h array.h bi_vars.h \ 167 | field.h init.h memory.h zmalloc.h 168 | cast.o: cast.c mawk.h types.h sizes.h field.h memory.h zmalloc.h scan.h \ 169 | symtype.h array.h parse.h repl.h int.h 170 | code.o: code.c mawk.h types.h sizes.h code.h memory.h zmalloc.h init.h \ 171 | symtype.h array.h jmp.h field.h 172 | da.o: da.c mawk.h types.h sizes.h code.h memory.h zmalloc.h bi_funct.h \ 173 | symtype.h array.h repl.h field.h printf.h files.h 174 | error.o: error.c mawk.h types.h sizes.h scan.h symtype.h array.h parse.h \ 175 | bi_vars.h 176 | execute.o: execute.c mawk.h types.h sizes.h code.h memory.h zmalloc.h \ 177 | symtype.h array.h int.h field.h bi_funct.h bi_vars.h regexp.h repl.h \ 178 | fin.h 179 | fcall.o: fcall.c mawk.h types.h sizes.h symtype.h array.h code.h memory.h \ 180 | zmalloc.h 181 | field.o: field.c mawk.h types.h sizes.h int.h split.h field.h init.h \ 182 | symtype.h array.h memory.h zmalloc.h scan.h parse.h bi_vars.h repl.h \ 183 | regexp.h 184 | files.o: files.c mawk.h types.h sizes.h files.h memory.h zmalloc.h fin.h 185 | fin.o: fin.c mawk.h types.h sizes.h fin.h memory.h zmalloc.h bi_vars.h \ 186 | field.h symtype.h array.h scan.h parse.h 187 | hash.o: hash.c mawk.h types.h sizes.h memory.h zmalloc.h symtype.h 
\ 188 | array.h 189 | init.o: init.c mawk.h types.h sizes.h code.h memory.h zmalloc.h symtype.h \ 190 | array.h init.h bi_vars.h field.h 191 | int.o: int.c int.h 192 | jmp.o: jmp.c mawk.h types.h sizes.h symtype.h array.h jmp.h code.h \ 193 | memory.h zmalloc.h init.h 194 | kw.o: kw.c mawk.h types.h sizes.h symtype.h array.h parse.h init.h 195 | main.o: main.c mawk.h types.h sizes.h init.h symtype.h array.h code.h \ 196 | memory.h zmalloc.h files.h 197 | memory.o: memory.c mawk.h types.h sizes.h memory.h zmalloc.h 198 | parse.o: parse.c mawk.h types.h sizes.h symtype.h array.h code.h memory.h \ 199 | zmalloc.h bi_funct.h bi_vars.h jmp.h field.h files.h printf.h 200 | print.o: print.c mawk.h types.h sizes.h bi_vars.h bi_funct.h symtype.h \ 201 | array.h memory.h zmalloc.h field.h scan.h parse.h files.h int.h printf.h 202 | printf.o: printf.c mawk.h types.h sizes.h scan.h symtype.h array.h \ 203 | parse.h printf.h memory.h zmalloc.h files.h int.h 204 | re_cmpl.o: re_cmpl.c mawk.h types.h sizes.h memory.h zmalloc.h scan.h \ 205 | symtype.h array.h parse.h regexp.h repl.h split.h 206 | scan.o: scan.c mawk.h types.h sizes.h scan.h symtype.h array.h parse.h \ 207 | memory.h zmalloc.h field.h init.h int.h fin.h repl.h code.h files.h 208 | scancode.o: scancode.c 209 | split.o: split.c mawk.h types.h sizes.h split.h symtype.h array.h \ 210 | bi_vars.h bi_funct.h memory.h zmalloc.h scan.h parse.h regexp.h repl.h \ 211 | field.h 212 | version.o: version.c mawk.h types.h sizes.h 213 | zmalloc.o: zmalloc.c mawk.h types.h sizes.h zmalloc.h 214 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This is mawk 1.9.9.x, a beta release for 2.0.0. 2 | 3 | I first released mawk 1.0 in 1991 and last released mawk 1.3.3 in 1996. 4 | (A few people had mawk 1.3.3.1 with nextfile, 1999.) 5 | 6 | Why a 25 and 20 year anniversary release? 
Because I always knew a 7 | few things could be done better and design decisions that were right for 8 | the 90's were wrong for the 21st century. 9 | 10 | In my absence, there have been other developers that produced mawk 1.3.4-xxx. 11 | I started from 1.3.3 and there is no code from the 1.3.4 developers in 12 | this mawk, because their work either did not address my concerns or 13 | inadequately addressed my concerns or, in some cases, 14 | was wrong. I did look at the 15 | bug reports and fixed those that applied to 1.3.3. 16 | I did switch to the FNV-1a hash function as suggested in a bug report. 17 | 18 | Here is what is new. 19 | 20 | (1) Oddly written but legal regular expressions could cause exponential 21 | blowup of execution time versus input length. 22 | Consider, 23 | 24 | mawk '!/(a|aa)*Z/' aN 25 | 26 | where the content of file aN is one line with N a's, terminated with X. 27 | E.g., 28 | a5 is aaaaaX 29 | a10 is aaaaaaaaaaX 30 | a20 is aaaaaaaaaaaaaaaaaaaaX 31 | etc 32 | 33 | 34 | On a 5000 bogomips box, using mawk133, times are: 35 | 36 | a5 .002 sec 37 | a20 .005 38 | a40 53.2 sec 39 | a50 1 hour 41 min 40 | a1000 more seconds than there are atoms in the universe 41 | 42 | This release of mawk does a1000 in .005 seconds. 43 | 44 | For reasonably written regular expressions and normal input, this 45 | bug for most people never came up. In that sense, it is a minor bug. 46 | However in the sense that a regular expression algorithm should 47 | have linear execution time relative to the input length in all 48 | cases, it was a major error by me. 49 | 50 | (2) Fixed limit on number of fields, $1 $2 ... is removed. 51 | 52 | (3) Fixed limit on length of a string produced by sprintf() is removed. 53 | 54 | (4) Sizes chosen for 1991-96 have been adjusted for the 21st century. 55 | Most important, the input buffer is bigger and grows faster to handle 56 | long input records. The memory allocator blocks are bigger. 57 | The hash tables have more slots.
58 | 59 | (5) gsub() is no longer recursive which makes it faster and more 60 | reliable. ^ is handled correctly. 61 | 62 | (6) printf and sprintf handle bigger integers. For example, 63 | 64 | $ mawk 'BEGIN{ printf "%x %x %d\n", -1, 2^63, -2^63}' 65 | ffffffffffffffff 8000000000000000 -9223372036854775808 66 | 67 | Awk prints an integer as an integer (%d) and other numbers 68 | using OFMT (default to %.6g). The new mawk recognizes bigger integers. 69 | 70 | $ mawk133 'BEGIN { print 2^33}' 71 | 8.58993e+09 72 | 73 | $ mawk 'BEGIN { print 2^33}' 74 | 8589934592 75 | 76 | In this area, there is a mild disagreement between gawk and mawk. 77 | 78 | $ mawk 'BEGIN { print exp(37)}' 79 | 1.17191e+16 80 | 81 | $ gawk 'BEGIN { print exp(37)}' 82 | 11719142372802612 83 | 84 | Actual value is 85 | 11719142372802611.3086... 86 | 87 | (7) The character '\0' (zero) can be an element of a string. 88 | 89 | (8) Design of arrays was simplified. No effect from user perspective, 90 | but more maintainable from developer perspective. 91 | 92 | (9) nextfile 93 | 94 | (10) length(A) where A is an array returns the number of elements in the 95 | array. 96 | 97 | (11) Backslash in replacement strings. 98 | 99 | $ echo ABC | mawk133 '{sub(/B/,"\\\\") ; print}' 100 | A\C 101 | 102 | $ echo ABC | mawk '{sub(/B/,"\\\\") ; print}' 103 | A\\C 104 | 105 | The 133 behavior follows the early 90's posix spec, but it is confusing 106 | that a string without & is altered. Gawk and Kernighan's awk do it 107 | differently and now mawk agrees with them. 108 | 109 | \ escapes \ and \ escapes &, but only if the run of \ ends in &. 
110 | 111 | For example, 112 | 113 | $ echo ABC | mawk '{sub(/B/,"\\\\&") ; print}' 114 | A\BC 115 | 116 | (12) Some years ago, 117 | 118 | $ echo 0x4 inf nan | awk '{ print 7 + $1, 8 + $2, 9+$3}' 119 | 7 8 9 120 | 121 | for all awk's, but now 122 | 123 | $ echo 0x4 inf nan | mawk133 '{ print 7 + $1, 8 + $2, 9+$3}' 124 | 11 inf nan 125 | 126 | What changed was that the C-library strtod() started recognizing "inf", "nan" 127 | and hex strings. But changes that suit a low-level C library are not 128 | right for a high-level language like awk. So, in agreement with 129 | gawk, the new mawk gives the old result. 130 | 131 | $ echo 0x4 inf nan | mawk '{ print 7 + $1, 8 + $2, 9+$3}' 132 | 7 8 9 133 | 134 | (13) Regular expression character classes such as /[[:digit:]]/ 135 | are now supported. 136 | The complete list is alnum, alpha, blank, cntrl, digit, graph, 137 | lower, print, space, upper, xdigit. 138 | 139 | ------------------------------------------------------ 140 | TBD. The man pages need updating. 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /REPOSITORY: -------------------------------------------------------------------------------- 1 | # latest mawk is at 2 | 3 | https://drive.google.com/open?id=0B_Q-mbHiy9g-Ry1WY1BjVUtJX3M 4 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | I appreciate the help from: 2 | 3 | Arnold Robbins advised me on new features such as length(array) and 4 | changing environment such as strtod() behavior. Most important 5 | he encouraged me to make a new mawk release. He also introduced 6 | me to Nelson H.F. Beebe. 7 | 8 | Nelson tested mawk on about 90 machines. He modernized the code so 9 | it could be compiled with different C and C++ compilers. He did 10 | the autoconf setup so the build could be controlled by configure.
11 | The details are in ChangeLog.beebe. 12 | -------------------------------------------------------------------------------- /array.h: -------------------------------------------------------------------------------- 1 | /* array.h */ 2 | /* 3 | copyright 1991-1996,2014-2016 Michael D. Brennan 4 | 5 | This is a source file for mawk, an implementation of 6 | the AWK programming language. 7 | 8 | Mawk is distributed without warranty under the terms of 9 | the GNU General Public License, version 3, 2007. 10 | 11 | array.c and array.h were generated with the commands 12 | 13 | notangle -R'"array.c"' array.w > array.c 14 | notangle -R'"array.h"' array.w > array.h 15 | 16 | Notangle is part of Norman Ramsey's noweb literate programming package. 17 | Noweb home page: http://www.cs.tufts.edu/~nr/noweb/ 18 | 19 | It's easiest to read or modify this file by working with array.w. 20 | */ 21 | 22 | #ifndef ARRAY_H 23 | #define ARRAY_H 1 24 | 25 | #include "types.h" 26 | #include "int.h" 27 | 28 | typedef struct array { 29 | void* ptr ; /* What this points to depends on the type */ 30 | size_t size ; /* number of elts in the table */ 31 | int type ; /* values in AY_NULL .. AY_INT */ 32 | } *ARRAY ; /* array type codes stored in the type member; AY_NULL is 0 */ 33 | enum { 34 | AY_NULL = 0, 35 | AY_SPLIT, 36 | AY_STR, 37 | AY_INT 38 | } ; /* new_ARRAY(): ZMALLOC a struct array and memset it to all zero bytes, so its type starts as AY_NULL (0) */ 39 | #define new_ARRAY() ((ARRAY)memset(ZMALLOC(struct array),0,sizeof(struct array))) 40 | #define NO_CREATE 0 41 | #define CREATE 1 42 | #define DELETE_ 2 43 | typedef struct aloop { 44 | struct aloop* link ; 45 | int type ; /* AY_NULL .. 
AY_INT */ 46 | unsigned size ; 47 | unsigned next ; 48 | CELL* cp ; 49 | union { 50 | STRING** sval ; /* for AY_STR */ 51 | int64_t* ival ; /* for AY_INT */ 52 | } ptr ; 53 | } ALoop ; 54 | CELL* array_find(ARRAY, CELL*, int); 55 | void array_delete(ARRAY, CELL*); 56 | void array_load(ARRAY, size_t); 57 | void array_clear(ARRAY); 58 | CELL* array_cat(CELL*, int); 59 | 60 | ALoop* make_aloop(ARRAY, CELL*) ; 61 | void aloop_free(ALoop*) ; 62 | int aloop_next(ALoop*) ; 63 | 64 | 65 | #endif /* ARRAY_H */ 66 | 67 | -------------------------------------------------------------------------------- /bi_funct.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | bi_funct.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | #ifndef BI_FUNCT_H 19 | #define BI_FUNCT_H 1 20 | 21 | #include "symtype.h" 22 | 23 | extern BI_REC bi_funct[] ; 24 | 25 | void bi_init(void) ; 26 | 27 | CELL * bi_print(CELL *) ; 28 | CELL * bi_printf(CELL *) ; 29 | CELL * bi_printf1(CELL *) ; 30 | CELL * bi_length(CELL *) ; 31 | CELL* bi_alength(CELL*) ; /* length/size of an array */ 32 | CELL * bi_index(CELL *) ; 33 | CELL * bi_substr(CELL *) ; 34 | CELL * bi_sprintf(CELL *) ; 35 | CELL * bi_sprintf1(CELL *) ; 36 | CELL * bi_split(CELL *) ; 37 | CELL * bi_match(CELL *) ; 38 | CELL * bi_getline(CELL *) ; 39 | CELL * bi_sub(CELL *) ; 40 | CELL * bi_gsub(CELL *) ; 41 | CELL * bi_toupper(CELL*) ; 42 | CELL * bi_tolower(CELL*) ; 43 | 44 | /* builtin arith functions */ 45 | CELL * bi_sin(CELL *) ; 46 | CELL * bi_cos(CELL *) ; 47 | CELL * bi_atan2(CELL *) ; 48 | CELL * bi_log(CELL *) ; 49 | CELL * bi_exp(CELL *) ; 50 | CELL * bi_int(CELL *) ; 51 | CELL * bi_sqrt(CELL *) ; 52 | CELL * bi_srand(CELL *) ; 53 | CELL * bi_rand(CELL *) ; 54 | 55 | /* other builtins */ 56 | CELL * bi_close(CELL *) ; 57 | CELL * bi_system(CELL *) ; 58 | CELL * bi_fflush(CELL *) ; 59 | 60 | #endif /* BI_FUNCT_H */ 61 | 62 | -------------------------------------------------------------------------------- /bi_vars.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | bi_vars.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | /* bi_vars.c */ 19 | 20 | #include "mawk.h" 21 | #include "symtype.h" 22 | #include "bi_vars.h" 23 | #include "field.h" 24 | #include "init.h" 25 | #include "memory.h" 26 | 27 | /* the builtin variables */ 28 | CELL bi_vars[NUM_BI_VAR] ; 29 | 30 | /* the order here must match the order in bi_vars.h; bi_vars_init() pairs bi_var_names[i] with bi_vars[i] */ 31 | 32 | static const char *bi_var_names[NUM_BI_VAR] = { 33 | "NR" , 34 | "FNR" , 35 | "ARGC" , 36 | "FILENAME" , 37 | "OFS" , 38 | "ORS" , 39 | "RLENGTH" , 40 | "RSTART" , 41 | "SUBSEP" 42 | #if MSDOS 43 | , "BINMODE" 44 | #endif 45 | } ; 46 | 47 | /* insert the builtin vars in the hash table, insert ENVIRON as ST_ENV, then give FILENAME, OFS, ORS, SUBSEP, NR and FNR (and BINMODE when MSDOS is set) their initial cell types and string values */ 48 | 49 | void bi_vars_init(void) 50 | { register int i ; 51 | register SYMTAB *s ; 52 | 53 | 54 | for ( i = 0 ; i < NUM_BI_VAR ; i++ ) 55 | { s = insert( bi_var_names[i] ) ; 56 | s->type = i <= 1 ? ST_NR : ST_VAR ; /* indices 0 and 1 are the names NR and FNR */ 57 | s->stval.cp = bi_vars + i ; 58 | /* bi_vars[i].type = 0 which is C_NOINIT */ 59 | } 60 | 61 | s = insert("ENVIRON") ; 62 | s->type = ST_ENV ; 63 | 64 | /* set defaults */ 65 | 66 | FILENAME->type = C_STRING ; 67 | FILENAME->ptr = (PTR) new_STRING( "" ) ; 68 | 69 | OFS->type = C_STRING ; 70 | OFS->ptr = (PTR) new_STRING( " " ) ; 71 | 72 | ORS->type = C_STRING ; 73 | ORS->ptr = (PTR) new_STRING( "\n" ) ; 74 | 75 | SUBSEP->type = C_STRING ; 76 | SUBSEP->ptr = (PTR) new_STRING( "\034" ) ; 77 | 78 | NR->type = FNR->type = C_DOUBLE ; 79 | /* dval is already 0.0 */ 80 | 81 | #if MSDOS 82 | BINMODE->type = C_DOUBLE ; 83 | #endif 84 | } 85 | -------------------------------------------------------------------------------- /bi_vars.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | bi_vars.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language.
/* bi_vars.h */

/* Slot macros for the builtin variables kept in the bi_vars[] array.
   Each macro expands to a pointer to the CELL of one builtin. */

#ifndef BI_VARS_H
#define BI_VARS_H 1


/* builtin variables NF, RS, FS, OFMT are stored
   internally in field[], so side effects of assignment can
   be handled
*/

/* NR and FNR must be next to each other */
/* The slot numbers below are positions in bi_vars[]; the name table
   in bi_vars.c has to list the names in exactly this order. */
#define NR bi_vars
#define FNR (bi_vars+1)
#define ARGC (bi_vars+2)
#define FILENAME (bi_vars+3)
#define OFS (bi_vars+4)
#define ORS (bi_vars+5)
#define RLENGTH (bi_vars+6)
#define RSTART (bi_vars+7)
#define SUBSEP (bi_vars+8)

/* BINMODE exists only in MSDOS builds, which is why the number of
   slots depends on the MSDOS setting */
#if MSDOS
#define BINMODE (bi_vars+9)
#define NUM_BI_VAR 10
#else
#define NUM_BI_VAR 9
#endif

extern CELL bi_vars[NUM_BI_VAR] ;


#endif
14 | ********************************************/ 15 | 16 | 17 | 18 | 19 | /* cast.c */ 20 | 21 | #include "mawk.h" 22 | #include "field.h" 23 | #include "memory.h" 24 | #include "scan.h" 25 | #include "repl.h" 26 | #include "int.h" 27 | 28 | int mpow2[NUM_CELL_TYPES] = 29 | {1, 2, 4, 8, 16, 32, 64, 128, 256, 512} ; 30 | 31 | 32 | /* modern strtod accepts "inf" "nan" and hex numbers 33 | awk should not (gawk and mawk agree on this) 34 | 35 | maybe if --posix, but stubbed out for now] 36 | */ 37 | 38 | int posix_flag = 0 ; 39 | 40 | static 41 | double awk_strtod(const STRING* sval) 42 | { 43 | double ret = 0.0 ; 44 | const char* s = sval->str ; 45 | char* stop ; 46 | /* eat space ourselves because it makes it easy to eliminate "inf"/"nan" */ 47 | while (scan_code[*(unsigned char*)s] == SC_SPACE) s++ ; 48 | 49 | switch (scan_code[*(unsigned char*)s]) 50 | { 51 | case SC_DIGIT: 52 | case SC_PLUS: 53 | case SC_MINUS: 54 | case SC_DOT: 55 | errno = 0 ; 56 | ret = strtod(s, &stop) ; 57 | #if FPE_TRAPS_ON 58 | if (errno && ret != 0.0) { 59 | rt_error("overflow converting \"%s\" to double",s) ; 60 | } 61 | #endif 62 | 63 | /* check for hex number */ 64 | while(s < stop) { 65 | if (*s == 'x' || *s == 'X') return 0.0 ; 66 | s++ ; 67 | } 68 | 69 | } 70 | 71 | return ret ; 72 | } 73 | 74 | static 75 | double posix_strtod(const STRING* sval) 76 | { 77 | double ret ; 78 | 79 | errno = 0 ; 80 | ret = strtod(sval->str, 0) ; 81 | #if FPE_TRAPS_ON 82 | if (errno && ret != 0) { 83 | rt_error("overflow converting \"%s\" to double",sval->str) ; 84 | } 85 | #endif 86 | return ret ; 87 | } 88 | 89 | void 90 | cast1_to_d(CELL* cp) 91 | { 92 | switch (cp->type) 93 | { 94 | case C_NOINIT: 95 | cp->dval = 0.0 ; 96 | break ; 97 | 98 | case C_DOUBLE: 99 | return ; 100 | 101 | case C_MBSTRN: 102 | case C_STRING: 103 | { 104 | STRING* sval = (STRING *) cp->ptr ; 105 | 106 | cp->dval = posix_flag ? 
posix_strtod(sval) 107 | : awk_strtod(sval) ; 108 | 109 | free_STRING(sval) ; 110 | } 111 | break ; 112 | 113 | case C_STRNUM: 114 | /* don't need to convert, but do need to free the STRING part */ 115 | free_STRING(string(cp)) ; 116 | break ; 117 | 118 | 119 | default: 120 | bozo("cast on bad type") ; 121 | } 122 | cp->type = C_DOUBLE ; 123 | } 124 | 125 | 126 | static 127 | STRING* slow_convfmt(const char* conv, double d, size_t need) 128 | { 129 | /* don't expect to get here for a reasonable program */ 130 | STRING* ret ; 131 | char* buffer = (char *)emalloc(need+1) ; 132 | buffer[need] = 0 ; 133 | sprintf(buffer, conv, d) ; 134 | ret = new_STRING(buffer) ; 135 | free(buffer) ; 136 | return ret ; 137 | } 138 | 139 | void 140 | cast1_to_s(CELL* cp) 141 | { 142 | switch (cp->type) { 143 | case C_NOINIT: 144 | cp->ptr = STRING_dup(the_empty_str) ; 145 | break ; 146 | 147 | case C_DOUBLE: 148 | { 149 | char buffer[1024] ; 150 | double d = cp->dval ; 151 | if (is_int_double(d)) { 152 | #if LONG64 153 | sprintf(buffer,"%ld", (int64_t) d) ; 154 | #else 155 | sprintf(buffer,"%lld", (int64_t) d) ; 156 | #endif 157 | cp->ptr = new_STRING(buffer) ; 158 | } 159 | else { 160 | const char* conv = string(CONVFMT)->str ; 161 | unsigned used ; 162 | used = snprintf(buffer, 1024, conv, d) ; 163 | if ((int) used < 0) { 164 | rt_error("snprintf bozo (%d)", errno) ; 165 | } 166 | if (used > 1024) { 167 | cp->ptr = slow_convfmt(conv, d, used) ; 168 | } 169 | else { 170 | cp->ptr = new_STRING2(buffer,used) ; 171 | } 172 | } 173 | break ; 174 | } 175 | 176 | case C_STRING: 177 | return ; 178 | 179 | case C_MBSTRN: 180 | case C_STRNUM: 181 | break ; 182 | 183 | default: 184 | bozo("bad type on cast") ; 185 | } 186 | cp->type = C_STRING ; 187 | } 188 | 189 | 190 | void 191 | cast_to_RE(CELL *cp) 192 | { 193 | register PTR p ; 194 | 195 | if (cp->type < C_STRING) cast1_to_s(cp) ; 196 | 197 | p = re_compile(string(cp)) ; 198 | free_STRING(string(cp)) ; 199 | cp->type = C_RE ; 200 | cp->ptr 
= p ; 201 | 202 | } 203 | 204 | void 205 | cast_for_split(CELL* cp) 206 | { 207 | static char meta[] = "^$.*+?|[]()" ; 208 | static char xbuff[] = "\\X" ; 209 | int c ; 210 | size_t len ; 211 | 212 | if (cp->type < C_STRING) cast1_to_s(cp) ; 213 | 214 | if ((len = string(cp)->len) == 1) 215 | { 216 | if ((c = string(cp)->str[0]) == ' ') 217 | { 218 | free_STRING(string(cp)) ; 219 | cp->type = C_SPACE ; 220 | return ; 221 | } 222 | else if (c != 0 && strchr(meta, c)) 223 | { 224 | xbuff[1] = c ; 225 | free_STRING(string(cp)) ; 226 | cp->ptr = (PTR) new_STRING(xbuff) ; 227 | } 228 | } 229 | else if (len == 0) 230 | { 231 | free_STRING(string(cp)) ; 232 | cp->type = C_SNULL ; 233 | return ; 234 | } 235 | 236 | cast_to_RE(cp) ; 237 | } 238 | 239 | /* input: cp-> a CELL of type C_MBSTRN (maybe strnum) 240 | test it -- casting it to the appropriate type 241 | which is C_STRING or C_STRNUM 242 | 243 | eliminate some values strtod likes 0x 0X inf nan 244 | 245 | */ 246 | 247 | void 248 | check_strnum(CELL* cp) 249 | { 250 | unsigned char *test ; 251 | char** tp = (char**)&test ; 252 | unsigned char *s ; 253 | unsigned char *q ; 254 | 255 | cp->type = C_STRING ; /* assume not C_STRNUM */ 256 | s = (unsigned char *) string(cp)->str ; 257 | q = s + string(cp)->len ; 258 | while (scan_code[*s] == SC_SPACE) s++ ; 259 | 260 | switch (scan_code[*s]) 261 | { 262 | case SC_DIGIT: 263 | case SC_PLUS: 264 | case SC_MINUS: 265 | case SC_DOT: 266 | errno = 0 ; 267 | cp->dval = strtod((char *) s, tp) ; 268 | /* make overflow and underflow pure string */ 269 | if (errno || test == s) { 270 | errno = 0 ; 271 | return ; 272 | } 273 | 274 | /* we have a number, but must be all of it . 
275 | we allow space at back */ 276 | 277 | while(q > test && scan_code[q[-1]] == SC_SPACE) q-- ; 278 | 279 | if (q != test) return ; 280 | /* and finally eliminate hex strings */ 281 | while(s < q) { 282 | if (*s == 'x' || *s == 'X') return ; 283 | s++ ; 284 | } 285 | cp->type = C_STRNUM ; 286 | return ; 287 | 288 | default: 289 | /* not strnum */ 290 | return ; 291 | } 292 | } 293 | 294 | /* cast a CELL to a replacement cell */ 295 | 296 | void 297 | cast_to_REPL(CELL* cp) 298 | { 299 | STRING *sval ; 300 | 301 | if (cp->type < C_STRING) { 302 | cast1_to_s(cp) ; 303 | } 304 | sval = (STRING *) cp->ptr ; 305 | /* cp no longer ownes sval */ 306 | cp->type = C_NOINIT ; 307 | replacement_scan(sval,cp) ; 308 | free_STRING(sval) ; 309 | } 310 | 311 | 312 | -------------------------------------------------------------------------------- /cdoc/Makefile: -------------------------------------------------------------------------------- 1 | 2 | DOT_W = $(shell cd ..; ls *.w) 3 | DVI_FILES = $(DOT_W:.w=.dvi) 4 | PDF_FILES = $(DOT_W:.w=.pdf) 5 | PS_FILES = $(DOT_W:.w=.ps) 6 | 7 | doc : dvi pdf 8 | 9 | dvi : $(DVI_FILES) 10 | 11 | pdf : $(PDF_FILES) 12 | 13 | ps : $(PS_FILES) 14 | 15 | %.dvi : ../%.w 16 | mweave $< > $*.tex 17 | mwtex $*.tex 18 | rm -f $*.tex *.log *.cn? *.toc 19 | 20 | %.ps : %.dvi 21 | dvips -o $*.ps $< 22 | 23 | %.pdf : %.dvi 24 | dvipdfm $< 25 | 26 | clean : 27 | rm -f *.dvi *.ps *.tex *.cn? 
*.toc *.mpx *.pdf 28 | 29 | -------------------------------------------------------------------------------- /cdoc/array.dvi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aksr/mawk2/949abae95ca3a814efc9625abba0d743af508b54/cdoc/array.dvi -------------------------------------------------------------------------------- /cdoc/array.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aksr/mawk2/949abae95ca3a814efc9625abba0d743af508b54/cdoc/array.pdf -------------------------------------------------------------------------------- /cdoc/printf.dvi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aksr/mawk2/949abae95ca3a814efc9625abba0d743af508b54/cdoc/printf.dvi -------------------------------------------------------------------------------- /cdoc/printf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aksr/mawk2/949abae95ca3a814efc9625abba0d743af508b54/cdoc/printf.pdf -------------------------------------------------------------------------------- /code.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | code.c 4 | copyright 1991-93,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | #include "mawk.h" 19 | #include "code.h" 20 | #include "init.h" 21 | #include "jmp.h" 22 | #include "field.h" 23 | 24 | 25 | static CODEBLOCK* new_code(void) ; 26 | 27 | CODEBLOCK active_code ; 28 | 29 | CODEBLOCK *main_code_p, *begin_code_p, *end_code_p ; 30 | 31 | INST *begin_start, *main_start, *end_start ; 32 | unsigned begin_size, main_size ; 33 | 34 | INST *execution_start = 0 ; 35 | 36 | 37 | /* grow the active code */ 38 | void 39 | code_grow(void) 40 | { 41 | unsigned oldsize = code_limit - code_base ; 42 | unsigned newsize = PAGESZ + oldsize ; 43 | unsigned delta = code_ptr - code_base ; 44 | 45 | if (code_ptr > code_limit) bozo("CODEWARN is too small") ; 46 | 47 | code_base = (INST *) 48 | zrealloc(code_base, INST_BYTES(oldsize), 49 | INST_BYTES(newsize)) ; 50 | code_limit = code_base + newsize ; 51 | code_warn = code_limit - CODEWARN ; 52 | code_ptr = code_base + delta ; 53 | } 54 | 55 | /* shrinks executable code that's done to its final size */ 56 | INST * 57 | code_shrink(CODEBLOCK* p, unsigned* sizep) 58 | { 59 | 60 | unsigned oldsize = INST_BYTES(p->limit - p->base) ; 61 | unsigned newsize = INST_BYTES(p->ptr - p->base) ; 62 | INST *retval ; 63 | 64 | *sizep = newsize ; 65 | 66 | retval = (INST *) zrealloc(p->base, oldsize, newsize) ; 67 | ZFREE(p) ; 68 | return retval ; 69 | } 70 | 71 | 72 | /* code an op and a pointer in the active_code */ 73 | void 74 | xcode2(int op, void* ptr) 75 | { 76 | register INST *p = code_ptr + 2 ; 77 | 78 | if (p >= code_warn) 79 | { 80 | code_grow() ; 81 | p = code_ptr + 2 ; 82 | } 83 | 84 | p[-2].op = op ; 85 | p[-1].ptr = ptr ; 86 | code_ptr = p ; 87 | } 88 | 89 | /* code two ops in the active_code */ 90 | void 91 | code2op(int x, int y) 92 | { 93 | register INST *p = code_ptr + 2 ; 94 | 95 | if (p >= code_warn) 96 | { 97 | code_grow() ; 98 | p = code_ptr + 2 ; 99 | } 100 | 101 | p[-2].op = x ; 102 | p[-1].op = y ; 103 | code_ptr = p ; 104 | } 
/* code_init -- allocate the MAIN code block, make it the active code
   and emit _OMAIN as its first instruction. */
void
code_init(void)
{
    main_code_p = new_code() ;

    active_code = *main_code_p ;
    code1(_OMAIN) ;
}

/* final code relocation
   set_code() as in set concrete */
/* Terminates the MAIN, END and BEGIN blocks, shrinks each to its final
   size and sets main_start, end_start, begin_start, next_label and
   execution_start.  Execution starts at BEGIN when there is one,
   otherwise at MAIN. */
void
set_code(void)
{
    /* set the main code which is active_code */
    /* code_init() already emitted one instruction (_OMAIN), so an
       offset above 1 means the program added main code of its own */
    if (end_code_p || code_offset > 1)
    {
	/* offset of the OL_GL/OL_GL_NR op that is emitted next */
	int gl_offset = code_offset ;
	extern int NR_flag ;

	if (NR_flag) code2op(OL_GL_NR, _HALT) ;
	else code2op(OL_GL, _HALT) ;

	*main_code_p = active_code ;
	main_start = code_shrink(main_code_p, &main_size) ;
	/* next_label is the address of that OL_GL/OL_GL_NR op */
	next_label = main_start + gl_offset ;
	execution_start = main_start ;
    }
    else /* only BEGIN */
    {
	/* NOTE(review): frees exactly PAGESZ instructions, i.e. assumes
	   the unused main block was never grown -- confirm */
	zfree(code_base, INST_BYTES(PAGESZ)) ;
	ZFREE(main_code_p) ;
    }

    /* set the END code */
    if (end_code_p)
    {
	unsigned dummy ;	/* the END size is not kept */

	active_code = *end_code_p ;
	code2op(_EXIT0, _HALT) ;
	*end_code_p = active_code ;
	end_start = code_shrink(end_code_p, &dummy) ;
    }

    /* set the BEGIN code */
    if (begin_code_p)
    {
	active_code = *begin_code_p ;
	/* with main code BEGIN ends in _JMAIN, without it in _EXIT0 */
	if (main_start) code2op(_JMAIN, _HALT) ;
	else code2op(_EXIT0, _HALT) ;
	*begin_code_p = active_code ;
	begin_start = code_shrink(begin_code_p, &begin_size) ;

	/* overrides the main_start chosen above */
	execution_start = begin_start ;
    }

    if ( ! execution_start )
    {
	/* program had functions but no pattern-action bodies */
	execution_start = begin_start = (INST*) zmalloc(2*sizeof(INST)) ;
	execution_start[0].op = _EXIT0 ;
	execution_start[1].op = _HALT ;
    }
}

/* dump_code -- write a listing of the compiled program to stdout:
   the user functions first, then whichever of the BEGIN, END and MAIN
   blocks exist, in that order. */
void
dump_code(void)
{
    fdump() ; /* dumps all user functions */
    if (begin_start)
    { fprintf(stdout, "BEGIN\n") ;
      da(begin_start, stdout) ; }
    if (end_start)
    { fprintf(stdout, "END\n") ;
      da(end_start, stdout) ; }
    if (main_start)
    { fprintf(stdout, "MAIN\n") ;
      da(main_start, stdout) ; }
}


/* new_code -- allocate a CODEBLOCK with room for PAGESZ instructions.
   The block starts empty (ptr == base) and its warning mark sits
   CODEWARN instructions below the limit. */
static CODEBLOCK *
new_code(void)
{
    CODEBLOCK *p = ZMALLOC(CODEBLOCK) ;

    p->base = (INST *) zmalloc(INST_BYTES(PAGESZ)) ;
    p->limit = p->base + PAGESZ ;
    p->warn = p->limit - CODEWARN ;
    p->ptr = p->base ;

    return p ;
}

/* moves the active_code from MAIN to a BEGIN or END */
/* The current active code is saved back into the MAIN block first.
   The BEGIN or END block is created the first time it is needed; any
   scope other than SCOPE_BEGIN selects END. */

void
be_setup(int scope)
{
    *main_code_p = active_code ;

    if (scope == SCOPE_BEGIN)
    {
	if (!begin_code_p) begin_code_p = new_code() ;
	active_code = *begin_code_p ;
    }
    else
    {
	if (!end_code_p) end_code_p = new_code() ;
	active_code = *end_code_p ;
    }
}
14 | ********************************************/ 15 | 16 | 17 | /* code.h */ 18 | 19 | #ifndef CODE_H 20 | #define CODE_H 21 | 22 | #include "memory.h" 23 | 24 | #define PAGESZ 512 25 | /* number of code instructions allocated at one time */ 26 | #define CODEWARN 16 27 | 28 | /* coding scope */ 29 | #define SCOPE_MAIN 0 30 | #define SCOPE_BEGIN 1 31 | #define SCOPE_END 2 32 | #define SCOPE_FUNCT 3 33 | 34 | 35 | typedef struct { 36 | INST *base, *limit, *warn, *ptr ; 37 | } CODEBLOCK ; 38 | 39 | extern CODEBLOCK active_code ; 40 | extern CODEBLOCK *main_code_p, *begin_code_p, *end_code_p ; 41 | 42 | extern INST *main_start, *begin_start, *end_start ; 43 | extern unsigned main_size, begin_size ; 44 | extern INST *execution_start ; 45 | extern INST *next_label ; /* next statements jump to here */ 46 | extern int dump_code_flag ; 47 | 48 | #define code_ptr active_code.ptr 49 | #define code_base active_code.base 50 | #define code_warn active_code.warn 51 | #define code_limit active_code.limit 52 | #define code_offset (code_ptr-code_base) 53 | 54 | #define INST_BYTES(x) (sizeof(INST)*(unsigned)(x)) 55 | 56 | extern CELL eval_stack[] ; 57 | extern int exit_code ; 58 | 59 | 60 | #define code1(x) code_ptr++ -> op = (x) 61 | #define code2(x,p) xcode2(x,(PTR)(p)) 62 | 63 | void xcode2(int, PTR) ; 64 | void code2op(int, int) ; 65 | INST * code_shrink(CODEBLOCK*, unsigned*) ; 66 | void code_grow(void) ; 67 | void set_code(void) ; 68 | void be_setup(int) ; 69 | void dump_code(void) ; 70 | 71 | 72 | /* the machine opcodes */ 73 | /* to avoid confusion with a ptr FE_PUSHA must have op code 0 */ 74 | 75 | enum { 76 | FE_PUSHA = 0, 77 | FE_PUSHI, 78 | F_PUSHA, 79 | F_PUSHI, 80 | NF_PUSHI, 81 | _HALT, 82 | _STOP, 83 | _PUSHC, 84 | _PUSHD, 85 | _PUSHS, 86 | _PUSHINT, 87 | _PUSHA, 88 | _PUSHI, 89 | PUSHFM, 90 | L_PUSHA, 91 | L_PUSHI, 92 | AE_PUSHA, 93 | AE_PUSHI, 94 | A_PUSHA, 95 | LAE_PUSHA, 96 | LAE_PUSHI, 97 | LA_PUSHA, 98 | _POP, 99 | _ADD, 100 | _SUB, 101 | _MUL, 102 | _DIV, 
103 | _MOD, 104 | _POW, 105 | _NOT, 106 | _TEST, 107 | A_TEST, 108 | A_DEL, 109 | ALOOP, 110 | A_CAT, 111 | _UMINUS, 112 | _UPLUS, 113 | _ASSIGN, 114 | _ADD_ASG, 115 | _SUB_ASG, 116 | _MUL_ASG, 117 | _DIV_ASG, 118 | _MOD_ASG, 119 | _POW_ASG, 120 | F_ASSIGN, 121 | F_ADD_ASG, 122 | F_SUB_ASG, 123 | F_MUL_ASG, 124 | F_DIV_ASG, 125 | F_MOD_ASG, 126 | F_POW_ASG, 127 | _CAT, 128 | _BUILTIN, 129 | _PRINT, 130 | _POST_INC, 131 | _POST_DEC, 132 | _PRE_INC, 133 | _PRE_DEC, 134 | F_POST_INC, 135 | F_POST_DEC, 136 | F_PRE_INC, 137 | F_PRE_DEC, 138 | _JMP, 139 | _JNZ, 140 | _JZ, 141 | _LJZ, 142 | _LJNZ, 143 | _EQ, 144 | _NEQ, 145 | _LT, 146 | _LTE, 147 | _GT, 148 | _GTE, 149 | _MATCH0, 150 | _MATCH1, 151 | _MATCH2, 152 | _EXIT, 153 | _EXIT0, 154 | _NEXT, 155 | _NEXTFILE, 156 | _RANGE, 157 | _CALL, 158 | _RET, 159 | _RET0, 160 | SET_ALOOP, 161 | POP_AL, 162 | OL_GL, 163 | OL_GL_NR, 164 | _OMAIN, 165 | _JMAIN, 166 | DEL_A, 167 | PI_LOAD, 168 | LPI_LOAD 169 | } ; 170 | 171 | #endif /* CODE_H */ 172 | -------------------------------------------------------------------------------- /config.hin: -------------------------------------------------------------------------------- 1 | /* config.hin. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to 1 if you have the declaration of `random', and to 0 if you don't. 4 | */ 5 | #undef HAVE_DECL_RANDOM 6 | 7 | /* Define to 1 if you have the declaration of `srandom', and to 0 if you 8 | don't. */ 9 | #undef HAVE_DECL_SRANDOM 10 | 11 | /* Define to 1 if the system has the type `int64_t'. */ 12 | #undef HAVE_INT64_T 13 | 14 | /* Define to 1 if you have the header file. */ 15 | #undef HAVE_INTTYPES_H 16 | 17 | /* Define to 1 if you have the header file. */ 18 | #undef HAVE_MEMORY_H 19 | 20 | /* Define to 1 if you have the header file. */ 21 | #undef HAVE_STDINT_H 22 | 23 | /* Define to 1 if you have the header file. */ 24 | #undef HAVE_STDLIB_H 25 | 26 | /* Define to 1 if you have the header file. 
*/ 27 | #undef HAVE_STRINGS_H 28 | 29 | /* Define to 1 if you have the <string.h> header file. */ 30 | #undef HAVE_STRING_H 31 | 32 | /* Define to 1 if you have the <sys/stat.h> header file. */ 33 | #undef HAVE_SYS_STAT_H 34 | 35 | /* Define to 1 if you have the <sys/types.h> header file. */ 36 | #undef HAVE_SYS_TYPES_H 37 | 38 | /* Define to 1 if the system has the type `uint64_t'. */ 39 | #undef HAVE_UINT64_T 40 | 41 | /* Define to 1 if you have the <unistd.h> header file. */ 42 | #undef HAVE_UNISTD_H 43 | 44 | /* Define to 1 if the system has the type `__int64_t'. */ 45 | #undef HAVE___INT64_T 46 | 47 | /* Define to 1 if the system has the type `__uint64_t'. */ 48 | #undef HAVE___UINT64_T 49 | 50 | /* Define to the address where bug reports for this package should be sent. */ 51 | #undef PACKAGE_BUGREPORT 52 | 53 | /* Define to the full name of this package. */ 54 | #undef PACKAGE_NAME 55 | 56 | /* Define to the full name and version of this package. */ 57 | #undef PACKAGE_STRING 58 | 59 | /* Define to the one symbol short name of this package. */ 60 | #undef PACKAGE_TARNAME 61 | 62 | /* Define to the version of this package. */ 63 | #undef PACKAGE_VERSION 64 | 65 | /* Define to 1 if you have the ANSI C header files.
*/ 66 | #undef STDC_HEADERS 67 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT(mawk, 1.9.9.6, mawkeddy@gmail.com) 2 | 3 | dnl ==================================================================== 4 | dnl Checks for compilers: 5 | 6 | AC_PROG_CC 7 | AC_PROG_CPP 8 | 9 | dnl ==================================================================== 10 | dnl Force creation of config.h 11 | 12 | AC_CONFIG_HEADERS([config.h:config.hin]) 13 | 14 | dnl ==================================================================== 15 | dnl Checks for selected function declarations that are sometimes missing 16 | dnl from particular compilation environments: 17 | 18 | AC_CHECK_DECLS([random, srandom],,,[#include ]) 19 | 20 | dnl ==================================================================== 21 | dnl Checks for selected type definitions that are sometimes missing 22 | 23 | AC_CHECK_TYPES([int64_t, __int64_t, uint64_t, __uint64_t]) 24 | 25 | dnl ==================================================================== 26 | dnl Checks for separate math library (unless the user already set LIBS): 27 | 28 | if test -z "$LIBS" 29 | then 30 | needlm=0 31 | 32 | AC_SEARCH_LIBS(exp, [m], needlm=1) 33 | AC_SEARCH_LIBS(log, [m], needlm=1) 34 | AC_SEARCH_LIBS(sin, [m], needlm=1) 35 | 36 | dnl Common elementary functions are implemented in hardware on the 37 | dnl Intel x86 and Motorola 680x0 families, and thus, may not 38 | dnl require library calls for evaluation, so pick a library 39 | dnl function that we know is not in hardware. The best choice 40 | dnl seems to be one of the Bessel functions that all Unix systems 41 | dnl have supplied since the 1980s Berkeley days. 
42 | 43 | AC_SEARCH_LIBS(j0, [m], needlm=1) 44 | 45 | test $needlm -eq 1 && LIBS="-lm" 46 | fi 47 | 48 | dnl ==================================================================== 49 | dnl Checks for yacc alternatives: 50 | 51 | dnl AC_CHECK_PROGS(YACC, [bison byacc yacc ]) 52 | dnl 53 | dnl AC_MSG_CHECKING(for bison flags) 54 | dnl YACC="$ac_cv_prog_YACC" 55 | dnl if test "xx$YACC" = "xxbison" 56 | dnl then 57 | dnl YFLAGS="$YFLAGS -y" 58 | dnl AC_MSG_RESULT(-y) 59 | dnl else 60 | dnl AC_MSG_RESULT() 61 | dnl fi 62 | 63 | AC_PROG_YACC 64 | 65 | dnl ==================================================================== 66 | dnl Final output: 67 | 68 | AC_CONFIG_FILES([Makefile]) 69 | AC_OUTPUT 70 | 71 | -------------------------------------------------------------------------------- /error.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | error.c 4 | copyright 1991, 1992,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | #include 19 | #include "mawk.h" 20 | #include "scan.h" 21 | #include "bi_vars.h" 22 | 23 | static void rt_where(void) ; 24 | static void missing(int, const char *, int) ; 25 | static const char* type_to_str(int) ; 26 | 27 | 28 | /* for run time error messages only */ 29 | unsigned rt_nr , rt_fnr ; 30 | 31 | static struct token_str { 32 | int token ; 33 | const char *str ; 34 | } token_str[] = { 35 | { EOF , "end of file" }, 36 | { NL , "end of line"}, 37 | { SEMI_COLON , ";" }, 38 | { LBRACE , "{" }, 39 | { RBRACE , "}" }, 40 | { SC_FAKE_SEMI_COLON, "}"}, 41 | { LPAREN , "(" }, 42 | { RPAREN , ")" }, 43 | { LBOX , "["}, 44 | { RBOX , "]"}, 45 | { QMARK , "?"}, 46 | { COLON , ":"}, 47 | { OR, "||"}, 48 | { AND, "&&"}, 49 | { ASSIGN , "=" }, 50 | { ADD_ASG, "+="}, 51 | { SUB_ASG, "-="}, 52 | { MUL_ASG, "*="}, 53 | { DIV_ASG, "/="}, 54 | { MOD_ASG, "%="}, 55 | { POW_ASG, "^="}, 56 | { EQ , "==" }, 57 | { NEQ , "!="}, 58 | { LT, "<" }, 59 | { LTE, "<=" }, 60 | { GT, ">"}, 61 | { GTE, ">=" }, 62 | { PLUS , "+" }, 63 | { MINUS, "-" }, 64 | { MUL , "*" }, 65 | { DIV, "/" }, 66 | { MOD, "%" }, 67 | { POW, "^" }, 68 | { NOT, "!" 
}, 69 | { COMMA, "," }, 70 | { IO_IN, "<" }, 71 | { PIPE, "|" }, 72 | { DOLLAR, "$" }, 73 | { FIELD, "$" }, 74 | { 0, 0} 75 | } ; 76 | 77 | static int token_in_string_buff[] = { 78 | MATCH, INC_or_DEC , DOUBLE , STRING_ , ID , FUNCT_ID , 79 | BUILTIN , IO_OUT , 0 } ; 80 | 81 | /* if paren_cnt >0 and we see one of these, we are missing a ')' */ 82 | static int missing_rparen[] = 83 | { EOF, NL, SEMI_COLON, SC_FAKE_SEMI_COLON, RBRACE, 0 } ; 84 | 85 | /* ditto for '}' */ 86 | static int missing_rbrace[] = 87 | { EOF, BEGIN, END , 0 } ; 88 | 89 | static void missing(int c, const char* n , int ln) 90 | { const char *s0, *s1 ; 91 | 92 | if ( pfile_name ) 93 | { s0 = pfile_name ; s1 = ": " ; } 94 | else s0 = s1 = "" ; 95 | 96 | errmsg(0, "%s%sline %u: missing %c near %s" ,s0, s1, ln, c, n) ; 97 | } 98 | 99 | void yyerror(const char* s) 100 | /* we don't use s for input, 101 | (yacc and bison force this). 102 | We use s as a var to keep the compiler off our back */ 103 | { 104 | struct token_str *p ; 105 | int *ip ; 106 | 107 | s = 0 ; 108 | 109 | for ( p = token_str ; p->token ; p++ ) { 110 | if (current_token == p->token ) { 111 | s = p->str ; 112 | break ; 113 | } 114 | } 115 | 116 | if (!s) { 117 | unsigned i = 0 ; 118 | int tok ; 119 | while((tok = token_in_string_buff[i])) { 120 | if (current_token == tok) { 121 | s = string_buff ; 122 | break ; /* while */ 123 | } 124 | i++ ; 125 | } 126 | } 127 | 128 | if ( ! 
s ) /* search the keywords */ 129 | s = find_kw_str(current_token) ; 130 | 131 | if ( s ) 132 | { 133 | if ( paren_cnt ) 134 | for( ip = missing_rparen ; *ip ; ip++) 135 | if ( *ip == current_token ) 136 | { missing(')', s, token_lineno) ; 137 | paren_cnt = 0 ; 138 | goto done ; 139 | } 140 | 141 | if ( brace_cnt ) 142 | for( ip = missing_rbrace ; *ip ; ip++) 143 | if ( *ip == current_token ) 144 | { missing('}', s, token_lineno) ; 145 | brace_cnt = 0 ; 146 | goto done ; 147 | } 148 | 149 | compile_error("syntax error at or near %s", s) ; 150 | 151 | } 152 | else /* special cases */ 153 | switch ( current_token ) 154 | { 155 | case UNEXPECTED : 156 | unexpected_char() ; 157 | goto done ; 158 | 159 | case BAD_DECIMAL : 160 | compile_error( 161 | "syntax error in decimal constant %s", 162 | string_buff ) ; 163 | break ; 164 | 165 | case RE : 166 | compile_error( 167 | "syntax error at or near /%s/", 168 | string_buff ) ; 169 | break ; 170 | 171 | default : 172 | compile_error("syntax error") ; 173 | break ; 174 | } 175 | return ; 176 | 177 | done : 178 | if ( ++compile_error_count == MAX_COMPILE_ERRORS ) mawk_exit(2) ; 179 | } 180 | 181 | 182 | /* generic error message with a hook into the system error 183 | messages if errnum > 0 */ 184 | 185 | void errmsg (int errnum, const char * format,...) 186 | { 187 | va_list args ; 188 | fprintf(stderr, "%s: " , progname) ; 189 | va_start(args, format) ; 190 | vfprintf(stderr, format, args) ; 191 | va_end(args) ; 192 | 193 | if ( errnum > 0 ) fprintf(stderr, " (%s)" , strerror(errnum) ) ; 194 | 195 | fprintf( stderr, "\n") ; 196 | fflush(stderr) ; 197 | } 198 | 199 | void compile_error(const char* format, ...) 
200 | { 201 | va_list args ; 202 | const char* s0; 203 | const char* s1; 204 | 205 | /* with multiple program files put program name in 206 | error message */ 207 | if ( pfile_name ) { 208 | s0 = pfile_name ; 209 | s1 = ": " ; 210 | } 211 | else { 212 | s0 = s1 = "" ; 213 | } 214 | 215 | fprintf(stderr, "%s: %s%sline %u: " , progname, s0, s1,token_lineno) ; 216 | va_start(args, format) ; 217 | vfprintf(stderr, format, args) ; 218 | va_end(args) ; 219 | fprintf(stderr, "\n") ; 220 | fflush(stderr) ; 221 | if ( ++compile_error_count == MAX_COMPILE_ERRORS ) mawk_exit(2) ; 222 | } 223 | 224 | void call_error(unsigned lineno, const char* format, ...) 225 | { 226 | va_list args ; 227 | const char* s0 = pfile_name ; 228 | const char* s1 = ": " ; 229 | 230 | if (!pfile_name) { 231 | s0 = s1 = "" ; 232 | } 233 | 234 | fprintf(stderr, "%s: %s%sline %u: " , progname, s0, s1,lineno) ; 235 | va_start(args, format) ; 236 | vfprintf(stderr, format, args) ; 237 | va_end(args) ; 238 | fprintf(stderr, "\n") ; 239 | fflush(stderr) ; 240 | if (++compile_error_count == MAX_COMPILE_ERRORS) mawk_exit(2) ; 241 | } 242 | 243 | void rt_error( const char * format, ...) 244 | { 245 | va_list args ; 246 | 247 | fprintf(stderr, "%s: run time error: " , progname ) ; 248 | va_start(args, format) ; 249 | vfprintf(stderr, format, args) ; 250 | va_end(args) ; 251 | fputc('\n',stderr) ; 252 | rt_where() ; 253 | mawk_exit(2) ; 254 | } 255 | 256 | void compile_or_rt_error(const char* format, ...) 
257 | { 258 | /* up to caller not to exceed this buffer */ 259 | char buffer[1024] ; 260 | va_list args ; 261 | 262 | va_start(args,format) ; 263 | vsprintf(buffer, format, args) ; 264 | if (mawk_state == EXECUTION) { 265 | rt_error(buffer) ; 266 | } 267 | else { 268 | compile_error(buffer) ; 269 | } 270 | } 271 | 272 | void bozo(const char* s) 273 | { 274 | errmsg(0, "bozo: %s" , s) ; 275 | mawk_exit(3) ; 276 | } 277 | 278 | void overflow(const char* s, unsigned size) 279 | { 280 | errmsg(0 , "program limit exceeded: %s size=%u", s, size) ; 281 | mawk_exit(2) ; 282 | } 283 | 284 | 285 | /* print as much as we know about where a rt error occured */ 286 | 287 | static void rt_where(void) 288 | { 289 | if ( FILENAME->type != C_STRING ) cast1_to_s(FILENAME) ; 290 | 291 | fprintf(stderr, "\tFILENAME=\"%s\" FNR=%u NR=%u\n", 292 | string(FILENAME)->str, rt_fnr, rt_nr) ; 293 | } 294 | 295 | /* run time */ 296 | void rt_overflow(const char* s, unsigned size) 297 | { 298 | errmsg(0 , "program limit exceeded: %s size=%u", s, size) ; 299 | rt_where() ; 300 | mawk_exit(2) ; 301 | } 302 | 303 | void 304 | unexpected_char(void) 305 | { int c = yylval.ival ; 306 | 307 | fprintf(stderr, "%s: %u: ", progname, token_lineno) ; 308 | if ( c > ' ' && c < 127 ) 309 | fprintf(stderr, "unexpected character '%c'\n" , c) ; 310 | else 311 | fprintf(stderr, "unexpected character 0x%02x\n" , c) ; 312 | } 313 | 314 | static const char* 315 | type_to_str( int type ) 316 | { 317 | const char *retval ; 318 | 319 | switch( type ) { 320 | case ST_VAR : 321 | retval = "variable" ; 322 | break ; 323 | case ST_ARRAY : 324 | retval = "array" ; 325 | break ; 326 | case ST_FUNCT : 327 | retval = "function" ; 328 | break ; 329 | case ST_LOCAL_VAR : 330 | retval = "local variable" ; 331 | break ; 332 | case ST_LOCAL_ARRAY : 333 | retval = "local array" ; 334 | break ; 335 | default : 336 | bozo("type_to_str") ; 337 | /* not reached */ 338 | retval = 0 ; 339 | } 340 | return retval ; 341 | } 342 | 343 | /* 
emit an error message about a type clash */ 344 | void type_error(SYMTAB* p) 345 | { 346 | compile_error("illegal reference to %s %s", 347 | type_to_str(p->type) , p->name) ; 348 | } 349 | 350 | -------------------------------------------------------------------------------- /examples/ct_length.awk: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/mawk -f 2 | 3 | # ct_length.awk 4 | # 5 | # replaces all length 6 | # by length($0) 7 | # 8 | 9 | 10 | { 11 | 12 | while ( i = index($0, "length") ) 13 | { 14 | printf "%s" , substr($0,1, i+5) # ...length 15 | $0 = substr($0,i+6) 16 | 17 | if ( match($0, /^[ \t]*\(/) ) 18 | { 19 | # its OK 20 | printf "%s", substr($0, 1, RLENGTH) 21 | $0 = substr($0, RLENGTH+1) 22 | } 23 | else # length alone 24 | printf "($0)" 25 | 26 | } 27 | print 28 | } 29 | -------------------------------------------------------------------------------- /examples/decl.awk: -------------------------------------------------------------------------------- 1 | 2 | # parse a C declaration by recursive descent 3 | # based on a C program in KR ANSI edition 4 | # 5 | # run on a C file it finds the declarations 6 | # 7 | # restrictions: one declaration per line 8 | # doesn't understand struct {...} 9 | # makes assumptions about type names 10 | # 11 | # 12 | # some awks need double escapes on strings used as 13 | # regular expressions. If not run on mawk, use gdecl.awk 14 | 15 | 16 | ################################################ 17 | # lexical scanner -- gobble() 18 | # input : string s -- treated as a regular expression 19 | # gobble eats SPACE, then eats longest match of s off front 20 | # of global variable line. 
21 | # Cuts the matched part off of line 22 | # 23 | 24 | 25 | function gobble(s, x) 26 | { 27 | sub( /^ /, "", line) # eat SPACE if any 28 | 29 | # surround s with parenthesis to make sure ^ acts on the 30 | # whole thing 31 | 32 | match(line, "^" "(" s ")") 33 | x = substr(line, 1, RLENGTH) 34 | line = substr(line, RLENGTH+1) 35 | return x 36 | } 37 | 38 | 39 | function ptr_to(n, x) # print "pointer to" , n times 40 | { n = int(n) 41 | if ( n <= 0 ) return "" 42 | x = "pointer to" ; n-- 43 | while ( n-- ) x = x " pointer to" 44 | return x 45 | } 46 | 47 | 48 | #recursively get a decl 49 | # returns an english description of the declaration or 50 | # "" if not a C declaration. 51 | 52 | function decl( x, t, ptr_part) 53 | { 54 | 55 | x = gobble("[* ]+") # get list of *** ... 56 | gsub(/ /, "", x) # remove all SPACES 57 | ptr_part = ptr_to( length(x) ) 58 | 59 | # We expect to see either an identifier or '(' 60 | # 61 | 62 | if ( gobble("\(") ) 63 | { 64 | # this is the recursive descent part 65 | # we expect to match a declaration and closing ')' 66 | # If not return "" to indicate failure 67 | 68 | if ( (x = decl()) == "" || gobble( "\)" ) == "" ) return "" 69 | 70 | } 71 | else # expecting an identifier 72 | { 73 | if ( (x = gobble(id)) == "" ) return "" 74 | x = x ":" 75 | } 76 | 77 | # finally look for () 78 | # or [ opt_size ] 79 | 80 | while ( 1 ) 81 | if ( gobble( funct_mark ) ) x = x " function returning" 82 | else 83 | if ( t = gobble( array_mark ) ) 84 | { gsub(/ /, "", t) 85 | x = x " array" t " of" 86 | } 87 | else break 88 | 89 | 90 | x = x " " ptr_part 91 | return x 92 | } 93 | 94 | 95 | BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" 96 | funct_mark = "\([ \t]*\)" 97 | array_mark = "\[[ \t]*[_A-Za-z0-9]*[ \t]*\]" 98 | 99 | # I've assumed types are keywords or all CAPS or end in _t 100 | # Other conventions could be added. 
101 | 102 | type0 = "int|char|short|long|double|float|void" 103 | type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS 104 | type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t 105 | 106 | types = "(" type0 "|" type1 "|" type2 ")" 107 | } 108 | 109 | 110 | { 111 | 112 | gsub( "/\*([^*]|\*[^/])*(\*/|$)" , " ") # remove comments 113 | gsub( /[ \t]+/, " ") # squeeze white space to a single space 114 | 115 | 116 | line = $0 117 | 118 | scope = gobble( "extern|static" ) 119 | 120 | if ( type = gobble("(struct|union|enum) ") ) 121 | type = type gobble(id) # get the tag 122 | else 123 | { 124 | 125 | type = gobble("(un)?signed ") gobble( types ) 126 | 127 | } 128 | 129 | if ( ! type ) next 130 | 131 | if ( (x = decl()) && gobble( ";") ) 132 | { 133 | x = x " " type 134 | if ( scope ) x = x " (" scope ")" 135 | gsub( / +/, " ", x) # 136 | print x 137 | } 138 | 139 | } 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /examples/deps.awk: -------------------------------------------------------------------------------- 1 | 2 | # find include dependencies in C source 3 | # 4 | # mawk -f deps.awk C_source_files 5 | # -- prints a dependency list suitable for make 6 | # -- ignores #include < > 7 | # 8 | 9 | 10 | BEGIN { stack_index = 0 # stack[] holds the input files 11 | 12 | for(i = 1 ; i < ARGC ; i++) 13 | { 14 | file = ARGV[i] 15 | if ( file !~ /\.[cC]$/ ) continue # skip it 16 | outfile = substr(file, 1, length(file)-2) ".o" 17 | 18 | # INCLUDED[] stores the set of included files 19 | # -- start with the empty set 20 | for( j in INCLUDED ) delete INCLUDED[j] 21 | 22 | while ( 1 ) 23 | { 24 | if ( getline line < file <= 0 ) # no open or EOF 25 | { close(file) 26 | if ( stack_index == 0 ) break # empty stack 27 | else 28 | { file = stack[ stack_index-- ] 29 | continue 30 | } 31 | } 32 | 33 | if ( line ~ /^#include[ \t]+".*"/ ) 34 | { 35 | split(line, X, "\"") # filename is in X[2] 36 | 37 | if ( X[2] in INCLUDED ) # we've already 
included it 38 | continue 39 | 40 | #push current file 41 | stack[ ++stack_index ] = file 42 | INCLUDED[ file = X[2] ] = "" 43 | } 44 | } # end of while 45 | 46 | # test if INCLUDED is empty 47 | flag = 0 # on once the front is printed 48 | for( j in INCLUDED ) 49 | if ( ! flag ) 50 | { printf "%s : %s" , outfile, j ; flag = 1 } 51 | else printf " %s" , j 52 | 53 | if ( flag ) print "" 54 | 55 | }# end of loop over files in ARGV[i] 56 | 57 | } 58 | -------------------------------------------------------------------------------- /examples/eatc.awk: -------------------------------------------------------------------------------- 1 | 2 | 3 | # eatc.awk 4 | # another program to remove comments 5 | # 6 | 7 | 8 | { while( t = index($0 , "/*") ) 9 | { 10 | printf "%s" , substr($0,1,t-1) 11 | $0 = eat_comment( substr($0, t+2) ) 12 | } 13 | 14 | print 15 | } 16 | 17 | 18 | function eat_comment(s, t) 19 | { 20 | #replace comment by one space 21 | printf " " 22 | 23 | while ( (t = index(s, "*/")) == 0 ) 24 | if ( (getline s) <= 0 ) 25 | { # end of input (0) or read error (-1) -- unterminated comment 26 | system("echo unterminated comment 1>&2") 27 | exit 1 28 | } 29 | 30 | return substr(s,t+2) 31 | } 32 | 33 | -------------------------------------------------------------------------------- /examples/gdecl.awk: -------------------------------------------------------------------------------- 1 | 2 | # parse a C declaration by recursive descent 3 | # 4 | # decl.awk with extra escapes \ 5 | 6 | ################################################ 7 | ############################################ 8 | 9 | 10 | # lexical scanner -- gobble() 11 | # input : string s -- treated as a regular expression 12 | # gobble eats SPACE, then eats longest match of s off front 13 | # of global variable line. 
14 | # Cuts the matched part off of line 15 | # 16 | 17 | 18 | function gobble(s, x) 19 | { 20 | sub( /^ /, "", line) # eat SPACE if any 21 | 22 | # surround s with parenthesis to make sure ^ acts on the 23 | # whole thing 24 | 25 | match(line, "^" "(" s ")") 26 | x = substr(line, 1, RLENGTH) 27 | line = substr(line, RLENGTH+1) 28 | return x 29 | } 30 | 31 | 32 | function ptr_to(n, x) # print "pointer to" , n times 33 | { n = int(n) 34 | if ( n <= 0 ) return "" 35 | x = "pointer to" ; n-- 36 | while ( n-- ) x = x " pointer to" 37 | return x 38 | } 39 | 40 | 41 | #recursively get a decl 42 | # returns an english description of the declaration or 43 | # "" if not a C declaration. 44 | 45 | function decl( x, t, ptr_part) 46 | { 47 | 48 | x = gobble("[* ]+") # get list of *** ... 49 | gsub(/ /, "", x) # remove all SPACES 50 | ptr_part = ptr_to( length(x) ) 51 | 52 | # We expect to see either an identifier or '(' 53 | # 54 | 55 | if ( gobble("\\(") ) 56 | { 57 | # this is the recursive descent part 58 | # we expect to match a declaration and closing ')' 59 | # If not return "" to indicate failure 60 | 61 | if ( (x = decl()) == "" || gobble( "\\)" ) == "" ) return "" 62 | 63 | } 64 | else # expecting an identifier 65 | { 66 | if ( (x = gobble(id)) == "" ) return "" 67 | x = x ":" 68 | } 69 | 70 | # finally look for () 71 | # or [ opt_size ] 72 | 73 | while ( 1 ) 74 | if ( gobble( funct_mark ) ) x = x " function returning" 75 | else 76 | if ( t = gobble( array_mark ) ) 77 | { gsub(/ /, "", t) 78 | x = x " array" t " of" 79 | } 80 | else break 81 | 82 | 83 | x = x " " ptr_part 84 | return x 85 | } 86 | 87 | 88 | BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*" 89 | funct_mark = "\\([ \t]*\\)" 90 | array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]" 91 | 92 | # I've assumed types are keywords or all CAPS or end in _t 93 | # Other conventions could be added. 
94 | 95 | type0 = "int|char|short|long|double|float|void" 96 | type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS 97 | type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t 98 | 99 | types = "(" type0 "|" type1 "|" type2 ")" 100 | } 101 | 102 | 103 | { 104 | 105 | gsub( /\/\*([^*]|\*[^\/])*(\*\/|$)/ , " ") # remove comments 106 | gsub( /[ \t]+/, " ") # squeeze white space to a single space 107 | 108 | 109 | line = $0 110 | 111 | scope = gobble( "extern|static" ) 112 | 113 | if ( type = gobble("(struct|union|enum) ") ) 114 | type = type gobble(id) # get the tag 115 | else 116 | { 117 | 118 | type = gobble("(un)?signed ") gobble( types ) 119 | 120 | } 121 | 122 | if ( ! type ) next 123 | 124 | if ( (x = decl()) && gobble( ";") ) 125 | { 126 | x = x " " type 127 | if ( scope ) x = x " (" scope ")" 128 | gsub( / +/, " ", x) # 129 | print x 130 | } 131 | 132 | } 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /examples/hical: -------------------------------------------------------------------------------- 1 | : 2 | # @(#) hical - displays previous, current & next months - today highlighted 3 | # @(#) an "internationalizationable" version of a 3-month 'cal' display, it 4 | # @(#) may be edited for week to start with Sun or Mon & for local language 5 | 6 | prog=/tmp/hical.$$ ; trap 'rm -f $prog ; trap 0 ; exit' 0 1 2 3 15 7 | 8 | : ${so:=`tput smso`} ${se:=`tput rmso`} 9 | 10 | # USER EDITS MAY BE REQUIRED for the arguments to the 'date' command 11 | # the script presumes 'date' recognizes these arguments in these ways: 12 | # w - Day of the week - Sunday = 0 13 | # m - Month of year - 01 to 12 14 | # d - Day of month - 01 to 31 15 | # T - Time as HH:MM:SS 16 | # Y - Year (including century), as decimal numbers 17 | DATE_ARGS='%w %m %d %T %Y' 18 | 19 | # the 'awk' program file is written to a temporary file to avoid any 20 | # "arg list too long" error messages, yet have all the code in one file 21 | # observe when 
editing the program file that '\n' must be '\\n' 22 | # NOTE: for the 'bash' shell on Linux, use 'echo -e' in the next line 23 | echo '{ 24 | # USER EDITS MAY BE REQUIRED (for FMT, day & month names, and the time stuff) 25 | # FMT = 0 # for weekdays ordered "Mo Tu We Th Fr Sa Su" 26 | FMT = 1 # for weekdays ordered "Su Mo Tu We Th Fr Sa" 27 | Header[0] = "Mo Tu We Th Fr Sa Su" 28 | Header[1] = "Su Mo Tu We Th Fr Sa" 29 | months = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec" 30 | time_is = "The time is:" ; time_fmt = "%s %s %s %s\\n" 31 | # NO MORE USER EDITS REQUIRED (I think!) 32 | split(months,M_Name) ; split("31 28 31 30 31 30 31 31 30 31 30 31",M_Len) 33 | daynum = $1 + FMT 34 | Mon[2] = $2 + 0 35 | today = $3 + 0 36 | time = $4 37 | Year[1] = Year[2] = Year[3] = $NF 38 | if ( Mon[2] == 1 ) { Year[1] = Year[1] - 1 ; Mon[1] = 12 } 39 | else { Mon[1] = Mon[2] - 1 } 40 | if ( Mon[2] == 12 ) { Year[3] = Year[3] + 1 ; Mon[3] = 1 } 41 | else { Mon[3] = Mon[2] + 1 } 42 | if ( Year[2] % 4 == 0 && \ 43 | Year[2] % 100 != 0 || \ 44 | Year[2] % 400 == 0 ) M_Len[2] = 29 45 | Start[2] = 7 - ( ( today - daynum ) % 7 ) 46 | Start[1] = 7 - ( ( M_Len[Mon[1]] - Start[2] ) % 7 ) 47 | Start[3] = ( M_Len[Mon[2]] + Start[2] ) % 7 48 | for (i=1;i<=3;i++) { while ( Start[i] >= 7 ) Start[i] -= 7 } 49 | for (mm=1;mm<=3;mm++) { 50 | if ( Year[mm] != Year[mm-1] ) 51 | printf( "%s %s %s\\n", so, Year[mm], se ) 52 | if ( mm == 1 ) printf( "%s %s %s\\n", so, Header[FMT], se ) 53 | j = k = 1 54 | while ( j <= M_Len[Mon[mm]] ) { 55 | line = "" 56 | for (i=1;i<=7;i++) { 57 | if ( Start[mm] > 0 || j > M_Len[Mon[mm]] ) { date = "" ; Start[mm]-- } 58 | else date = j++ 59 | if ( mm == 2 && date == today ) { So = so ; Se = se } 60 | else { So = Se = "" } 61 | line = sprintf( "%s%s%2s%s ", line, So, date, Se ) 62 | } 63 | m1 = substr(M_Name[Mon[mm]],k++,1) 64 | printf( "%s %1s %s %s%s %s\\n", so, m1, se, line, so, se ) 65 | } 66 | } 67 | printf( time_fmt, so, time_is, time, se ) 68 | }' >$prog 
69 | 70 | date +"$DATE_ARGS" | ${AWK:=mawk} -f $prog so=$so se=$se 71 | 72 | exit 0 73 | 74 | # EOF 'hical' - Tue Dec 19 19:19:19 EST 1994 75 | # Bob Stockler - bob@trebor.iglou.com - CIS: 72726,452 76 | -------------------------------------------------------------------------------- /examples/nocomment.awk: -------------------------------------------------------------------------------- 1 | 2 | # remove C comments from a list of files 3 | # using a comment as the record separator 4 | # 5 | # this is trickier than I first thought 6 | # The first version in .97-.9993 was wrong 7 | 8 | BEGIN { 9 | # RS is set to a comment (this is mildly tricky, I blew it here 10 | RS = "/\*([^*]|\*+[^*/])*\*+/" 11 | ORS = " " 12 | getline hold 13 | filename = FILENAME 14 | } 15 | 16 | # if changing files 17 | filename != FILENAME { 18 | filename = FILENAME 19 | printf "%s" , hold 20 | hold = $0 21 | next 22 | } 23 | 24 | { # hold one record because we don't want ORS on the last 25 | # record in each file 26 | print hold 27 | hold = $0 28 | } 29 | 30 | END { printf "%s", hold } 31 | -------------------------------------------------------------------------------- /examples/primes.awk: -------------------------------------------------------------------------------- 1 | 2 | # primes.awk 3 | # 4 | # mawk -f primes.awk [START] STOP 5 | # find all primes between 2 and STOP 6 | # or START and STOP 7 | # 8 | 9 | 10 | 11 | function usage() 12 | { printf("usage: %s [start] stop\n", ARGV[0]) > "/dev/stderr" 13 | exit 1 14 | } 15 | 16 | 17 | BEGIN { if (ARGC == 1 || ARGC > 3 ) usage() 18 | if ( ARGC == 2 ) { start = 2 ; stop = ARGV[1]+0 } 19 | else 20 | if ( ARGC == 3 ) { start = ARGV[1]+0 ; stop = ARGV[2]+0 } 21 | 22 | if ( start < 2 ) start = 2 23 | if ( stop < start ) stop = start 24 | 25 | prime[ p_cnt = 1 ] = 3 # keep primes in prime[] 26 | 27 | # keep track of integer part of square root by adding 28 | # odd integers 29 | odd = test = 5 30 | root = 2 31 | squares = 9 32 | 33 | 34 | while ( 
test <= stop ) 35 | { 36 | if ( test >= squares ) 37 | { root++ 38 | odd += 2 39 | squares += odd 40 | } 41 | 42 | flag = 1 43 | for ( i = 1 ; prime[i] <= root ; i++ ) 44 | if ( test % prime[i] == 0 ) # not prime 45 | { flag = 0 ; break } 46 | 47 | if ( flag ) prime[ ++p_cnt ] = test 48 | 49 | test += 2 50 | } 51 | 52 | prime[0] = 2 53 | 54 | for( i = 0 ; i <= p_cnt && prime[i] < start ; i++) ; # stop at p_cnt: there may be no prime >= start 55 | 56 | for ( ; i <= p_cnt && prime[i] <= stop ; i++ ) print prime[i] # the seed prime 3 can exceed stop 57 | 58 | } 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /examples/qsort.awk: -------------------------------------------------------------------------------- 1 | 2 | 3 | # qsort text files 4 | # 5 | 6 | function middle(x,y,z) #return middle of 3 7 | { 8 | if ( x <= y ) 9 | { if ( z >= y ) return y 10 | if ( z < x ) return x 11 | return z 12 | } 13 | 14 | if ( z >= x ) return x 15 | if ( z < y ) return y 16 | return z 17 | } 18 | 19 | 20 | function isort(A , n, i, j, hold) 21 | { 22 | # if needed a sentinel at A[0] will be created 23 | 24 | for( i = 2 ; i <= n ; i++) 25 | { 26 | hold = A[ j = i ] 27 | while ( A[j-1] > hold ) 28 | { j-- ; A[j+1] = A[j] } 29 | 30 | A[j] = hold 31 | } 32 | } 33 | 34 | 35 | # recursive quicksort 36 | function qsort(A, left, right ,i , j, pivot, hold) 37 | { 38 | 39 | pivot = middle(A[left], A[int((left+right)/2)], A[right]) 40 | 41 | i = left 42 | j = right 43 | 44 | while ( i <= j ) 45 | { 46 | while ( A[i] < pivot ) i++ 47 | while ( A[j] > pivot ) j-- 48 | 49 | if ( i <= j ) 50 | { hold = A[i] 51 | A[i++] = A[j] 52 | A[j--] = hold 53 | } 54 | } 55 | 56 | if ( j - left > BLOCK ) qsort(A,left,j) 57 | if ( right - i > BLOCK ) qsort(A,i,right) 58 | } 59 | 60 | BEGIN { BLOCK = 5 } 61 | 62 | 63 | { line[NR] = $0 "" # sort as string 64 | } 65 | 66 | END { 67 | 68 | if ( NR > BLOCK ) qsort(line, 1, NR) 69 | 70 | isort(line, NR) 71 | 72 | for(i = 1 ; i <= NR ; i++) print line[i] 73 | } 74 | 75 | 76 | 77 | 78 | 79 | 
-------------------------------------------------------------------------------- /field.h: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | field.h 3 | copyright 1991-1995,2014-2016 Michael D. Brennan 4 | 5 | This is a source file for mawk, an implementation of 6 | the AWK programming language. 7 | 8 | Mawk is distributed without warranty under the terms of 9 | the GNU General Public License, version 3, 2007. 10 | 11 | If you import elements of this code into another product, 12 | you agree to not name that product mawk. 13 | ********************************************/ 14 | 15 | 16 | /* field.h */ 17 | 18 | #ifndef MAWK_FIELD_H 19 | #define MAWK_FIELD_H 1 20 | 21 | #include "types.h" 22 | 23 | extern void set_field0(const char *, size_t); 24 | extern void split_field0(void); 25 | extern void field_assign(CELL *, CELL *); 26 | extern char *is_string_split(PTR, size_t *); 27 | extern void slow_cell_assign(CELL *, CELL *); 28 | extern CELL *slow_field_ptr(int); 29 | extern int field_addr_to_index(CELL *); 30 | extern void set_binmode(int); 31 | 32 | #define NUM_PFIELDS 5 33 | extern CELL field[FBANK_SZ + NUM_PFIELDS]; 34 | /* $0, $1 ... $(FBANK_SZ-1), NF, RS, FS, CONVFMT, OFMT */ 35 | 36 | /* more fields if needed go here */ 37 | extern CELL **fbankv; /* fbankv[0] == field */ 38 | 39 | /* index to CELL * for a field */ 40 | #define field_ptr(i) ((i) < FBANK_SZ ? field + (i) : slow_field_ptr(i)) 41 | 42 | /* some, such as RS may be defined in system-headers */ 43 | #undef NF 44 | #undef RS 45 | #undef FS 46 | #undef CONVFMT 47 | #undef OFMT 48 | 49 | /* some compilers choke on (NF-field) in a case statement 50 | even though it's constant so ... 
51 | */ 52 | #define NF_field FBANK_SZ 53 | #define RS_field (FBANK_SZ + 1) 54 | #define FS_field (FBANK_SZ + 2) 55 | #define CONVFMT_field (FBANK_SZ + 3) 56 | #define OFMT_field (FBANK_SZ + 4) 57 | 58 | /* the pseudo fields, assignment has side effects */ 59 | #define NF (field + NF_field) /* must be first */ 60 | #define RS (field + RS_field) 61 | #define FS (field + FS_field) 62 | #define CONVFMT (field + CONVFMT_field) 63 | #define OFMT (field + OFMT_field) /* must be last */ 64 | 65 | #define LAST_PFIELD OFMT 66 | 67 | extern int nf; /* shadows NF */ 68 | 69 | /* a shadow type for RS and FS */ 70 | #define SEP_SPACE 0 71 | #define SEP_CHAR 1 72 | #define SEP_STR 2 73 | #define SEP_RE 3 74 | #define SEP_MLR 4 75 | 76 | typedef struct { 77 | char type; 78 | char c; 79 | PTR ptr; /* STRING* or RE machine* */ 80 | } SEPARATOR; 81 | 82 | extern SEPARATOR rs_shadow; 83 | extern CELL fs_shadow; 84 | 85 | /* types for splitting overflow */ 86 | 87 | typedef struct spov { 88 | struct spov *link; 89 | STRING *sval; 90 | } SPLIT_OV; 91 | 92 | extern SPLIT_OV *split_ov_list; 93 | 94 | #endif /* MAWK_FIELD_H */ 95 | -------------------------------------------------------------------------------- /files.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | files.c 4 | copyright 1991-94,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* files.c */ 18 | 19 | #include 20 | 21 | #include "mawk.h" 22 | #include "files.h" 23 | #include "memory.h" 24 | #include "fin.h" 25 | 26 | static FILE * tfopen(const char *, const char *) ; 27 | static void efflush(FILE*) ; 28 | 29 | 30 | /* We store dynamically created files on a linked linear 31 | list with move to the front (big surprise) */ 32 | 33 | typedef struct file 34 | { 35 | struct file *link ; 36 | STRING *name ; 37 | int type ; 38 | PTR ptr ; /* FIN* or FILE* */ 39 | } 40 | FILE_NODE ; 41 | 42 | static FILE_NODE *file_list ; 43 | 44 | static FILE_NODE* std_err ; 45 | static FILE_NODE* std_out ; 46 | 47 | 48 | /* find a file on file_list 49 | if not in the list open it 50 | return is a FIN* if input file 51 | FILE* if output file 52 | 53 | return is 0 on failure to open input 54 | error exit 2 on failure to open output 55 | */ 56 | PTR 57 | file_find(STRING* sval, int type) 58 | { 59 | FILE_NODE *p = file_list ; 60 | FILE_NODE *q = (FILE_NODE *) 0 ; 61 | const char *name = sval->str ; 62 | const char *ostr ; 63 | 64 | while (1) 65 | { 66 | if (!p) 67 | { 68 | /* open a new one */ 69 | p = ZMALLOC(FILE_NODE) ; 70 | 71 | switch (p->type = type) 72 | { 73 | case F_TRUNC: 74 | #if MSDOS 75 | ostr = (binmode() & 2) ? "wb" : "w" ; 76 | #else 77 | ostr = "w" ; 78 | #endif 79 | if (!(p->ptr = (PTR) tfopen(name, ostr))) 80 | goto out_failure ; 81 | break ; 82 | 83 | case F_APPEND: 84 | #if MSDOS 85 | ostr = (binmode() & 2) ? 
"ab" : "a" ; 86 | #else 87 | ostr = "a" ; 88 | #endif 89 | if (!(p->ptr = (PTR) tfopen(name, ostr))) 90 | goto out_failure ; 91 | break ; 92 | 93 | case F_IN: 94 | if (!(p->ptr = (PTR) FINopen(name, 0))) 95 | { 96 | zfree(p, sizeof(FILE_NODE)) ; 97 | return (PTR) 0 ; 98 | } 99 | break ; 100 | 101 | case PIPE_OUT: 102 | { 103 | FILE* outp = popen(name,"w") ; 104 | if (outp == 0) goto out_failure ; 105 | p->ptr = outp ; 106 | flush_all_output() ; 107 | } 108 | break ; 109 | 110 | case PIPE_IN: 111 | { 112 | FIN* fin = FINpopen(name) ; 113 | if (!fin) { 114 | zfree(p, sizeof(FILE_NODE)) ; 115 | return (PTR) 0 ; 116 | } 117 | p->ptr = fin ; 118 | } 119 | break ; 120 | 121 | #ifdef DEBUG 122 | default: 123 | bozo("bad file type") ; 124 | #endif 125 | } 126 | /* successful open */ 127 | p->name = sval ; 128 | sval->ref_cnt++ ; 129 | break ; /* while loop */ 130 | } 131 | 132 | /* search is by name and type */ 133 | if (strcmp(name, p->name->str) == 0 && 134 | (p->type == type || 135 | /* no distinction between F_APPEND and F_TRUNC here */ 136 | (p->type >= F_APPEND && type >= F_APPEND))) 137 | 138 | { 139 | /* found */ 140 | if (!q) /*at front of list */ 141 | return p->ptr ; 142 | /* delete from list for move to front */ 143 | q->link = p->link ; 144 | break ; /* while loop */ 145 | } 146 | 147 | q = p ; p = p->link ; 148 | } /* end while loop */ 149 | 150 | /* put p at the front of the list */ 151 | p->link = file_list ; 152 | file_list = p ; 153 | return p->ptr ; 154 | 155 | out_failure: 156 | errmsg(errno, "cannot open \"%s\" for output", name) ; 157 | mawk_exit(2) ; 158 | /* not reached, shutup -Wall */ 159 | return 0 ; 160 | } 161 | 162 | 163 | /* Close a file and delete it's node from the file_list. 164 | Walk the whole list, in case a name has two nodes, 165 | e.g. < "/dev/tty" and > "/dev/tty" 166 | 167 | This only gets called from bi_close, i.e. 
the 168 | user has explicitly called close 169 | 170 | We will ignore user's request to close /dev/stderr so 171 | that stderr is valid for our error messages 172 | */ 173 | 174 | int 175 | file_close(STRING* sval) 176 | { 177 | FILE_NODE dummy ; 178 | register FILE_NODE *p ; 179 | FILE_NODE *q = &dummy ; /* trails p */ 180 | FILE_NODE *hold ; 181 | char *name = sval->str ; 182 | int retval = -1 ; 183 | int error_exit = 0 ; 184 | int not_found = 1 ; 185 | 186 | if (strcmp(name, "/dev/stderr") == 0) { 187 | fflush(stderr) ; 188 | return 0 ; 189 | } 190 | 191 | dummy.link = p = file_list ; 192 | while (p) 193 | { 194 | if (strcmp(name, p->name->str) == 0) 195 | { 196 | not_found = 0 ; 197 | switch (p->type) 198 | { 199 | case F_TRUNC: 200 | case F_APPEND: 201 | { 202 | FILE* fp = (FILE*) p->ptr ; 203 | if (fclose(fp) == -1) { 204 | errmsg(errno, "close error on file %s", name) ; 205 | error_exit = 1 ; 206 | } 207 | else { 208 | retval = 0 ; 209 | } 210 | } 211 | break ; 212 | 213 | case PIPE_OUT: 214 | retval = pclose((FILE*) p->ptr) ; 215 | if (retval == -1) { 216 | errmsg(errno, "close error on pipe | %s", name) ; 217 | error_exit = 1 ; 218 | } 219 | break ; 220 | 221 | case F_IN: 222 | case PIPE_IN: 223 | retval = FINclose((FIN *) p->ptr) ; 224 | break ; 225 | 226 | } 227 | 228 | free_STRING(p->name) ; 229 | hold = p ; 230 | q->link = p = p->link ; 231 | ZFREE(hold) ; 232 | } 233 | else 234 | { 235 | q = p ; 236 | p = p->link ; 237 | } 238 | } 239 | 240 | file_list = dummy.link ; 241 | if (error_exit) mawk_exit(2) ; 242 | if (not_found) { 243 | errmsg(0, "close on \"%s\" failed (not an open file)", name) ; 244 | } 245 | return retval ; 246 | } 247 | 248 | /* report an output error and close the file */ 249 | void write_error(FILE* fp) 250 | { 251 | FILE_NODE* p = file_list ; 252 | FILE_NODE* q = 0 ; 253 | while(p) { 254 | if (p->ptr == (void*) fp) { 255 | if (q == 0) { 256 | file_list = file_list->link ; 257 | } 258 | else { 259 | q->link = p->link ; 260 | } 261 | 
break ; /* while */ 262 | } 263 | else { 264 | q = p ; 265 | p = p->link ; 266 | } 267 | } 268 | fclose(fp) ; 269 | if (p) { /* this test should never fail */ 270 | const char* type = p->type == PIPE_OUT ? "pipe | " : "file" ; 271 | errmsg(errno, "write error to %s %s", type, p->name->str) ; 272 | zfree(p, sizeof(FILE_NODE)) ; 273 | } 274 | } 275 | 276 | 277 | /* 278 | find an output file with name == sval and fflush it 279 | */ 280 | 281 | int 282 | file_flush(STRING* sval) 283 | { 284 | int ret = -1 ; 285 | register FILE_NODE *p = file_list ; 286 | unsigned len = sval->len ; 287 | char *name = sval->str ; 288 | 289 | if (len==0) 290 | { 291 | /* for consistency with gawk */ 292 | flush_all_output() ; 293 | return 0 ; 294 | } 295 | 296 | while( p ) 297 | { 298 | if ( IS_OUTPUT(p->type) && 299 | len == p->name->len && 300 | strcmp(name,p->name->str) == 0 ) 301 | { 302 | ret = 0 ; 303 | efflush((FILE*)p->ptr) ; 304 | /* it's possible for a command and a file to have the same 305 | name -- so keep looking */ 306 | } 307 | p = p->link ; 308 | } 309 | if (ret == -1) { 310 | errmsg(0, "flush error on %s (not an open file)", name) ; 311 | } 312 | return ret ; 313 | } 314 | 315 | void 316 | flush_all_output(void) 317 | { 318 | FILE_NODE *p ; 319 | 320 | for(p=file_list; p ; p = p->link) 321 | if (IS_OUTPUT(p->type)) efflush((FILE*)p->ptr) ; 322 | } 323 | 324 | static void 325 | efflush(FILE* fp) 326 | { 327 | if (fflush(fp) < 0) 328 | { 329 | write_error(fp) ; 330 | mawk_exit(2) ; 331 | } 332 | } 333 | 334 | /* we close output to check for write errors that might not have shown up 335 | yet because of buffering */ 336 | 337 | int 338 | close_all_output(void) 339 | { 340 | FILE_NODE *p = file_list ; 341 | int ret = 0 ; 342 | 343 | while (p) 344 | { 345 | if (IS_OUTPUT(p->type) && p != std_err) 346 | { 347 | int r ; 348 | FILE* f = (FILE*) p->ptr ; 349 | if (p->type == PIPE_OUT) { 350 | r = pclose(f) ; 351 | } 352 | else { 353 | r = fclose(f) ; 354 | } 355 | if (r == -1) { 
356 | ret = -1 ; 357 | errmsg(errno, "close error on %s %s", 358 | p->type == PIPE_OUT ? "pipe |" : "file", 359 | p->name->str) ; 360 | } 361 | } 362 | p = p->link ; 363 | } 364 | return ret ; 365 | } 366 | 367 | 368 | /* put stdout and stderr in file_list */ 369 | void 370 | set_stdoutput(void) 371 | { 372 | FILE_NODE *p, *q ; 373 | 374 | std_out = p = ZMALLOC(FILE_NODE) ; 375 | p->link = (FILE_NODE*) 0 ; 376 | p->type = F_TRUNC ; 377 | p->name = new_STRING("/dev/stdout") ; 378 | p->ptr = (PTR) stdout ; 379 | std_err = q = ZMALLOC(FILE_NODE); 380 | q->link = p ; 381 | q->type = F_TRUNC ; 382 | q->name = new_STRING("/dev/stderr") ; 383 | q->ptr = (PTR) stderr ; 384 | file_list = q ; 385 | } 386 | 387 | /* fopen() but no buffering to ttys */ 388 | static FILE * 389 | tfopen(const char* name, const char* mode) 390 | { 391 | FILE *retval = fopen(name, mode) ; 392 | 393 | if (retval) 394 | { 395 | if (isatty(fileno(retval))) setbuf(retval, (char *) 0) ; 396 | else 397 | { 398 | #ifdef MSDOS 399 | enlarge_output_buffer(retval) ; 400 | #endif 401 | } 402 | } 403 | return retval ; 404 | } 405 | 406 | #ifdef MSDOS 407 | void 408 | enlarge_output_buffer(fp) 409 | FILE *fp ; 410 | { 411 | if (setvbuf(fp, (char *) 0, _IOFBF, BUFFSZ) < 0) 412 | { 413 | errmsg(errno, "setvbuf failed on fileno %d", fileno(fp)) ; 414 | mawk_exit(2) ; 415 | } 416 | } 417 | 418 | void 419 | stdout_init(void) 420 | { 421 | if (!isatty(1)) enlarge_output_buffer(stdout) ; 422 | if (binmode() & 2) 423 | { 424 | setmode(1,O_BINARY) ; setmode(2,O_BINARY) ; 425 | } 426 | } 427 | #endif /* MSDOS */ 428 | -------------------------------------------------------------------------------- /files.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | files.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 
8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | #ifndef FILES_H 18 | #define FILES_H 19 | 20 | /* IO redirection types */ 21 | #define F_IN (-5) 22 | #define PIPE_IN (-4) 23 | #define PIPE_OUT (-3) 24 | #define F_APPEND (-2) 25 | #define F_TRUNC (-1) 26 | #define IS_OUTPUT(type) ((type)>=PIPE_OUT) 27 | 28 | extern const char* const shell ; /* for pipes and system() */ 29 | 30 | PTR file_find(STRING *, int) ; 31 | int file_close(STRING *) ; 32 | int file_flush(STRING *) ; 33 | void flush_all_output(void) ; 34 | int close_all_output(void) ; 35 | 36 | void write_error(FILE*) ; 37 | 38 | #if MSDOS 39 | int DOSexec(char *) ; 40 | int binmode(void) ; 41 | void set_binmode(int) ; 42 | void enlarge_output_buffer(FILE*) ; 43 | #endif 44 | 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /fin.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | fin.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | /* fin.h */ 17 | 18 | #ifndef FIN_H 19 | #define FIN_H 20 | /* structure to control input files */ 21 | 22 | typedef struct { 23 | int fd ; 24 | FILE *fp ; 25 | char *buff ; /* base */ 26 | char *start ; /* start of next record */ 27 | char* end ; /* end of data in buff[] */ 28 | size_t buffsz ; /* size of buff[] */ 29 | int flags ; 30 | int close_val ; /* return value for close */ 31 | } FIN ; 32 | 33 | /* fd and fp in FIN: three possibilities. 34 | interactive -- reading with fgets, fp is valid fd is -1 35 | block reading a file -- fd is valid >= 0, fp is 0 36 | block reading an input pipe -- fd is valid, used for read 37 | fp is valid, used for pclose() 38 | */ 39 | 40 | #define MAIN_FLAG 1 /* part of main input stream if on */ 41 | #define EOF_FLAG 2 42 | #define START_FLAG 4 /* used when RS == "" */ 43 | 44 | FIN * FINdopen(int, int) ; 45 | FIN * FINopen(const char *, int) ; 46 | FIN* FINpopen(const char*) ; 47 | int FINclose(FIN *) ; 48 | void FINsemi_close(FIN *) ; 49 | char* FINgets(FIN *, size_t *) ; 50 | size_t fillbuff(int, char *, size_t) ; 51 | 52 | extern FIN *main_fin ; /* for the main input stream */ 53 | void open_main(void) ; 54 | FIN* next_main(int) ; 55 | 56 | #define setmode setmode_ /* avoid conflict with the system declaration of setmode on BitRig BSD */ 57 | void setmode(int,int) ; 58 | #endif /* FIN_H */ 59 | -------------------------------------------------------------------------------- /hash.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | hash.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 
11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | /* hash.c */ 18 | 19 | #include "mawk.h" 20 | #include "memory.h" 21 | #include "symtype.h" 22 | 23 | /* 24 | * FNV-1a hash function 25 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 26 | */ 27 | unsigned 28 | hash(const char *s) 29 | { 30 | /* FNV-1a */ 31 | register unsigned h = 2166136261U; 32 | 33 | while (*s) { 34 | h ^= (unsigned char) (*s++); 35 | h *= 16777619U; 36 | } 37 | return h; 38 | } 39 | 40 | unsigned 41 | hash2(const char *s, size_t len) 42 | { 43 | /* FNV-1a */ 44 | register unsigned h = 2166136261U; 45 | 46 | while (len > 0) { 47 | h ^= (unsigned char) (*s++); 48 | h *= 16777619U; 49 | len-- ; 50 | } 51 | return h; 52 | } 53 | 54 | typedef struct hash 55 | { 56 | struct hash *link ; 57 | SYMTAB symtab ; 58 | } HASHNODE ; 59 | 60 | #if defined(__cplusplus) 61 | #define delete delete_ 62 | #endif 63 | 64 | static HASHNODE *delete(const char *) ; 65 | 66 | static HASHNODE *hash_table[HASH_PRIME] ; 67 | 68 | /* 69 | insert a string in the symbol table. 
70 | Caller knows the symbol is not there 71 | -- used for initialization 72 | */ 73 | 74 | SYMTAB * 75 | insert(const char* s) 76 | { 77 | register HASHNODE *p = ZMALLOC(HASHNODE) ; 78 | register unsigned h ; 79 | 80 | p->link = hash_table[h = hash(s) % HASH_PRIME] ; 81 | p->symtab.name = s ; 82 | hash_table[h] = p ; 83 | return &p->symtab ; 84 | } 85 | 86 | /* Find s in the symbol table, 87 | if not there insert it, s must be dup'ed */ 88 | 89 | SYMTAB * 90 | find(const char* s) 91 | { 92 | register HASHNODE *p ; 93 | HASHNODE *q ; 94 | unsigned h ; 95 | 96 | p = hash_table[h = hash(s) % HASH_PRIME] ; 97 | q = (HASHNODE *) 0 ; 98 | while (1) 99 | { 100 | if (!p) 101 | { 102 | p = ZMALLOC(HASHNODE) ; 103 | p->symtab.type = ST_NONE ; 104 | p->symtab.name = strcpy((char *)zmalloc(strlen(s) + 1), s) ; 105 | break ; 106 | } 107 | 108 | if (strcmp(p->symtab.name, s) == 0) /* found */ 109 | { 110 | if (!q) /* already at the front */ 111 | return &p->symtab ; 112 | else /* delete from the list */ 113 | { 114 | q->link = p->link ; break ; 115 | } 116 | } 117 | 118 | q = p ; p = p->link ; 119 | } 120 | /* put p on front of the list */ 121 | p->link = hash_table[h] ; 122 | hash_table[h] = p ; 123 | return &p->symtab ; 124 | } 125 | 126 | 127 | /* remove a node from the hash table 128 | return a ptr to the node */ 129 | 130 | static unsigned last_hash ; 131 | 132 | static HASHNODE * 133 | delete(const char* s) 134 | { 135 | register HASHNODE *p ; 136 | HASHNODE *q = (HASHNODE *) 0 ; 137 | unsigned h ; 138 | 139 | p = hash_table[last_hash = h = hash(s) % HASH_PRIME] ; 140 | while (p) 141 | { 142 | if (strcmp(p->symtab.name, s) == 0) /* found */ 143 | { 144 | if (q) q->link = p->link ; 145 | else hash_table[h] = p->link ; 146 | return p ; 147 | } 148 | else 149 | { 150 | q = p ; p = p->link ; 151 | } 152 | } 153 | 154 | #ifdef DEBUG /* we should not ever get here */ 155 | bozo("delete") ; 156 | #endif 157 | return (HASHNODE *) 0 ; 158 | } 159 | 160 | /* when processing user 
functions, global ids which are 161 | replaced by local ids are saved on this list */ 162 | 163 | static HASHNODE *save_list ; 164 | 165 | /* store a global id on the save list, 166 | return a ptr to the local symtab */ 167 | SYMTAB * 168 | save_id(const char* s) 169 | { 170 | HASHNODE *p, *q ; 171 | unsigned h ; 172 | 173 | p = delete(s) ; 174 | q = ZMALLOC(HASHNODE) ; 175 | q->symtab.type = ST_LOCAL_NONE ; 176 | q->symtab.name = p->symtab.name ; 177 | /* put q in the hash table */ 178 | q->link = hash_table[h = last_hash] ; 179 | hash_table[h] = q ; 180 | 181 | /* save p */ 182 | p->link = save_list ; save_list = p ; 183 | 184 | return &q->symtab ; 185 | } 186 | 187 | /* restore all global indentifiers */ 188 | void 189 | restore_ids(void) 190 | { 191 | register HASHNODE *p, *q ; 192 | register unsigned h ; 193 | 194 | q = save_list ; save_list = (HASHNODE *) 0 ; 195 | while (q) 196 | { 197 | p = q ; q = q->link ; 198 | zfree(delete(p->symtab.name), sizeof(HASHNODE)) ; 199 | p->link = hash_table[h = last_hash] ; 200 | hash_table[h] = p ; 201 | } 202 | } 203 | 204 | 205 | /* search the symbol table backwards for the 206 | disassembler. 
This is slow -- so what 207 | */ 208 | 209 | const char * 210 | reverse_find(int type, void* ptr) 211 | { 212 | CELL *cp = 0 ; 213 | ARRAY array = 0 ; 214 | const char* const uk = "unknown" ; 215 | 216 | int i ; 217 | HASHNODE *p ; 218 | 219 | 220 | switch (type) 221 | { 222 | case ST_VAR: 223 | case ST_FIELD: 224 | cp = *(CELL **) ptr ; 225 | break ; 226 | 227 | case ST_ARRAY: 228 | array = *(ARRAY *) ptr ; 229 | break ; 230 | 231 | default: 232 | return uk ; 233 | } 234 | 235 | for (i = 0; i < HASH_PRIME; i++) 236 | { 237 | p = hash_table[i] ; 238 | while (p) 239 | { 240 | if (p->symtab.type == type) 241 | { 242 | switch (type) 243 | { 244 | case ST_VAR: 245 | case ST_FIELD: 246 | if (cp == p->symtab.stval.cp) 247 | return p->symtab.name ; 248 | break ; 249 | 250 | case ST_ARRAY: 251 | if (array == p->symtab.stval.array) 252 | return p->symtab.name ; 253 | break ; 254 | } 255 | } 256 | 257 | p = p->link ; 258 | } 259 | } 260 | return uk ; 261 | } 262 | 263 | -------------------------------------------------------------------------------- /init.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | init.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* init.h */ 18 | 19 | 20 | #ifndef INIT_H 21 | #define INIT_H 22 | 23 | #include "symtype.h" 24 | 25 | /* nodes to link file names for multiple 26 | -f option */ 27 | 28 | typedef struct pfile { 29 | struct pfile *link ; 30 | char *fname ; 31 | } PFILE ; 32 | 33 | extern PFILE *pfile_list ; 34 | 35 | void initialize(int, char **) ; 36 | void code_init(void) ; 37 | void code_cleanup(void) ; 38 | void compile_cleanup(void) ; 39 | void scan_init(const char *) ; 40 | void bi_vars_init(void) ; 41 | void bi_funct_init(void) ; 42 | void print_init(void) ; 43 | void kw_init(void) ; 44 | void field_init(void) ; 45 | void fpe_init(void) ; 46 | void load_environ(ARRAY) ; 47 | void set_stdoutput(void) ; 48 | 49 | #endif /* INIT_H */ 50 | -------------------------------------------------------------------------------- /int.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | /* int.c */ 4 | 5 | #include "int.h" 6 | 7 | const double MAX_INT_DOUBLE = 9007199254740991.0 ; /* 2^53-1 */ 8 | const double MIN_INT_DOUBLE = -9007199254740991.0 ; /* 2^53-1 */ 9 | 10 | #if LONG64 11 | const int have_long64 = 1 ; 12 | #else 13 | const int have_long64 = 0 ; 14 | #endif 15 | 16 | 17 | const double INT64_MAX_DOUBLE = 9223372036854775807.000000 ; /* 2^63 -1 */ 18 | const double INT64_MIN_DOUBLE = -9223372036854775808.000000 ; /* -2^63 */ 19 | const double UINT64_MAX_DOUBLE = 18446744073709551615.000000 ; /* 2^64 - 1 */ 20 | 21 | 22 | int64_t d_to_i64(double d) 23 | { 24 | if (d >= INT64_MAX_DOUBLE) return INT64_MAX ; 25 | if (d > INT64_MIN_DOUBLE) return (int64_t) d ; 26 | return INT64_MIN ; 27 | } 28 | 29 | uint64_t d_to_u64(double d) 30 | { 31 | if (d >= UINT64_MAX_DOUBLE) return UINT64_MAX ; 32 | if (d >= 0.0) return (uint64_t) d ; 33 | { 34 | int64_t x = INT64_MIN ; 35 | if (d > INT64_MIN_DOUBLE) x = (int64_t) d ; 36 | return (uint64_t) x ; 37 | } 38 | } 39 | 40 | 41 | int 
d_to_int(double d) 42 | { 43 | if (d >= 2147483647.0) return 2147483647 ; 44 | if (d > -2147483648.0) return (int) d ; 45 | return -2147483648 ; 46 | } 47 | -------------------------------------------------------------------------------- /int.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef MAWK_INT_H 4 | #define MAWK_INT_H 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | extern const double MAX_INT_DOUBLE ; /* 2^53-1 */ 11 | extern const double MIN_INT_DOUBLE ; /* -2^53+1 */ 12 | 13 | /* can be represented EXACTLY as an integer with 8 byte double */ 14 | #define is_int_double(x) ((x)==floor(x)&& (x)<=MAX_INT_DOUBLE\ 15 | &&(x)>=MIN_INT_DOUBLE) 16 | 17 | extern const double INT64_MAX_DOUBLE ; 18 | extern const double INT64_MIN_DOUBLE ; 19 | extern const double UINT64_MAX_DOUBLE ; 20 | 21 | uint64_t d_to_u64(double) ; 22 | int64_t d_to_i64(double) ; 23 | int d_to_int(double) ; 24 | 25 | extern const int have_long64 ; 26 | 27 | #if LONG_MAX == 0x7fffffffffffffffL 28 | #define LONG64 1 29 | #define LDFMT "%ld" 30 | #elif LONG_MAX == 0x7fffffffL 31 | #define LONG32 1 32 | #define LDFMT "%lld" 33 | #else 34 | #error "unexpected value for LONG_MAX" 35 | #endif 36 | 37 | #endif /* MAWK_INT_H */ 38 | -------------------------------------------------------------------------------- /jmp.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | jmp.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* this module deals with back patching jumps, breaks and continues, 18 | and with save and restoring code when we move code. 19 | There are three stacks. If we encounter a compile error, the 20 | stacks are frozen, i.e., we do not attempt error recovery 21 | on the stacks 22 | */ 23 | 24 | 25 | #include "mawk.h" 26 | #include "symtype.h" 27 | #include "jmp.h" 28 | #include "code.h" 29 | #include "sizes.h" 30 | #include "init.h" 31 | #include "memory.h" 32 | 33 | #define error_state (compile_error_count>0) 34 | 35 | 36 | /*---------- back patching jumps ---------------*/ 37 | 38 | typedef struct jmp 39 | { 40 | struct jmp *link ; 41 | int source_offset ; 42 | } 43 | JMP ; 44 | 45 | static JMP *jmp_top ; 46 | 47 | void 48 | code_jmp(int jtype, INST* target) 49 | { 50 | if (error_state) return ; 51 | 52 | /* WARNING: Don't emit any code before using target or 53 | relocation might make it invalid */ 54 | 55 | if (target) code2op(jtype, target - (code_ptr + 1)) ; 56 | else 57 | { 58 | register JMP *p = ZMALLOC(JMP) ; 59 | 60 | /* stack for back patch */ 61 | code2op(jtype, 0) ; 62 | p->source_offset = code_offset - 1 ; 63 | p->link = jmp_top ; 64 | jmp_top = p ; 65 | } 66 | } 67 | 68 | void 69 | patch_jmp(INST* target) /* patch a jump on the jmp_stack */ 70 | { 71 | register JMP *p ; 72 | register INST *source ; /* jmp starts here */ 73 | 74 | if (!error_state) 75 | { 76 | #ifdef DEBUG 77 | if (!jmp_top) bozo("jmp stack underflow") ; 78 | #endif 79 | 80 | p = jmp_top ; jmp_top = p->link ; 81 | source = p->source_offset + code_base ; 82 | source->op = target - source ; 83 | 84 | ZFREE(p) ; 85 | } 86 | } 87 | 88 | 89 | /*-- break and continue -------*/ 90 | 91 | typedef struct bc 92 | { 93 | struct bc *link ; /* stack as linked list */ 94 | int type ; /* 'B' or 'C' or mark start with 0 */ 95 | int source_offset ; /* position of _JMP */ 96 | } 97 | BC ; 98 | 99 | static BC *bc_top ; 100 | 101 | 102 | 103 | 
void 104 | BC_new() /* mark the start of a loop */ 105 | { 106 | BC_insert(0, (INST *) 0) ; 107 | } 108 | 109 | void 110 | BC_insert(int type, INST* address) 111 | { 112 | register BC *p ; 113 | 114 | if (error_state) return ; 115 | 116 | if (type && !bc_top) 117 | { 118 | compile_error("%s statement outside of loop", 119 | type == 'B' ? "break" : "continue") ; 120 | 121 | return ; 122 | } 123 | else 124 | { 125 | p = ZMALLOC(BC) ; 126 | p->type = type ; 127 | p->source_offset = address - code_base ; 128 | p->link = bc_top ; 129 | bc_top = p ; 130 | } 131 | } 132 | 133 | 134 | /* patch all break and continues for one loop */ 135 | void 136 | BC_clear(INST* B_address, INST* C_address) 137 | { 138 | register BC *p, *q ; 139 | INST *source ; 140 | 141 | if (error_state) return ; 142 | 143 | p = bc_top ; 144 | /* pop down to the mark node */ 145 | while (p->type) 146 | { 147 | source = code_base + p->source_offset ; 148 | source->op = (p->type == 'B' ? B_address : C_address) 149 | - source ; 150 | 151 | q = p ; p = p->link ; ZFREE(q) ; 152 | } 153 | /* remove the mark node */ 154 | bc_top = p->link ; 155 | ZFREE(p) ; 156 | } 157 | 158 | /*----- moving code --------------------------*/ 159 | 160 | /* a stack to hold some pieces of code while 161 | reorganizing loops . 
162 | */ 163 | 164 | typedef struct mc 165 | { /* mc -- move code */ 166 | struct mc *link ; 167 | INST *code ; /* the save code */ 168 | unsigned len ; /* its length */ 169 | int scope ; /* its scope */ 170 | int move_level ; /* size of this stack when coded */ 171 | FBLOCK *fbp ; /* if scope FUNCT */ 172 | int offset ; /* distance from its code base */ 173 | } 174 | MC ; 175 | 176 | static MC *mc_top ; 177 | int code_move_level = 0 ; /* see comment in jmp.h */ 178 | 179 | #define NO_SCOPE -1 180 | /* means relocation of resolve list not needed */ 181 | 182 | void 183 | code_push( 184 | INST *code , 185 | unsigned len , 186 | int scope , 187 | FBLOCK *fbp ) 188 | { 189 | register MC *p ; 190 | 191 | if (!error_state) 192 | { 193 | p = ZMALLOC(MC) ; 194 | p->len = len ; 195 | p->link = mc_top ; 196 | mc_top = p ; 197 | 198 | if (len) 199 | { 200 | p->code = (INST *) zmalloc(sizeof(INST) * len) ; 201 | memcpy(p->code, code, sizeof(INST) * len) ; 202 | } 203 | if (!resolve_list) p->scope = NO_SCOPE ; 204 | else 205 | { 206 | p->scope = scope ; 207 | p->move_level = code_move_level ; 208 | p->fbp = fbp ; 209 | p->offset = (code == 0) ? 0 : code - code_base ; 210 | } 211 | } 212 | code_move_level++ ; 213 | } 214 | 215 | /* copy the code at the top of the mc stack to target. 
216 | return the number of INSTs moved */ 217 | 218 | unsigned 219 | code_pop(INST* target) 220 | { 221 | register MC *p ; 222 | unsigned len ; 223 | int target_offset ; 224 | 225 | if (error_state) return 0 ; 226 | 227 | #ifdef DEBUG 228 | if (!mc_top) bozo("mc underflow") ; 229 | #endif 230 | 231 | p = mc_top ; mc_top = p->link ; 232 | len = p->len ; 233 | 234 | while (target + len >= code_warn) 235 | { 236 | target_offset = target - code_base ; 237 | code_grow() ; 238 | target = code_base + target_offset ; 239 | } 240 | 241 | if (len) 242 | { 243 | memcpy(target, p->code, len * sizeof(INST)) ; 244 | zfree(p->code, len * sizeof(INST)) ; 245 | } 246 | 247 | if (p->scope != NO_SCOPE) 248 | { 249 | target_offset = target - code_base ; 250 | relocate_resolve_list(p->scope, p->move_level, p->fbp, 251 | p->offset, len, target_offset - p->offset) ; 252 | } 253 | 254 | ZFREE(p) ; 255 | code_move_level-- ; 256 | return len ; 257 | } 258 | -------------------------------------------------------------------------------- /jmp.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | jmp.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | #ifndef JMP_H 18 | #define JMP_H 19 | 20 | void BC_new(void) ; 21 | void BC_insert(int, INST*) ; 22 | void BC_clear(INST *, INST *) ; 23 | void code_push(INST *, unsigned, int, FBLOCK*) ; 24 | unsigned code_pop(INST *) ; 25 | void code_jmp(int, INST *) ; 26 | void patch_jmp(INST *) ; 27 | 28 | extern int code_move_level ; 29 | /* used to as one part of unique identification of context when 30 | moving code. Global for communication with parser. 31 | */ 32 | 33 | #endif /* JMP_H */ 34 | 35 | -------------------------------------------------------------------------------- /kw.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | kw.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* kw.c */ 18 | 19 | 20 | #include "mawk.h" 21 | #include "symtype.h" 22 | #include "parse.h" 23 | #include "init.h" 24 | 25 | 26 | const 27 | static struct kw 28 | { 29 | const char *text ; 30 | short kw ; 31 | } 32 | keywords[] = 33 | { 34 | { "print", PRINT }, 35 | { "printf", PRINTF }, 36 | { "do", DO }, 37 | { "while", WHILE }, 38 | { "for", FOR }, 39 | { "break", BREAK }, 40 | { "continue", CONTINUE }, 41 | { "if", IF }, 42 | { "else", ELSE }, 43 | { "in", IN }, 44 | { "delete", DELETE }, 45 | { "split", SPLIT }, 46 | { "length", LENGTH }, 47 | { "match", MATCH_FUNC }, 48 | { "BEGIN", BEGIN }, 49 | { "END", END }, 50 | { "exit", EXIT }, 51 | { "next", NEXT }, 52 | { "nextfile", NEXTFILE }, 53 | { "return", RETURN }, 54 | { "getline", GETLINE }, 55 | { "sub", SUB }, 56 | { "gsub", GSUB }, 57 | { "sprintf", SPRINTF }, 58 | { "function", FUNCTION }, 59 | { 0, 0 } 60 | } ; 61 | 62 | /* put keywords in the symbol table */ 63 | void 64 | kw_init(void) 65 | { 66 | register const struct kw *p = keywords ; 67 | register SYMTAB *q ; 68 | 69 | while (p->text) 70 | { 71 | q = insert(p->text) ; 72 | q->type = ST_KEYWORD ; 73 | q->stval.kw = p++->kw ; 74 | } 75 | } 76 | 77 | /* find a keyword to emit an error message */ 78 | const char * 79 | find_kw_str(int kw_token) 80 | { 81 | const struct kw *p ; 82 | 83 | for (p = keywords; p->text; p++) 84 | if (p->kw == kw_token) return p->text ; 85 | /* search failed */ 86 | return (char *) 0 ; 87 | } 88 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | main.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 
8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | 18 | /* main.c */ 19 | 20 | #include "mawk.h" 21 | #include "init.h" 22 | #include "code.h" 23 | #include "files.h" 24 | 25 | 26 | int mawk_state ; /* 0 is compiling */ 27 | int exit_code ; 28 | 29 | int 30 | main(int argc, char **argv) 31 | { 32 | 33 | initialize(argc, argv) ; 34 | 35 | parse() ; 36 | 37 | mawk_state = EXECUTION ; 38 | execute(execution_start, eval_stack - 1, 0) ; 39 | /* never returns */ 40 | return 0 ; 41 | } 42 | 43 | void 44 | mawk_exit(int x) 45 | { 46 | if (mawk_state == EXECUTION) { 47 | if (close_all_output() < 0) { 48 | x = 2 ; 49 | } 50 | } 51 | exit(x) ; 52 | } 53 | -------------------------------------------------------------------------------- /makescan.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | makescan.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* source for makescan.exe which builds the scancode[] 18 | via: makescan.exe > scancode.c 19 | */ 20 | 21 | #include 22 | 23 | #define MAKESCAN 24 | 25 | #include "scan.h" 26 | 27 | int scan_code[256] ; 28 | 29 | void 30 | scan_init(void) 31 | { 32 | int *p ; 33 | 34 | for(p = scan_code+1; p < scan_code+256; p++) { 35 | *p = SC_UNEXPECTED ; 36 | } 37 | for (p = scan_code + '0'; p <= scan_code + '9'; p++) *p = SC_DIGIT ; 38 | 39 | scan_code[' '] = scan_code['\t'] = scan_code['\f'] = SC_SPACE ; 40 | scan_code['\r'] = scan_code['\013'] = SC_SPACE ; 41 | 42 | scan_code[';'] = SC_SEMI_COLON ; 43 | scan_code['\n'] = SC_NL ; 44 | scan_code['{'] = SC_LBRACE ; 45 | scan_code['}'] = SC_RBRACE ; 46 | scan_code['+'] = SC_PLUS ; 47 | scan_code['-'] = SC_MINUS ; 48 | scan_code['*'] = SC_MUL ; 49 | scan_code['/'] = SC_DIV ; 50 | scan_code['%'] = SC_MOD ; 51 | scan_code['^'] = SC_POW ; 52 | scan_code['('] = SC_LPAREN ; 53 | scan_code[')'] = SC_RPAREN ; 54 | scan_code['_'] = SC_IDCHAR ; 55 | scan_code['='] = SC_EQUAL ; 56 | scan_code['#'] = SC_COMMENT ; 57 | scan_code['\"'] = SC_DQUOTE ; 58 | scan_code[','] = SC_COMMA ; 59 | scan_code['!'] = SC_NOT ; 60 | scan_code['<'] = SC_LT ; 61 | scan_code['>'] = SC_GT ; 62 | scan_code['|'] = SC_OR ; 63 | scan_code['&'] = SC_AND ; 64 | scan_code['?'] = SC_QMARK ; 65 | scan_code[':'] = SC_COLON ; 66 | scan_code['['] = SC_LBOX ; 67 | scan_code[']'] = SC_RBOX ; 68 | scan_code['\\'] = SC_ESCAPE ; 69 | scan_code['.'] = SC_DOT ; 70 | scan_code['~'] = SC_MATCH ; 71 | scan_code['$'] = SC_DOLLAR ; 72 | 73 | for (p = scan_code + 'A'; p <= scan_code + 'Z'; p++) 74 | *p = *(p + 'a' - 'A') = SC_IDCHAR ; 75 | 76 | } 77 | 78 | void 79 | scan_print(const char* date) 80 | { 81 | register int *p = scan_code ; 82 | register int c ; /* column */ 83 | register int r ; /* row */ 84 | 85 | printf("\n\n/* scancode.c */\n") ; 86 | printf("/* generated from makescan.c */\n") ; 87 | printf("/* %s 
*/\n\n\n", date) ; 88 | printf("int scan_code[256] = {\n") ; 89 | 90 | for (r = 1; r <= 16; r++) 91 | { 92 | for (c = 1; c <= 16; c++) 93 | { 94 | printf("%2d", *p++) ; 95 | if (r != 16 || c != 16) putchar(',') ; 96 | } 97 | putchar('\n') ; 98 | } 99 | 100 | printf("} ;\n") ; 101 | } 102 | 103 | char dbuff[128] ; 104 | const char* get_date(void) 105 | { 106 | FILE* fp = popen("/bin/date", "r") ; 107 | if (fp) { 108 | char* p ; 109 | fgets(dbuff,128,fp) ; 110 | if (p = strchr(dbuff,'\n')) *p = 0 ; 111 | pclose(fp) ; 112 | } 113 | return dbuff ; 114 | } 115 | 116 | 117 | int 118 | main(void) 119 | { 120 | scan_init() ; 121 | scan_print(get_date()) ; 122 | return 0 ; 123 | } 124 | -------------------------------------------------------------------------------- /mawk.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | mawk.h 4 | copyright 1991-94,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | 19 | /* mawk.h */ 20 | 21 | #ifndef MAWK_H 22 | #define MAWK_H 23 | 24 | typedef void* PTR ; 25 | typedef int Bool ; 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "types.h" 33 | 34 | #ifdef DEBUG 35 | #define YYDEBUG 1 36 | extern int yydebug ; /* print parse if on */ 37 | extern int dump_RE ; 38 | #endif 39 | 40 | extern int posix_space_flag , interactive_flag ; 41 | extern int posix_repl_scan_flag ; 42 | 43 | /*---------------- 44 | * GLOBAL VARIABLES 45 | *----------------*/ 46 | 47 | /* a well known string */ 48 | extern STRING null_str ; 49 | extern STRING* const the_empty_str ; 50 | 51 | /* a useful scratch area */ 52 | extern char* string_buff ; 53 | extern char* string_buff_end ; 54 | 55 | char* enlarge_string_buff(char*) ; 56 | 57 | /* help with casts */ 58 | extern int mpow2[] ; 59 | 60 | 61 | /* these are used by the parser, scanner and error messages 62 | from the compile */ 63 | 64 | extern const char *pfile_name ; /* program input file */ 65 | extern int current_token ; 66 | extern unsigned token_lineno ; /* lineno of current token */ 67 | extern unsigned compile_error_count ; 68 | extern int paren_cnt, brace_cnt ; 69 | extern int print_flag, getline_flag ; 70 | extern int mawk_state ; 71 | #define EXECUTION 1 /* other state is 0 compiling */ 72 | 73 | 74 | extern const char *progname ; /* for error messages */ 75 | extern unsigned rt_nr , rt_fnr ; /* ditto */ 76 | 77 | /* macro to test the type of two adjacent cells */ 78 | #define TEST2(cp) (mpow2[(cp)->type]+mpow2[((cp)+1)->type]) 79 | 80 | /* macro to get at the string part of a CELL */ 81 | #define string(cp) ((STRING *)(cp)->ptr) 82 | 83 | #ifdef DEBUG 84 | #define cell_destroy(cp) DB_cell_destroy(cp) 85 | #else 86 | 87 | #define cell_destroy(cp) \ 88 | do { \ 89 | if ((cp)->type >= C_STRING && (cp)->type <= C_MBSTRN) {\ 90 | free_STRING(string(cp)) ;\ 91 | }\ 92 | } while(0) 93 | 
#endif 94 | 95 | /* prototypes */ 96 | 97 | void cast1_to_s(CELL *) ; 98 | void cast1_to_d(CELL *) ; 99 | void cast_to_RE(CELL *) ; 100 | void cast_for_split(CELL *) ; 101 | void check_strnum(CELL *) ; 102 | void cast_to_REPL(CELL *) ; 103 | int d_to_I(double) ; 104 | 105 | #define cast2_to_s(p) do{cast1_to_s(p);cast1_to_s(p+1);} while(0) 106 | #define cast2_to_d(p) do{cast1_to_d(p);cast1_to_d(p+1);} while(0) 107 | 108 | 109 | int test(CELL *) ; /* test for null non-null */ 110 | CELL * cellcpy(CELL *, CELL *) ; 111 | CELL * repl_cpy(CELL *, CELL *) ; 112 | void DB_cell_destroy(CELL *) ; 113 | void overflow(const char *, unsigned) ; 114 | void rt_overflow(const char *, unsigned) ; 115 | void rt_error(const char*, ...) ; 116 | void mawk_exit(int) ; 117 | void da(INST *, FILE *) ; 118 | char * str_str(const char*, size_t , const char*, size_t) ; 119 | size_t rm_escape(char *) ; 120 | char * re_pos_match(const char *, size_t ,PTR, size_t*, Bool) ; 121 | int binmode(void) ; 122 | 123 | void parse(void) ; 124 | int yylex(void) ; 125 | int yyparse(void) ; 126 | void yyerror(const char *) ; 127 | void scan_cleanup(void) ; 128 | 129 | void bozo(const char*) ; 130 | void errmsg(int, const char*, ...) ; 131 | void compile_error(const char*, ...) ; 132 | void call_error(unsigned, const char*, ...) ; 133 | void compile_or_rt_error(const char*, ...) ; 134 | 135 | void execute(INST *, CELL *, CELL *) ; 136 | const char* find_kw_str(int) ; 137 | 138 | #endif /* MAWK_H */ 139 | -------------------------------------------------------------------------------- /memory.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | memory.c 4 | copyright 1991,1992,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 
11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | 18 | /* memory.c */ 19 | 20 | #include "mawk.h" 21 | #include "memory.h" 22 | 23 | STRING null_str = 24 | {0, 1, ""} ; 25 | STRING* const the_empty_str = &null_str ; 26 | 27 | static STRING * 28 | xnew_STRING(size_t len) 29 | { 30 | STRING *sval = (STRING *) zmalloc(STRING_SIZE(len)) ; 31 | 32 | sval->len = len ; 33 | sval->ref_cnt = 1 ; 34 | sval->str[len] = 0 ; 35 | return sval ; 36 | } 37 | 38 | /* allocate space for a STRING */ 39 | 40 | STRING * 41 | new_STRING0(size_t len) 42 | { 43 | if (len == 0) { 44 | return STRING_dup(the_empty_str) ; 45 | } 46 | else 47 | { 48 | return xnew_STRING(len) ; 49 | } 50 | } 51 | 52 | /* memcpy into a STRING */ 53 | 54 | STRING* 55 | new_STRING2(const char* s, size_t len) 56 | { 57 | if (len == 0) { 58 | return STRING_dup(the_empty_str) ; 59 | } 60 | else { 61 | STRING* ret = xnew_STRING(len) ; 62 | memcpy(ret->str, s, len) ; 63 | return ret ; 64 | } 65 | } 66 | 67 | /* convert char* to STRING* */ 68 | 69 | STRING * 70 | new_STRING(const char* s) 71 | { 72 | size_t len = strlen(s) ; 73 | return new_STRING2(s,len) ; 74 | } 75 | 76 | /* compare two strings in manner of strcmp */ 77 | int STRING_cmp(STRING* s1, STRING* s2) 78 | { 79 | int ret ; 80 | size_t len1 = s1->len ; 81 | size_t len2 = s2->len ; 82 | size_t len = len1 <= len2 ? 
len1 : len2 ; 83 | 84 | ret = memcmp(s1->str, s2->str, len) ; 85 | if (ret == 0) { 86 | if (len1 > len) { 87 | ret = 1 ; 88 | } 89 | else if (len2 > len) { 90 | ret = -1 ; 91 | } 92 | } 93 | return ret ; 94 | } 95 | 96 | 97 | #ifdef DEBUG 98 | 99 | void 100 | DB_free_STRING(STRING* sval) 101 | { 102 | if (--sval->ref_cnt == 0) { 103 | zfree(sval, STRING_SIZE(sval->len)) ; 104 | } 105 | } 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /memory.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | memory.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* memory.h */ 18 | 19 | #ifndef MEMORY_H 20 | #define MEMORY_H 21 | 22 | #include "types.h" 23 | #include "zmalloc.h" 24 | 25 | 26 | STRING *new_STRING(const char*) ; 27 | STRING *new_STRING0(size_t) ; 28 | STRING* new_STRING2(const char*,size_t) ; 29 | 30 | #ifdef DEBUG 31 | void DB_free_STRING(STRING *) ; 32 | 33 | #define free_STRING(s) DB_free_STRING(s) 34 | 35 | #else 36 | 37 | #define free_STRING(sval) do { \ 38 | if ( -- (sval)->ref_cnt == 0 )\ 39 | zfree(sval, STRING_SIZE((sval)->len)) ;\ 40 | } while(0) 41 | #endif 42 | 43 | 44 | #endif /* MEMORY_H */ 45 | -------------------------------------------------------------------------------- /old/ACKNOWLEDGMENT: -------------------------------------------------------------------------------- 1 | Version 1.2 2 | =========== 3 | 4 | Thanks for help with beta test to Bill Davidsen, Tom Dickey, Ed 5 | Ferguson, Jack Fitts, Onno van der Linden, Carl Mascott, Jean-Pierre 6 | Radley, John Roll, Ian Searle, Bob Stockler. 7 | 8 | The calendar program examples/hical was written by Bob Stockler. 9 | 10 | Darrel Hankerson ported versions 1.2.x to DOS/OS2. 11 | 12 | Version 1.0 and 1.1 13 | =================== 14 | 15 | Carl Mascott ported mawk to V7 and in the process rooted out 16 | some subtle (and not so subtle) bugs. 17 | 18 | Ian Searle ported mawk to System V and put up with my insane 19 | attempts to get fpe exception trapping off. 20 | 21 | An anonymous reviewer for comp.sources.reviewed did the 22 | MSC and Mac ports and wrote .bat files for the tests. 23 | Another or maybe the same reviewer did the Dynix port. 24 | 25 | Ports to new systems: 26 | Ed Ferguson MIPS M2000 C2.20 OS4.52 27 | Jwahar R. 
Bammi Atari ST 28 | Berry Kercheval SGI IRIX 4.0.1 29 | Andy Newman Next 2.1 30 | Mike Carlton Next 2.1 31 | Elliot Jaffe AIX 3.1 32 | Jeremy Martin Convex 9.1 33 | Scott Hunziker Coherent 4.0 34 | Ken Poulton Hpux 35 | Onno van der Linden 386bsd 0.1 36 | Bob Hutchinson Linux 0.98p14 37 | 38 | The DOS version is a lot better thanks to suggestions and testing 39 | from Ed Ferguson, Jack Fitts, Nadav Horesh, Michael Golan and 40 | Conny Ohstrom. The DOS additions for 1.1.2d are all ideas of 41 | Ben Myers; much of the code is his too. 42 | 43 | Arnold Robbins kept me current on POSIX standards for AWK, and 44 | explained some of the "dark corners". 45 | 46 | Thank you to everyone who reported bugs or offered encouragement, 47 | suggestions or criticism. (At least the bugs got fixed). 48 | -------------------------------------------------------------------------------- /old/CHANGES: -------------------------------------------------------------------------------- 1 | 1.3.1 -> 1.3.2 Sep 1996 2 | 3 | 1) Numeric but not integer indices caused core dump in new array scheme. 4 | Fixed bug and fired test division. 5 | 6 | 2) Added ferror() checks on writes. 7 | 8 | 3) Added some static storage specs to array.c to keep non-ansi 9 | compilers happy. 10 | 11 | 1.3 -> 1.3.1 Sep 1996 12 | Release to new ftp site ftp://ftp.whidbey.net. 13 | 14 | 1) Workaround for overflow exception in strtod, sunos5.5 solaris. 15 | 16 | 2) []...] and [^]...] put ] in a class (or not in a class) without 17 | having to use back-slash escape. 18 | 19 | 1.2.2 -> 1.3 Jul 1996 20 | Extensive redesign of array data structures to support large arrays and 21 | fast access to arrays created with split. Many of the ideas in the 22 | new design were inspired by reading "The Design and Implementation of 23 | Dynamic Hashing Sets and Tables in Icon" by William Griswold and 24 | Gregg Townsend, SPE 23,351-367. 25 | 26 | 1.2.1 -> 1.2.2 Jan 1996 27 | 28 | 1) Improved autoconfig, in particular, fpe tests. 
This is far from 29 | perfect and never will be until C standardizes an interface to ieee754. 30 | 31 | 2) Removed automatic error message on open failure for getline. 32 | 33 | 3) Flush all output before system(). Previous behavior was to only 34 | flush std{out,err}. 35 | 36 | 4) Explicitly fclose() all output on exit to work around AIX4.1 bug. 37 | 38 | 5) Fixed random number generator to work with longs larger than 39 | 32bits. 40 | 41 | 6) Added a type Int which is int on real machines and long on dos machines. 42 | Believe that all implicit assumptions that int=32bits are now gone. 43 | -------------------------------------------------------------------------------- /packing.list: -------------------------------------------------------------------------------- 1 | 2 | #$Id: packing.list,v 1.8 2016/08/02 11:08:14 mike Exp $ 3 | 4 | ################################################ 5 | # These files form the beta mawk distribution 1.9.9.x 6 | # 7 | # Mawk is an implementation of the AWK Programming Language as 8 | # defined and described in Aho, Kernighan and Weinberger, The 9 | # Awk Programming Language, Addison-Wesley, 1988 and extended 10 | # by Posix 1003.2 D11.3 11 | # 12 | ################################################ 13 | packing.list this file 14 | README description of mawk 1.9.9.x 15 | INSTALL installation instructions 16 | CONTACT Mike Brennan 17 | REPOSITORY google drive download site for mawk 18 | Makefile.in 19 | configure 20 | configure.ac 21 | config.hin 22 | THANKS 23 | ChangeLog.beebe 24 | ################################# 25 | array.c source files 26 | bi_funct.c 27 | bi_vars.c 28 | cast.c 29 | code.c 30 | da.c 31 | error.c 32 | execute.c 33 | fcall.c 34 | field.c 35 | files.c 36 | fin.c 37 | hash.c 38 | init.c 39 | int.c 40 | jmp.c 41 | kw.c 42 | main.c 43 | makescan.c 44 | memory.c 45 | parse.c 46 | print.c 47 | printf.c 48 | re_cmpl.c 49 | scan.c 50 | scancode.c 51 | split.c 52 | version.c 53 | zmalloc.c 54 | array.h 55 | bi_funct.h 56 
| bi_vars.h 57 | code.h 58 | field.h 59 | files.h 60 | fin.h 61 | init.h 62 | int.h 63 | jmp.h 64 | mawk.h 65 | memory.h 66 | parse.h 67 | printf.h 68 | regexp.h 69 | repl.h 70 | scan.h 71 | sizes.h 72 | split.h 73 | symtype.h 74 | types.h 75 | zmalloc.h 76 | array.w 77 | printf.w 78 | parse.y 79 | ######################## 80 | # directory: man 81 | man/mawk.1 troff source for unix style man pages 82 | man/mawk.txt ascii man pages 83 | ######################## 84 | # directory: rexp 85 | rexp/Makefile make rexp*.o files 86 | rexp/rexp.c source for regular matching library 87 | rexp/rexp.h 88 | rexp/rexp0.c 89 | rexp/rexp1.c 90 | rexp/rexp2.c 91 | rexp/rexp3.c 92 | rexp/rexpdb.c 93 | rexp/wait.c 94 | rexp/wait.h 95 | ####################### 96 | # directory: test testing and benchmarking directory 97 | test/mawktest scripts to test mawk compiled OK 98 | test/mawktest.dat input data for the test 99 | test/fpe_test scripts to test if fpe handling compiled OK 100 | test/wc.awk awk programs used by the tests 101 | test/reg0.awk 102 | test/reg1.awk 103 | test/reg2.awk 104 | test/wfrq0.awk 105 | test/decl-awk.out 106 | test/fpetest1.awk 107 | test/fpetest2.awk 108 | test/fpetest3.awk 109 | test/reg-awk.out 110 | test/wc-awk.out 111 | test/wfrq-awk.out 112 | test/pipetest 113 | test/pipetest.out 114 | test/okay/mawktest.out 115 | test/okay/mawktest.err 116 | ###################### 117 | # directory: examples useful awk programs 118 | examples/hical calendar program by Bob Stockler 119 | examples/hcal Bob's latest 120 | examples/decl.awk 121 | examples/deps.awk 122 | examples/gdecl.awk 123 | examples/nocomment.awk 124 | examples/eatc.awk 125 | examples/primes.awk 126 | examples/qsort.awk 127 | examples/ct_length.awk change length to length() 128 | ################################# 129 | # directory cdoc read about the code 130 | cdoc/array.pdf 131 | cdoc/printf.pdf 132 | cdoc/array.dvi 133 | cdoc/printf.dvi 134 | cdoc/Makefile 135 | ############################### 136 | # 
directory old nostalgia 137 | old/ACKNOWLEDGMENT 138 | old/CHANGES 139 | -------------------------------------------------------------------------------- /parse.h: -------------------------------------------------------------------------------- 1 | 2 | /* A Bison parser, made by GNU Bison 2.4.1. */ 3 | 4 | /* Skeleton interface for Bison's Yacc-like parsers in C 5 | 6 | Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 7 | Free Software Foundation, Inc. 8 | 9 | This program is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. 18 | 19 | You should have received a copy of the GNU General Public License 20 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ 21 | 22 | /* As a special exception, you may create a larger work that contains 23 | part or all of the Bison parser skeleton and distribute that work 24 | under terms of your choice, so long as that work isn't itself a 25 | parser generator using the skeleton or a modified version thereof 26 | as a parser skeleton. Alternatively, if you modify or redistribute 27 | the parser skeleton itself, you may (at your option) remove this 28 | special exception, which will cause the skeleton and the resulting 29 | Bison output files to be licensed under the GNU General Public 30 | License without this special exception. 31 | 32 | This special exception was added by the Free Software Foundation in 33 | version 2.2 of Bison. */ 34 | 35 | 36 | /* Tokens.
*/ 37 | #ifndef YYTOKENTYPE 38 | # define YYTOKENTYPE 39 | /* Put the tokens into the symbol table, so that GDB and other debuggers 40 | know about them. */ 41 | enum yytokentype { 42 | UNEXPECTED = 258, 43 | BAD_DECIMAL = 259, 44 | NL = 260, 45 | SEMI_COLON = 261, 46 | LBRACE = 262, 47 | RBRACE = 263, 48 | LBOX = 264, 49 | RBOX = 265, 50 | COMMA = 266, 51 | IO_OUT = 267, 52 | POW_ASG = 268, 53 | MOD_ASG = 269, 54 | DIV_ASG = 270, 55 | MUL_ASG = 271, 56 | SUB_ASG = 272, 57 | ADD_ASG = 273, 58 | ASSIGN = 274, 59 | COLON = 275, 60 | QMARK = 276, 61 | OR = 277, 62 | AND = 278, 63 | IN = 279, 64 | MATCH = 280, 65 | GTE = 281, 66 | GT = 282, 67 | LTE = 283, 68 | LT = 284, 69 | NEQ = 285, 70 | EQ = 286, 71 | CAT = 287, 72 | GETLINE = 288, 73 | MINUS = 289, 74 | PLUS = 290, 75 | MOD = 291, 76 | DIV = 292, 77 | MUL = 293, 78 | UMINUS = 294, 79 | NOT = 295, 80 | PIPE = 296, 81 | IO_IN = 297, 82 | POW = 298, 83 | INC_or_DEC = 299, 84 | FIELD = 300, 85 | DOLLAR = 301, 86 | RPAREN = 302, 87 | LPAREN = 303, 88 | DOUBLE = 304, 89 | STRING_ = 305, 90 | RE = 306, 91 | ID = 307, 92 | D_ID = 308, 93 | FUNCT_ID = 309, 94 | BUILTIN = 310, 95 | LENGTH = 311, 96 | PRINT = 312, 97 | PRINTF = 313, 98 | SPLIT = 314, 99 | MATCH_FUNC = 315, 100 | SUB = 316, 101 | GSUB = 317, 102 | SPRINTF = 318, 103 | DO = 319, 104 | WHILE = 320, 105 | FOR = 321, 106 | BREAK = 322, 107 | CONTINUE = 323, 108 | IF = 324, 109 | ELSE = 325, 110 | DELETE = 326, 111 | BEGIN = 327, 112 | END = 328, 113 | EXIT = 329, 114 | NEXT = 330, 115 | NEXTFILE = 331, 116 | RETURN = 332, 117 | FUNCTION = 333 118 | }; 119 | #endif 120 | /* Tokens. 
*/ 121 | #define UNEXPECTED 258 122 | #define BAD_DECIMAL 259 123 | #define NL 260 124 | #define SEMI_COLON 261 125 | #define LBRACE 262 126 | #define RBRACE 263 127 | #define LBOX 264 128 | #define RBOX 265 129 | #define COMMA 266 130 | #define IO_OUT 267 131 | #define POW_ASG 268 132 | #define MOD_ASG 269 133 | #define DIV_ASG 270 134 | #define MUL_ASG 271 135 | #define SUB_ASG 272 136 | #define ADD_ASG 273 137 | #define ASSIGN 274 138 | #define COLON 275 139 | #define QMARK 276 140 | #define OR 277 141 | #define AND 278 142 | #define IN 279 143 | #define MATCH 280 144 | #define GTE 281 145 | #define GT 282 146 | #define LTE 283 147 | #define LT 284 148 | #define NEQ 285 149 | #define EQ 286 150 | #define CAT 287 151 | #define GETLINE 288 152 | #define MINUS 289 153 | #define PLUS 290 154 | #define MOD 291 155 | #define DIV 292 156 | #define MUL 293 157 | #define UMINUS 294 158 | #define NOT 295 159 | #define PIPE 296 160 | #define IO_IN 297 161 | #define POW 298 162 | #define INC_or_DEC 299 163 | #define FIELD 300 164 | #define DOLLAR 301 165 | #define RPAREN 302 166 | #define LPAREN 303 167 | #define DOUBLE 304 168 | #define STRING_ 305 169 | #define RE 306 170 | #define ID 307 171 | #define D_ID 308 172 | #define FUNCT_ID 309 173 | #define BUILTIN 310 174 | #define LENGTH 311 175 | #define PRINT 312 176 | #define PRINTF 313 177 | #define SPLIT 314 178 | #define MATCH_FUNC 315 179 | #define SUB 316 180 | #define GSUB 317 181 | #define SPRINTF 318 182 | #define DO 319 183 | #define WHILE 320 184 | #define FOR 321 185 | #define BREAK 322 186 | #define CONTINUE 323 187 | #define IF 324 188 | #define ELSE 325 189 | #define DELETE 326 190 | #define BEGIN 327 191 | #define END 328 192 | #define EXIT 329 193 | #define NEXT 330 194 | #define NEXTFILE 331 195 | #define RETURN 332 196 | #define FUNCTION 333 197 | 198 | 199 | 200 | 201 | #if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED 202 | typedef union YYSTYPE 203 | { 204 | 205 | /* Line 1676 of yacc.c */ 206 | #line 64 "parse.y" 207 | 208 | CELL *cp ; 209 | SYMTAB *stp ; 210 | int start ; /* code starting address as offset from code_base */ 211 | PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */ 212 | BI_REC *bip ; /* ptr to info about a builtin */ 213 | FBLOCK *fbp ; /* ptr to a function block */ 214 | ARG2_REC *arg2p ; 215 | CA_REC *ca_p ; 216 | int ival ; 217 | PTR ptr ; 218 | 219 | 220 | 221 | /* Line 1676 of yacc.c */ 222 | #line 223 "y.tab.h" 223 | } YYSTYPE; 224 | # define YYSTYPE_IS_TRIVIAL 1 225 | # define yystype YYSTYPE /* obsolescent; will be withdrawn */ 226 | # define YYSTYPE_IS_DECLARED 1 227 | #endif 228 | 229 | extern YYSTYPE yylval; 230 | 231 | 232 | -------------------------------------------------------------------------------- /print.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | print.c 4 | copyright 1991-1993,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | #include "mawk.h" 18 | #include "bi_vars.h" 19 | #include "bi_funct.h" 20 | #include "memory.h" 21 | #include "field.h" 22 | #include "scan.h" 23 | #include "files.h" 24 | #include "int.h" 25 | #include "printf.h" 26 | 27 | static void print_cell(CELL *, FILE *) ; 28 | 29 | 30 | /* Once execute() starts the sprintf code is (belatedly) the only 31 | code allowed to use string_buff */ 32 | 33 | static void 34 | print_cell(CELL* p, FILE* fp) 35 | { 36 | size_t len ; 37 | 38 | switch (p->type) 39 | { 40 | case C_NOINIT: 41 | break ; 42 | case C_MBSTRN: 43 | case C_STRING: 44 | case C_STRNUM: 45 | switch (len = string(p)->len) 46 | { 47 | case 0: 48 | break ; 49 | case 1: 50 | putc(string(p)->str[0], fp) ; 51 | break ; 52 | 53 | default: 54 | fwrite(string(p)->str, 1, len, fp) ; 55 | } 56 | break ; 57 | 58 | case C_DOUBLE: 59 | { 60 | double d = p->dval ; 61 | if (is_int_double(d)) { 62 | #if LONG64 63 | fprintf(fp, "%ld", (int64_t) d) ; 64 | #else 65 | fprintf(fp, "%lld", (int64_t) d) ; 66 | #endif 67 | } 68 | else { 69 | fprintf(fp, string(OFMT)->str, p->dval) ; 70 | } 71 | } 72 | break ; 73 | 74 | default: 75 | bozo("bad cell passed to print_cell") ; 76 | } 77 | } 78 | 79 | /* on entry to bi_print or bi_printf the stack is: 80 | 81 | sp[0] = an integer k 82 | if ( k < 0 ) output is to a file with name in sp[-1] 83 | { so open file and sp -= 2 } 84 | 85 | sp[0] = k >= 0 is the number of print args 86 | sp[-k] holds the first argument 87 | */ 88 | 89 | CELL * 90 | bi_print(CELL *sp) /* stack ptr passed in */ 91 | { 92 | register CELL *p ; 93 | register int k ; 94 | FILE *fp ; 95 | 96 | k = sp->type ; 97 | if (k < 0) 98 | { 99 | /* k holds redirection */ 100 | if ((--sp)->type < C_STRING) cast1_to_s(sp) ; 101 | fp = (FILE *) file_find(string(sp), k) ; 102 | free_STRING(string(sp)) ; 103 | k = (--sp)->type ; 104 | /* k now has number of arguments */ 105 | } 106 | else fp = stdout ; 107 | 108 | if (k) 109 | 
{ 110 | p = sp - k ; /* clear k variables off the stack */ 111 | sp = p - 1 ; 112 | k-- ; 113 | 114 | while (k > 0) 115 | { 116 | print_cell(p,fp) ; print_cell(OFS,fp) ; 117 | cell_destroy(p) ; 118 | p++ ; k-- ; 119 | } 120 | 121 | print_cell(p, fp) ; cell_destroy(p) ; 122 | } 123 | else 124 | { /* print $0 */ 125 | sp-- ; 126 | print_cell(&field[0], fp) ; 127 | } 128 | 129 | print_cell(ORS, fp) ; 130 | if (ferror(fp)) { 131 | write_error(fp) ; 132 | mawk_exit(2) ; 133 | } 134 | return sp ; 135 | } 136 | 137 | /* first argument is the format as a Form* */ 138 | CELL * 139 | bi_printf(CELL* sp) 140 | { 141 | int k ; 142 | FILE *fp ; 143 | const Form* form ; 144 | 145 | k = sp->type ; 146 | if (k < 0) { 147 | /* k has redirection */ 148 | if ((--sp)->type < C_STRING) cast1_to_s(sp) ; 149 | fp = (FILE *) file_find(string(sp), k) ; 150 | free_STRING(string(sp)) ; 151 | k = (--sp)->type ; 152 | /* k is now number of args including format */ 153 | } 154 | else fp = stdout ; 155 | 156 | sp -= k ; /* sp points at the format string */ 157 | k-- ; 158 | 159 | form = (const Form*) sp->ptr ; 160 | do_xprintf(fp, form, sp+1) ; 161 | 162 | /* cleanup arguments on eval stack */ 163 | { 164 | CELL *p ; 165 | for (p = sp + 1; k>0; k--, p++) cell_destroy(p) ; 166 | } 167 | return sp - 1 ; 168 | } 169 | 170 | /* The format was not a constant string so must be parsed at run-time */ 171 | CELL * 172 | bi_printf1(CELL* sp) 173 | { 174 | int k ; 175 | FILE *fp ; 176 | const Form* form ; 177 | 178 | k = sp->type ; 179 | if (k < 0) { 180 | /* k has redirection */ 181 | if ((--sp)->type < C_STRING) cast1_to_s(sp) ; 182 | fp = (FILE *) file_find(string(sp), k) ; 183 | free_STRING(string(sp)) ; 184 | k = (--sp)->type ; 185 | /* k is now number of args including format */ 186 | } 187 | else fp = stdout ; 188 | 189 | sp -= k ; /* sp points at the format string */ 190 | k-- ; 191 | 192 | if (sp->type < C_STRING) cast1_to_s(sp) ; 193 | /* parse form doesn't return on error */ 194 | form = 
parse_form(string(sp)) ; 195 | if (form->num_args > k) { 196 | rt_error("not enough arguments passed to printf(\"%s\")", 197 | string(sp)->str) ; 198 | } 199 | 200 | do_xprintf(fp, form, sp+1) ; 201 | free_STRING(string(sp)) ; 202 | 203 | /* cleanup arguments on eval stack */ 204 | { 205 | CELL* p ; 206 | for (p = sp + 1; k; k--, p++) cell_destroy(p) ; 207 | } 208 | return sp - 1 ; 209 | } 210 | 211 | /* format is not constant string */ 212 | CELL * 213 | bi_sprintf1(CELL* sp) 214 | { 215 | int argcnt = sp->type ; 216 | STRING *sval ; 217 | const Form* form ; 218 | 219 | sp -= argcnt ; /* sp points at the format string */ 220 | argcnt-- ; 221 | 222 | if (sp->type != C_STRING) cast1_to_s(sp) ; 223 | form = parse_form(string(sp)) ; 224 | if (form->num_args > argcnt) { 225 | rt_error("not enough arguments passed to sprintf(\"%s\")", 226 | string(sp)->str) ; 227 | } 228 | sval = do_xprintf(0, form, sp + 1) ; 229 | free_STRING(string(sp)) ; 230 | sp->ptr = (PTR) sval ; /* sp->type == C_STRING */ 231 | 232 | /* cleanup */ 233 | { 234 | CELL* p ; 235 | for (p = sp + 1; argcnt; argcnt--, p++) cell_destroy(p) ; 236 | } 237 | return sp ; 238 | } 239 | 240 | /* format is constant string parsed to Form* */ 241 | CELL * 242 | bi_sprintf(CELL* sp) 243 | { 244 | int argcnt = sp->type ; 245 | STRING *sval ; 246 | 247 | sp -= argcnt ; /* sp points at the format string */ 248 | argcnt-- ; 249 | 250 | sval = do_xprintf(0, (const Form*) sp->ptr, sp + 1) ; 251 | sp->type = C_STRING ; 252 | sp->ptr = (PTR) sval ; 253 | 254 | /* cleanup */ 255 | { 256 | CELL* p ; 257 | for (p = sp + 1; argcnt; argcnt--, p++) cell_destroy(p) ; 258 | } 259 | return sp ; 260 | } 261 | -------------------------------------------------------------------------------- /printf.h: -------------------------------------------------------------------------------- 1 | /* printf.h */ 2 | /* 3 | copyright 2016 Michael D. 
Brennan 4 | 5 | This is a source file for mawk, an implementation of 6 | the AWK programming language. 7 | 8 | Mawk is distributed without warranty under the terms of 9 | the GNU General Public License, version 3, 2007. 10 | 11 | printf.c and printf.h were generated with the commands 12 | 13 | notangle -R'"printf.c"' printf.w > printf.c 14 | notangle -R'"printf.h"' printf.w > printf.h 15 | 16 | Notangle is part of Norman Ramsey's noweb literate programming package. 17 | Noweb home page: http://www.cs.tufts.edu/~nr/noweb/ 18 | 19 | It's easiest to read or modify this file by working with printf.w. 20 | */ 21 | 22 | #ifndef PRINTF_H 23 | #define PRINTF_H 1 24 | #include "mawk.h" 25 | #include "types.h" 26 | #include "memory.h" 27 | #include "files.h" 28 | 29 | typedef struct form Form ; 30 | const Form* parse_form(const STRING*) ; 31 | 32 | typedef struct spec Spec ; 33 | struct form { 34 | Form* link ; 35 | STRING* form ; 36 | unsigned num_args ; /* number of args to satisfy form */ 37 | Spec* specs ; /* each %..C is one Spec */ 38 | STRING* ending ; /* after last Spec */ 39 | } ; 40 | STRING* do_xprintf(FILE*, const Form*, CELL*) ; 41 | 42 | void da_Form(FILE*,const Form*) ; 43 | 44 | #endif /* PRINTF_H */ 45 | 46 | -------------------------------------------------------------------------------- /regexp.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | regexp.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | #include 18 | 19 | PTR REcompile(const char *, size_t) ; 20 | int REtest (const char *, size_t, PTR) ; 21 | char *REmatch(const char *, size_t, PTR, size_t*, Bool) ; 22 | void REmprint(PTR , FILE*) ; 23 | 24 | extern int REerrno ; 25 | extern const char* const REerrlist[] ; 26 | 27 | 28 | -------------------------------------------------------------------------------- /repl.h: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | repl.h 3 | copyright 1991,1993,2014-2016 Michael D. Brennan 4 | 5 | This is a source file for mawk, an implementation of 6 | the AWK programming language. 7 | 8 | Mawk is distributed without warranty under the terms of 9 | the GNU General Public License, version 3, 2007. 10 | 11 | If you import elements of this code into another product, 12 | you agree to not name that product mawk. 13 | ********************************************/ 14 | 15 | /* repl.h */ 16 | /* replacement */ 17 | 18 | #ifndef REPL_H 19 | #define REPL_H 20 | 21 | #include "types.h" 22 | 23 | PTR re_compile(const STRING *); 24 | const STRING* re_uncompile(PTR); 25 | 26 | typedef struct { 27 | unsigned cnt; /* number of pieces */ 28 | unsigned amp_cnt; /* number of & */ 29 | STRING **pieces; 30 | size_t piece_len; 31 | } Replv_Data; 32 | 33 | void replacement_scan(const STRING *, CELL *); 34 | const STRING *repl_unscan(CELL *); 35 | 36 | /* only C_REPL needs anything freed */ 37 | #define repl_destroy(cp) do{ \ 38 | if((cp)->type == C_REPL) { \ 39 | free_STRING(string(cp)) ; \ 40 | }} while(0) 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /rexp/Makefile: -------------------------------------------------------------------------------- 1 | 2 | #################################### 3 | # This is a makefile for mawk, 4 | # an implementation of AWK 5 | 
#################################### 6 | # 7 | # 8 | 9 | CC = cc 10 | CFLAGS = -O -DMAWK -I.. 11 | 12 | O=rexp.o rexp0.o rexp1.o rexp2.o rexp3.o rexpdb.o wait.o 13 | 14 | all : $(O) 15 | 16 | $(O) : rexp.h wait.h 17 | 18 | clean : 19 | rm -f *.o 20 | -------------------------------------------------------------------------------- /rexp/rexp.c: --------------------------------------------------------------------------------

/********************************************
rexp.c
copyright 1991,2014-2016 Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 3, 2007.

If you import elements of this code into another product,
you agree to not name that product mawk.
********************************************/


/* op precedence parser for regular expressions */

#include "rexp.h"


/* DATA */
int REerrno ;
const char* const REerrlist[] =
{(const char *) 0,
 /* 1 */ "missing '('",
 /* 2 */ "missing ')'",
 /* 3 */ "bad class -- [], [^] or [",
 /* 4 */ "missing operand",
 /* 5 */ "resource exhaustion -- regular expression too large" ,
 /* 6 */ "invalid named character class [: :]"
} ;
/* E5 is very unlikely to occur */


/* This table drives the operator precedence parser.
   The row is the operator on top of the operator stack, the column is
   the incoming token.  G means reduce (pop) the stacked operator,
   a negative entry is an error code, anything else means push the
   incoming token. */
static short table[8][8] = {

/*        0   |   CAT  *   +   ?   (   )   */
/* 0 */  {0,  L,  L,   L,  L,  L,  L,  E1},
/* | */  {G,  G,  L,   L,  L,  L,  L,  G},
/* CAT*/ {G,  G,  G,   L,  L,  L,  L,  G},
/* * */  {G,  G,  G,   G,  G,  G,  E7, G},
/* + */  {G,  G,  G,   G,  G,  G,  E7, G},
/* ? */  {G,  G,  G,   G,  G,  G,  E7, G},
/* ( */  {E2, L,  L,   L,  L,  L,  L,  EQ},
/* ) */  {G , G,  G,   G,  G,  G,  E7, G} } ;


#define STACKSZ 64


static jmp_buf err_buf ;	/* used to trap on error */

/* record error number x in REerrno and unwind to the setjmp()
   in REcompile() */
void
RE_error_trap(int x)
{
    REerrno = x ;
    longjmp(err_buf, 1) ;
}


/*
 * Compile the regular expression re[0..re_len-1].
 *
 * Returns a pointer to the first STATE of the compiled machine.
 * An empty expression compiles to a single M_ACCEPT state.
 * If RE_error_trap() is called while compiling, (PTR) 0 is returned
 * and REerrno holds the error number.
 */
PTR
REcompile(const char* re, size_t re_len)
{
    MACHINE m_stack[STACKSZ] ;
    struct op
    {
	int token ;
	int prec ;
    }
    op_stack[STACKSZ] ;
    register MACHINE *m_ptr ;
    register struct op *op_ptr ;
    register int t ;

    /* Arm the trap before calling anything that might use
       RE_error_trap(); a longjmp() through an err_buf that was never
       set, or that belongs to a call that already returned, is
       undefined behavior. */
    if (setjmp(err_buf)) return (PTR) 0 ;
    /* we used to try to recover memory left on machine stack ;
       but now m_ptr is in a register so it won't be right unless
       we force it out of a register which isn't worth the trouble */

    /* do this early because it also checks if we have a
       run time stack */
    RE_lex_init(re, re_len) ;

    if (re_len == 0)
    {
	STATE *p = (STATE *) RE_malloc(sizeof(STATE)) ;
	p->type = M_ACCEPT ;
	return (PTR) p ;
    }

    /* initialize the stacks */
    m_ptr = m_stack - 1 ;
    op_ptr = op_stack ;
    op_ptr->token = 0 ;

    t = RE_lex(m_stack) ;

    while (1)
    {
	switch (t)
	{
	    case T_STR:
	    case T_ANY:
	    case T_U:
	    case T_START:
	    case T_END:
	    case T_CLASS:
		/* an operand: RE_lex() already stored its machine */
		m_ptr++ ;
		break ;

	    case 0:		/*  end of reg expr   */
		if (op_ptr->token == 0)
		{
		    /*  done   */
		    if (m_ptr == m_stack) {
			/* if M_WAIT at end, replace with M_ACCEPT */
			if ((m_ptr->stop - 1)->type == M_WAIT) {
			    (m_ptr->stop - 1)->type = M_ACCEPT ;
			}
			return (PTR) m_ptr->start ;
		    }
		    else
		    {
			/* machines still on the stack  */
			RE_panic("values still on machine stack") ;
		    }
		}

		/*  otherwise  fall  thru to default
		    which is operator case  */

	    default:

		/* table[][] only has columns for 0 and T_OR .. T_RP */
		if (t < 0 || t > T_RP) {
		    RE_panic("unexpected token in parser") ;
		}

		if ((op_ptr->prec = table[op_ptr->token][t]) == G)
		{
		    do
		    {		/* op_pop   */

			if (op_ptr->token <= T_CAT)	/*binary op*/
			    m_ptr-- ;
			/* if not enough values on machine stack
			   then we have a missing operand */
			if (m_ptr < m_stack)  RE_error_trap(-E4) ;

			switch (op_ptr->token)
			{
			    case T_CAT:
				RE_cat(m_ptr, m_ptr + 1) ;
				break ;

			    case T_OR:
				RE_or(m_ptr, m_ptr + 1) ;
				break ;

			    case T_STAR:
				RE_close(m_ptr) ;
				break ;

			    case T_PLUS:
				RE_poscl(m_ptr) ;
				break ;

			    case T_Q:
				RE_01(m_ptr) ;
				break ;

			    default:
				/*nothing on ( or ) */
				break ;
			}

			op_ptr-- ;
		    }
		    while (op_ptr->prec != L);

		    continue ;	/* back thru switch at top */
		}

		if (op_ptr->prec < 0)
		{
		    if (op_ptr->prec == E7) RE_panic("parser returns E7") ;
		    else  RE_error_trap(-op_ptr->prec) ;
		}

		if (++op_ptr == op_stack + STACKSZ)
		{
		    /* stack overflow */
		    RE_error_trap(-E5) ;
		}

		op_ptr->token = t ;
	}			/* end of switch */

	if (m_ptr == m_stack + (STACKSZ - 1))
	{
	    /*overflow*/
	    RE_error_trap(-E5) ;
	}

	t = RE_lex(m_ptr + 1) ;
    }
}


/* getting here means a logic flaw or unforeseen case */
void
RE_panic(const char *s)
{
    fprintf(stderr, "REcompile() - panic:  %s\n", s) ;
    exit(100) ;
}
-------------------------------------------------------------------------------- /rexp/rexp.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | rexp.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language.
8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | #ifndef REXP_H 18 | #define REXP_H 19 | 20 | 21 | typedef void* PTR ; 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | typedef int Bool ; 30 | PTR REcompile(const char *,size_t) ; 31 | int REtest (const char *, size_t len, PTR) ; 32 | char *REmatch(const char *, size_t, PTR, size_t*, Bool) ; 33 | void REmprint(PTR , FILE*) ; 34 | int REempty(PTR) ; 35 | 36 | extern int REerrno ; 37 | extern const char* const REerrlist[] ; 38 | 39 | 40 | 41 | PTR RE_malloc(size_t) ; 42 | PTR RE_realloc(void *,size_t) ; 43 | 44 | 45 | /* finite machine state types */ 46 | 47 | #define M_STR 0 48 | #define M_CLASS 1 49 | #define M_ANY 2 50 | #define M_START 3 51 | #define M_END 4 52 | #define M_U 5 53 | #define M_1J 6 54 | #define M_2JA 7 55 | #define M_2JB 8 56 | #define M_WAIT 9 57 | #define M_ACCEPT 10 58 | #define U_ON 11 59 | 60 | #define U_OFF 0 61 | #define END_OFF 0 62 | #define END_ON (2*U_ON) 63 | 64 | 65 | typedef uint32_t BV[8] ; /* bit vector */ 66 | 67 | typedef struct 68 | { int type ; 69 | size_t len ; /* used for M_STR */ 70 | union 71 | { 72 | char *str ; /* string */ 73 | const BV* bvp ; /* class */ 74 | int jump ; 75 | } data ; 76 | } STATE ; 77 | 78 | #define STATESZ (sizeof(STATE)) 79 | 80 | typedef struct 81 | { STATE *start, *stop ; } MACHINE ; 82 | 83 | 84 | /* tokens */ 85 | #define T_OR 1 /* | */ 86 | #define T_CAT 2 87 | #define T_STAR 3 /* * */ 88 | #define T_PLUS 4 /* + */ 89 | #define T_Q 5 /* ? */ 90 | #define T_LP 6 /* ( */ 91 | #define T_RP 7 /* ) */ 92 | #define T_START 8 /* ^ */ 93 | #define T_END 9 /* $ */ 94 | #define T_ANY 10 /* . 
*/ 95 | #define T_CLASS 11 /* starts with [ */ 96 | #define T_SLASH 12 /* \ */ 97 | #define T_CHAR 13 /* all the rest */ 98 | #define T_STR 14 99 | #define T_U 15 100 | 101 | /* precedences and error codes */ 102 | #define L 0 103 | #define EQ 1 104 | #define G 2 105 | #define E1 (-1) 106 | #define E2 (-2) 107 | #define E3 (-3) 108 | #define E4 (-4) 109 | #define E5 (-5) 110 | #define E6 (-6) 111 | #define E7 (-7) 112 | 113 | #define MEMORY_FAILURE 5 114 | 115 | #define ISON(b,x) ((b)[((unsigned char)(x))>>5] & (1<<((x)&31))) 116 | 117 | /* struct for the run time stack */ 118 | typedef struct { 119 | STATE *m ; /* save the machine ptr */ 120 | int u ; /* save the u_flag */ 121 | const char *s ; /* save the active string ptr */ 122 | const char *ss ; /* save the match start -- only used by REmatch */ 123 | } RT_STATE ; /* run time state */ 124 | 125 | /* error trap */ 126 | extern int REerrno ; 127 | void RE_error_trap(int) ; 128 | 129 | 130 | MACHINE RE_u(void) ; 131 | MACHINE RE_start(void) ; 132 | MACHINE RE_end(void) ; 133 | MACHINE RE_any(void) ; 134 | MACHINE RE_str(char *, size_t) ; 135 | MACHINE RE_class(const BV *) ; 136 | void RE_cat(MACHINE *, MACHINE *) ; 137 | void RE_or(MACHINE *, MACHINE *) ; 138 | void RE_close(MACHINE *) ; 139 | void RE_poscl(MACHINE *) ; 140 | void RE_01(MACHINE *) ; 141 | void RE_panic(const char *) ; 142 | char* str_str(const char *, size_t , const char *, size_t) ; 143 | 144 | void RE_lex_init (const char *,size_t) ; 145 | int RE_lex (MACHINE *) ; 146 | void RE_run_stack_init(void) ; 147 | RT_STATE * RE_new_run_stack(void) ; 148 | 149 | #endif /* REXP_H */ 150 | -------------------------------------------------------------------------------- /rexp/rexp2.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | rexp2.c 4 | copyright 1991,2014-2016 Michael D. 
Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | 18 | 19 | /* test a string against a machine */ 20 | 21 | #include "rexp.h" 22 | #include "wait.h" 23 | 24 | #define STACKGROWTH 16 25 | 26 | #ifdef DEBUG 27 | static RT_STATE* slow_push(RT_STATE *, STATE *, const char *, int) ; 28 | #endif 29 | 30 | 31 | RT_STATE *RE_run_stack_base ; 32 | RT_STATE *RE_run_stack_limit ; 33 | 34 | /* Large model DOS segment arithemetic breaks the current stack. 35 | This hack fixes it without rewriting the whole thing, 5/31/91 */ 36 | RT_STATE *RE_run_stack_empty ; 37 | 38 | void 39 | RE_run_stack_init() 40 | { 41 | if (!RE_run_stack_base) 42 | { 43 | RE_run_stack_base = (RT_STATE *) 44 | RE_malloc(sizeof(RT_STATE) * STACKGROWTH) ; 45 | RE_run_stack_limit = RE_run_stack_base + STACKGROWTH ; 46 | RE_run_stack_empty = RE_run_stack_base - 1 ; 47 | } 48 | } 49 | 50 | RT_STATE * 51 | RE_new_run_stack() 52 | { 53 | int oldsize = RE_run_stack_limit - RE_run_stack_base ; 54 | int newsize = oldsize + STACKGROWTH ; 55 | 56 | 57 | RE_run_stack_base = (RT_STATE *) realloc(RE_run_stack_base, 58 | newsize * sizeof(RT_STATE)) ; 59 | 60 | if (!RE_run_stack_base) 61 | { 62 | fprintf(stderr, "out of memory for RE run time stack\n") ; 63 | /* this is pretty unusual, I've only seen it happen on 64 | weird input to REmatch() under 16bit DOS , the same 65 | situation worked easily on 32bit machine. 
*/ 66 | exit(100) ; 67 | } 68 | 69 | RE_run_stack_limit = RE_run_stack_base + newsize ; 70 | RE_run_stack_empty = RE_run_stack_base - 1 ; 71 | 72 | /* return the new stackp */ 73 | return RE_run_stack_base + oldsize ; 74 | } 75 | 76 | #ifdef DEBUG 77 | static RT_STATE * 78 | slow_push(RT_STATE* sp, STATE* m, const char* s, int u) 79 | { 80 | if (sp == RE_run_stack_limit) sp = RE_new_run_stack() ; 81 | sp->m = m ; sp->s = s ; sp->u = u ; 82 | return sp ; 83 | } 84 | #endif 85 | 86 | #ifdef DEBUG 87 | #define push(mx,sx,ux) stackp = slow_push(++stackp, mx, sx, ux) 88 | #else 89 | #define push(mx,sx,ux) if (++stackp == RE_run_stack_limit)\ 90 | stackp = RE_new_run_stack() ;\ 91 | stackp->m=(mx);stackp->s=(sx);stackp->u=(ux) 92 | #endif 93 | 94 | 95 | #define CASE_UANY(x) case x + U_OFF : case x + U_ON 96 | 97 | /* test if str ~ /machine/ 98 | */ 99 | 100 | int 101 | REtest(const char* str, size_t len, PTR machine) 102 | { 103 | register STATE *m = (STATE *) machine ; 104 | register const char *s = str ; 105 | register RT_STATE *stackp ; 106 | int u_flag ; 107 | const char *const str_end = str + len ; 108 | 109 | /* handle the easy case quickly */ 110 | if (m->type == M_STR && (m+1)->type == M_ACCEPT) { 111 | return str_str(s, len, m->data.str, m->len) != (char *) 0 ; 112 | } 113 | else 114 | { 115 | s = str - 1 ; /* ^ can be matched */ 116 | u_flag = U_ON ; 117 | stackp = RE_run_stack_empty ; 118 | if (wait_queue) empty_wait_queue() ; 119 | goto reswitch ; 120 | } 121 | 122 | refill : 123 | if (stackp == RE_run_stack_empty) { 124 | RTS_Node* wp ; 125 | if (wait_queue == 0) return 0 ; 126 | wp = wait_queue ; 127 | wait_queue = wp->link ; 128 | 129 | m = wp->state.m ; 130 | s = wp->state.s ; 131 | u_flag = wp->state.u ; 132 | /* put wp on free list */ 133 | wp->link = wait_free_list ; 134 | wait_free_list = wp ; 135 | m++ ; 136 | } 137 | else { 138 | m = stackp->m ; 139 | s = stackp->s ; 140 | u_flag = stackp->u ; 141 | stackp-- ; 142 | } 143 | 144 | reswitch : 145 | 
146 | switch (m->type + u_flag) 147 | { 148 | case M_STR + U_OFF + END_OFF: 149 | if (s > str_end || (size_t)(str_end - s) < m->len || memcmp(s, m->data.str, m->len)) { 150 | goto refill ; 151 | } 152 | s += m->len ; m++ ; 153 | goto reswitch ; 154 | 155 | case M_STR + U_OFF + END_ON: 156 | if ((size_t)(str_end - s) != m->len || memcmp(s,m->data.str,m->len) != 0) { 157 | goto refill ; 158 | } 159 | s = str_end+1 ; 160 | m++ ; 161 | goto reswitch ; 162 | 163 | case M_STR + U_ON + END_OFF: 164 | if (s > str_end) goto refill ; 165 | if (s < str) s = str ; 166 | if (!(s = str_str(s, (size_t)(str_end - s) , m->data.str, m->len))) goto refill ; 167 | push(m, s + 1, U_ON) ; 168 | s += m->len ; m++ ; u_flag = U_OFF ; 169 | goto reswitch ; 170 | 171 | case M_STR + U_ON + END_ON: 172 | if (s > str_end) goto refill ; 173 | if (s < str) s = str ; 174 | { 175 | int t = ((size_t)(str_end - s)) - m->len ; 176 | if (t < 0 || memcmp(s + t, m->data.str, m->len) != 0) { 177 | goto refill ; 178 | } 179 | } 180 | s = str_end+1 ; 181 | m++ ; 182 | u_flag = U_OFF ; 183 | goto reswitch ; 184 | 185 | case M_CLASS + U_OFF + END_OFF: 186 | if (s >= str_end || !ISON(*m->data.bvp, s[0])) goto refill ; 187 | s++ ; m++ ; 188 | goto reswitch ; 189 | 190 | case M_CLASS + U_OFF + END_ON: 191 | if (s+1 != str_end || !ISON(*m->data.bvp, s[0])) goto refill ; 192 | s = str_end+1 ; 193 | m++ ; 194 | goto reswitch ; 195 | 196 | case M_CLASS + U_ON + END_OFF: 197 | if (s < str) s = str ; 198 | while (1) { 199 | if (s >= str_end) goto refill ; 200 | if (ISON(*m->data.bvp,s[0])) break ; 201 | s++ ; 202 | } 203 | s++ ; 204 | push(m, s, U_ON) ; 205 | m++ ; u_flag = U_OFF ; 206 | goto reswitch ; 207 | 208 | case M_CLASS + U_ON + END_ON: 209 | if (s < str) s = str ; 210 | if (s >= str_end || !ISON(*m->data.bvp, str_end[-1])) 211 | goto refill ; 212 | s = str_end+1 ; m++ ; u_flag = U_OFF ; 213 | goto reswitch ; 214 | 215 | case M_ANY + U_OFF + END_OFF: 216 | if (s >= str_end) goto refill ; 217 | s++ ; m++ ; 218 
| goto reswitch ; 219 | 220 | case M_ANY + U_OFF + END_ON: 221 | if (s + 1 != str_end) goto refill ; 222 | s = str_end+1 ; 223 | m++ ; 224 | goto reswitch ; 225 | 226 | case M_ANY + U_ON + END_OFF: 227 | if (s < str) s = str ; 228 | if (s >= str_end) goto refill ; 229 | s++ ; 230 | push(m, s, U_ON) ; 231 | m++ ; u_flag = U_OFF ; 232 | goto reswitch ; 233 | 234 | case M_ANY + U_ON + END_ON: 235 | if (s < str) s = str ; 236 | if (s >= str_end) goto refill ; 237 | s = str_end+1 ; m++ ; u_flag = U_OFF ; 238 | goto reswitch ; 239 | 240 | case M_START + U_OFF + END_OFF: 241 | case M_START + U_ON + END_OFF: 242 | if (!(s < str)) goto refill ; 243 | s = str ; 244 | m++ ; u_flag = U_OFF ; 245 | goto reswitch ; 246 | 247 | case M_START + U_OFF + END_ON: 248 | case M_START + U_ON + END_ON: 249 | if (!(s < str) || str != str_end) goto refill ; 250 | s = str_end+1 ; 251 | m++ ; u_flag = U_OFF ; 252 | goto reswitch ; 253 | 254 | case M_END + U_OFF: 255 | if (s != str_end) goto refill ; 256 | s++ ; 257 | m++ ; 258 | goto reswitch ; 259 | 260 | case M_END + U_ON: 261 | if (s > str_end) goto refill ; 262 | s = str_end+1 ; 263 | m++ ; u_flag = U_OFF ; 264 | goto reswitch ; 265 | 266 | CASE_UANY(M_U): 267 | if (s < str) s = str ; 268 | u_flag = U_ON ; m++ ; 269 | goto reswitch ; 270 | 271 | CASE_UANY(M_1J): 272 | m += m->data.jump ; 273 | goto reswitch ; 274 | 275 | CASE_UANY(M_2JA): /* take the non jump branch */ 276 | push(m + m->data.jump, s, u_flag) ; 277 | m++ ; 278 | goto reswitch ; 279 | 280 | CASE_UANY(M_2JB): 281 | push(m + m->data.jump, s, u_flag) ; 282 | m++ ; 283 | goto reswitch ; 284 | 285 | CASE_UANY(M_WAIT): 286 | if (stackp != RE_run_stack_empty || wait_queue != 0) { 287 | wait_queue_insert(m,s,u_flag,0) ; 288 | goto refill ; 289 | } 290 | m++ ; 291 | goto reswitch ; 292 | 293 | CASE_UANY(M_ACCEPT): 294 | return 1 ; 295 | 296 | default: 297 | RE_panic("unexpected case in REtest") ; 298 | } 299 | /* not reached, shutup -Wall */ 300 | return 0 ; 301 | } 302 | 303 | 304 
| 305 | #ifdef MAWK 306 | 307 | char * 308 | is_string_split(PTR q, size_t* lenp) 309 | { 310 | STATE *p; 311 | 312 | p = (STATE *)q; 313 | if (p && p[0].type == M_STR && p[1].type == M_ACCEPT) 314 | { 315 | *lenp = p->len ; 316 | return p->data.str ; 317 | } 318 | else return (char *) 0 ; 319 | } 320 | #else /* mawk provides its own str_str */ 321 | 322 | char * 323 | str_str(target, key, klen) 324 | register char *target ; 325 | register char *key ; 326 | unsigned klen ; 327 | { 328 | int c = key[0] ; 329 | 330 | switch (klen) 331 | { 332 | case 0: 333 | return (char *) 0 ; 334 | 335 | case 1: 336 | return strchr(target, c) ; 337 | 338 | case 2: 339 | { 340 | int c1 = key[1] ; 341 | 342 | while (target = strchr(target, c)) 343 | { 344 | if (target[1] == c1) return target ; 345 | else target++ ; 346 | } 347 | break ; 348 | } 349 | 350 | default: 351 | klen-- ; key++ ; 352 | while (target = strchr(target, c)) 353 | { 354 | if (memcmp(target + 1, key, klen) == 0) return target ; 355 | else target++ ; 356 | } 357 | break ; 358 | } 359 | return (char *) 0 ; 360 | } 361 | 362 | 363 | #endif /* MAWK */ 364 | -------------------------------------------------------------------------------- /rexp/rexpdb.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | rexpdb.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | #include "rexp.h" 19 | #include 20 | 21 | /* print a machine for debugging */ 22 | 23 | static const char *xlat[] = { 24 | "M_STR" , 25 | "M_CLASS" , 26 | "M_ANY" , 27 | "M_START" , 28 | "M_END" , 29 | "M_U", 30 | "M_1J" , 31 | "M_2JA" , 32 | "M_2JB" , 33 | "M_WAIT", 34 | "M_ACCEPT" } ; 35 | 36 | /* print string visibly */ 37 | static 38 | void visible_string(FILE* fp, const char* s, size_t len) 39 | { 40 | const char* const s_end = s + len ; 41 | 42 | fputc('"',fp) ; 43 | while(s < s_end) { 44 | int c = *s++ ; 45 | 46 | if (c == '"') { 47 | fprintf(fp, "\\\"") ; 48 | } 49 | else if (c == '\\') { 50 | fprintf(fp, "\\\\") ; 51 | } 52 | else if(c >= 32 && c < 127) { 53 | fprintf(fp, "%c", c) ; 54 | } 55 | else { 56 | fprintf(fp, "\\x%02x", c) ; 57 | } 58 | } 59 | fputc('"', fp) ; 60 | } 61 | 62 | 63 | void REmprint(void* m, FILE* f) 64 | { STATE *p = (STATE *) m ; 65 | const char *end_on_string ; 66 | STATE* last = 0 ; 67 | 68 | while ( 1 ) 69 | { 70 | if ( p->type >= END_ON ) 71 | { p->type -= END_ON ; end_on_string = "$" ; } 72 | else end_on_string = "" ; 73 | 74 | if ( p->type < 0 || p->type >= END_ON ) 75 | { fprintf(f, "unknown STATE type\n") ; return ; } 76 | 77 | fprintf(f, "%-10s" , xlat[p->type]) ; 78 | switch( p->type ) 79 | { 80 | case M_STR : visible_string(f,p->data.str,p->len) ; 81 | break ; 82 | 83 | case M_2JA: 84 | { 85 | STATE* q = p + p->data.jump ; 86 | if (q > last) last = q ; 87 | } 88 | /* fall thru */ 89 | case M_1J: 90 | case M_2JB : fprintf(f, "%d", p->data.jump) ; 91 | break ; 92 | case M_CLASS: 93 | { 94 | const BV* bvp = p->data.bvp ; 95 | unsigned i ; 96 | fprintf(f, "%x", (*bvp)[0]) ; 97 | for(i=1; i < 8; i++) { 98 | fprintf(f, " %x", (*bvp)[i]) ; 99 | } 100 | } 101 | break ; 102 | } 103 | fprintf(f, "%s\n" , end_on_string) ; 104 | if ( end_on_string[0] ) p->type += END_ON ; 105 | if ( p->type == M_ACCEPT && p >= last) return ; 106 | p++ ; 107 | } 108 | } 109 | 110 | 
-------------------------------------------------------------------------------- /rexp/wait.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | wait.c 4 | copyright 2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | #include "wait.h" 18 | 19 | RTS_Node* wait_queue ; 20 | RTS_Node* wait_free_list ; 21 | 22 | RTS_Node* rts_node_malloc(RT_STATE rts) 23 | { 24 | RTS_Node* ret ; 25 | if (wait_free_list) { 26 | ret = wait_free_list ; 27 | wait_free_list = ret->link ; 28 | } 29 | else { 30 | ret = (RTS_Node*) RE_malloc(sizeof(RTS_Node)) ; 31 | } 32 | ret->state = rts ; 33 | ret->link = 0 ; 34 | return ret ; 35 | } 36 | 37 | /* move the wait_queue to the wait_free_list */ 38 | void empty_wait_queue() 39 | { 40 | RTS_Node* p = wait_queue ; 41 | wait_queue = 0 ; 42 | if (wait_free_list == 0) wait_free_list = p ; 43 | else { /* find the tail and append */ 44 | RTS_Node* q = wait_free_list ; 45 | while(q->link) q = q->link ; 46 | q->link = p ; 47 | } 48 | } 49 | 50 | int rt_state_lt(RT_STATE* r1, RT_STATE* r2) { 51 | if (r1->s < r2->s) return 1 ; 52 | if (r1->s > r2->s) return 0 ; 53 | if (r1->m < r2->m) return 1 ; 54 | if (r1->m > r2->m) return 0 ; 55 | if (r1->u < r2->u) return 1 ; 56 | if (r1->u > r2->u) return 0 ; 57 | return r1->ss < r2->ss ; 58 | } 59 | 60 | /* wait queue is priority queue on ordering rt_state_lt() 61 | Currently is ordered singley linked list. 
Appears it 62 | doesn't get big enough require a binary tree implementation 63 | */ 64 | void wait_queue_insert(STATE* m, const char* s, int u, const char* ss) 65 | { 66 | 67 | RT_STATE rts ; 68 | RTS_Node* p = wait_queue ; 69 | RTS_Node* q = 0 ; 70 | 71 | rts.m = m ; 72 | rts.s = s ; 73 | rts.u = u ; 74 | rts.ss = ss ; 75 | 76 | while(p) { 77 | if (rt_state_lt(&rts, &p->state)) { 78 | /* insert in front of p */ 79 | RTS_Node* x = rts_node_malloc(rts) ; 80 | x->link = p ; 81 | if (q == 0) { 82 | wait_queue = x ; 83 | } 84 | else { 85 | q->link = x ; 86 | } 87 | return ; 88 | } 89 | else if (rts.m == p->state.m && 90 | rts.s == p->state.s && 91 | rts.u == p->state.u && 92 | rts.ss == p->state.ss) { 93 | /* already in the list */ 94 | return ; 95 | } 96 | q = p ; 97 | p = p->link ; 98 | } 99 | /* add to tail of list */ 100 | p = rts_node_malloc(rts) ; 101 | if (wait_queue == 0) { 102 | wait_queue = p ; 103 | } 104 | else q->link = p ; 105 | } 106 | 107 | -------------------------------------------------------------------------------- /rexp/wait.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | wait.h 4 | copyright 2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | #ifndef RE_WAIT_H 17 | #define RE_WAIT_H 18 | 19 | #include "rexp.h" 20 | 21 | typedef struct rts_node { 22 | struct rts_node* link ; 23 | RT_STATE state ; 24 | } RTS_Node ; 25 | 26 | extern RTS_Node* wait_queue ; 27 | extern RTS_Node* wait_free_list ; 28 | 29 | void wait_queue_insert(STATE* m, const char* s, int u, const char* ss) ; 30 | void empty_wait_queue(void) ; 31 | 32 | #endif 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /scan.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | scan.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | 18 | /* scan.h */ 19 | 20 | #ifndef SCAN_H_INCLUDED 21 | #define SCAN_H_INCLUDED 1 22 | 23 | #include 24 | 25 | #ifndef MAKESCAN 26 | #include "symtype.h" 27 | #include "parse.h" 28 | #endif 29 | 30 | extern int scan_code[256] ; 31 | 32 | /* the scan codes to compactify the main switch */ 33 | 34 | #define SC_SPACE 1 35 | #define SC_NL 2 36 | #define SC_SEMI_COLON 3 37 | #define SC_FAKE_SEMI_COLON 4 38 | #define SC_LBRACE 5 39 | #define SC_RBRACE 6 40 | #define SC_QMARK 7 41 | #define SC_COLON 8 42 | #define SC_OR 9 43 | #define SC_AND 10 44 | #define SC_PLUS 11 45 | #define SC_MINUS 12 46 | #define SC_MUL 13 47 | #define SC_DIV 14 48 | #define SC_MOD 15 49 | #define SC_POW 16 50 | #define SC_LPAREN 17 51 | #define SC_RPAREN 18 52 | #define SC_LBOX 19 53 | #define SC_RBOX 20 54 | #define SC_IDCHAR 21 55 | #define SC_DIGIT 22 56 | #define SC_DQUOTE 23 57 | #define SC_ESCAPE 24 58 | #define SC_COMMENT 25 59 | #define SC_EQUAL 26 60 | #define SC_NOT 27 61 | #define SC_LT 28 62 | #define SC_GT 29 63 | #define SC_COMMA 30 64 | #define SC_DOT 31 65 | #define SC_MATCH 32 66 | #define SC_DOLLAR 33 67 | #define SC_UNEXPECTED 34 68 | 69 | #ifndef MAKESCAN 70 | 71 | void eat_nl(void) ; 72 | 73 | /* in error.c */ 74 | void unexpected_char(void) ; 75 | 76 | #define ct_ret(x) return current_token = (x) 77 | 78 | #define next() (*buffp ? *buffp++ : slow_next()) 79 | #define un_next() buffp-- 80 | 81 | #define test1_ret(c,x,d) if ( next() == (c) ) ct_ret(x) ;\ 82 | else { un_next() ; ct_ret(d) ; } 83 | 84 | #define test2_ret(c1,x1,c2,x2,d) switch( next() )\ 85 | { case c1: ct_ret(x1) ;\ 86 | case c2: ct_ret(x2) ;\ 87 | default: un_next() ;\ 88 | ct_ret(d) ; } 89 | #endif /* ! 
MAKESCAN */ 90 | #endif 91 | -------------------------------------------------------------------------------- /scancode.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | /* scancode.c */ 4 | /* generated from makescan.c */ 5 | /* Sun Jul 24 15:02:09 PDT 2016 */ 6 | 7 | 8 | int scan_code[256] = { 9 | 0,34,34,34,34,34,34,34,34, 1, 2, 1, 1, 1,34,34, 10 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 11 | 1,27,23,25,33,15,10,34,17,18,13,11,30,12,31,14, 12 | 22,22,22,22,22,22,22,22,22,22, 8, 3,28,26,29, 7, 13 | 34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, 14 | 21,21,21,21,21,21,21,21,21,21,21,19,24,20,16,21, 15 | 34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, 16 | 21,21,21,21,21,21,21,21,21,21,21, 5, 9, 6,32,34, 17 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 18 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 19 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 20 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 21 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 22 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 23 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, 24 | 34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34 25 | } ; 26 | -------------------------------------------------------------------------------- /sizes.h: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | sizes.h 3 | copyright 1991-1995,2014-2016. Michael D. Brennan 4 | 5 | This is a source file for mawk, an implementation of 6 | the AWK programming language. 7 | 8 | Mawk is distributed without warranty under the terms of 9 | the GNU General Public License, version 3, 2007. 10 | 11 | If you import elements of this code into another product, 12 | you agree to not name that product mawk. 
13 | ********************************************/ 14 | 15 | 16 | /* sizes.h */ 17 | 18 | #ifndef SIZES_H 19 | #define SIZES_H 20 | 21 | #define MAX__INT 0x7fffffff 22 | 23 | 24 | #define EVAL_STACK_SIZE 256 /* initial size , can grow */ 25 | 26 | /* 27 | * FBANK_SZ, the number of fields at startup, must be a power of 2. 28 | * 29 | */ 30 | #define FBANK_SZ 1024 31 | #define FB_SHIFT 10 /* lg(FBANK_SZ) */ 32 | 33 | /* 34 | * initial size of sprintf buffer 35 | */ 36 | #define SPRINTF_LIMIT 8192 37 | 38 | #define BUFFSZ 4096 39 | #define FINBUFFSZ 8192 40 | /* starting buffer size for input files, grows if 41 | necessary */ 42 | 43 | #define HASH_PRIME 113 44 | 45 | #define MAX_COMPILE_ERRORS 5 /* quit if more than 4 errors */ 46 | 47 | #endif /* SIZES_H */ 48 | -------------------------------------------------------------------------------- /split.c: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | split.c 3 | copyright 1991-1996,2014-2016 Michael D. Brennan 4 | 5 | This is a source file for mawk, an implementation of 6 | the AWK programming language. 7 | 8 | Mawk is distributed without warranty under the terms of 9 | the GNU General Public License, version 3, 2007. 10 | 11 | If you import elements of this code into another product, 12 | you agree to not name that product mawk. 
13 | ********************************************/ 14 | 15 | 16 | /* split.c */ 17 | 18 | #include "mawk.h" 19 | #include "split.h" 20 | #include "symtype.h" 21 | #include "bi_vars.h" 22 | #include "bi_funct.h" 23 | #include "memory.h" 24 | #include "scan.h" 25 | #include "regexp.h" 26 | #include "repl.h" 27 | #include "field.h" 28 | 29 | #ifdef MEM_CHECK 30 | #define SP_SIZE 4 /* exercises split_block_list code */ 31 | #else 32 | #define SP_SIZE 2048 33 | #endif 34 | 35 | typedef struct split_block { 36 | STRING *strings[SP_SIZE]; 37 | struct split_block *link; 38 | } Split_Block_Node; 39 | 40 | static Split_Block_Node split_block_base; 41 | static Split_Block_Node *split_block_list = &split_block_base; 42 | 43 | /* usually the list is of size 1 44 | the list never gets smaller than size 1 45 | this function returns a bigger list to size 1 46 | */ 47 | 48 | static void 49 | spb_list_shrink(void) 50 | { 51 | Split_Block_Node *p = split_block_list->link; 52 | split_block_list->link = 0; 53 | while (p) { 54 | Split_Block_Node *hold = p; 55 | p = p->link; 56 | free(hold); 57 | } 58 | } 59 | 60 | /* this function is passed a pointer to the tail of the list, 61 | adds a new node and returns the new tail 62 | This makes the list one node bigger 63 | */ 64 | 65 | static Split_Block_Node * 66 | grow_sp_list(Split_Block_Node * tail) 67 | { 68 | tail->link = (Split_Block_Node *) emalloc(sizeof(Split_Block_Node)); 69 | tail = tail->link; 70 | tail->link = 0; 71 | return tail; 72 | } 73 | 74 | /* 75 | * Split string s of length slen on SPACE without changing s. 
76 | * Load the pieces into STRINGS 77 | * return the number of pieces 78 | */ 79 | size_t 80 | space_split(const char *s, size_t slen) 81 | { 82 | size_t cnt = 0; 83 | const char *end = s + slen; 84 | Split_Block_Node *node_p = split_block_list; 85 | unsigned idx = 0; 86 | 87 | while (1) { 88 | /* eat space */ 89 | while (scan_code[*(const unsigned char *) s] == SC_SPACE) { 90 | s++; 91 | } 92 | if (s == end) { 93 | return cnt; 94 | } 95 | /* find one field */ 96 | { 97 | const char *q = s++; /* q is front of field */ 98 | while (s < end && scan_code[*(const unsigned char *) s] != SC_SPACE) 99 | s++; 100 | /* create and store the string field */ 101 | node_p->strings[idx] = new_STRING2(q, s - q); 102 | cnt++; 103 | if (++idx == SP_SIZE) { 104 | idx = 0; 105 | node_p = grow_sp_list(node_p); 106 | } 107 | } 108 | } 109 | /* not reached */ 110 | } 111 | 112 | size_t 113 | re_split(const char *s, size_t slen, PTR re) 114 | { 115 | size_t cnt = 0; 116 | const char *end = s + slen; 117 | Split_Block_Node *node_p = split_block_list; 118 | unsigned idx = 0; 119 | int no_front_match = 0; 120 | 121 | if (slen == 0) { 122 | return 0; 123 | } 124 | 125 | while (s < end) { 126 | size_t mlen; 127 | const char *m = re_pos_match(s, end - s, re, &mlen, no_front_match); 128 | 129 | no_front_match = 1; /* future matches don't match ^ */ 130 | if (m) { 131 | /* stuff in front of match is a field, might have length zero */ 132 | node_p->strings[idx] = new_STRING2(s, m - s); 133 | cnt++; 134 | if (++idx == SP_SIZE) { 135 | idx = 0; 136 | node_p = grow_sp_list(node_p); 137 | } 138 | s = m + mlen; 139 | } else { 140 | /* no match so last field is what's left */ 141 | node_p->strings[idx] = new_STRING2(s, end - s); 142 | /* done so don't need to increment idx */ 143 | return ++cnt; 144 | } 145 | } 146 | /* last match at end of s, so last field is "" */ 147 | node_p->strings[idx] = new_STRING0(0); 148 | return ++cnt; 149 | } 150 | 151 | /* match a string with a regular expression, but 152 
| * only matches of positive length count 153 | * input a string str and its length 154 | * return is match point else 0 if no match 155 | * length of match is returned in *lenp 156 | * 157 | * no_front_match -- hook for str being in middle of a bigger string 158 | */ 159 | char * 160 | re_pos_match(const char *str, size_t str_len, PTR re, size_t *lenp, 161 | int no_front_match) 162 | { 163 | const char *end = str + str_len; 164 | 165 | while (str < end) { 166 | char *match = REmatch(str, end - str, re, lenp, no_front_match); 167 | if (match) { 168 | if (*lenp) { 169 | /* match of positive length so done */ 170 | return match; 171 | } else { 172 | /* match but zero length, move str forward and try again */ 173 | /* note this match must have occured at front of str */ 174 | str = match + 1; 175 | no_front_match = 1; 176 | } 177 | } else { 178 | /* no match */ 179 | *lenp = 0; 180 | return 0; 181 | } 182 | } 183 | *lenp = 0; 184 | return 0; 185 | } 186 | 187 | /* like space split but splits s into single character strings */ 188 | 189 | size_t 190 | null_split(const char *s, size_t slen) 191 | { 192 | const char *end = s + slen; 193 | Split_Block_Node *node_p = split_block_list; 194 | unsigned idx = 0; 195 | 196 | while (s < end) { 197 | node_p->strings[idx] = new_STRING2(s++, 1); 198 | if (++idx == SP_SIZE) { 199 | idx = 0; 200 | node_p = grow_sp_list(node_p); 201 | } 202 | } 203 | return slen; 204 | } 205 | 206 | /* The caller knows there are cnt STRING* in the split_block_list 207 | * buffers. 
This function uses them to make CELLs in cp[] 208 | * The target CELLs are virgin, they don't need to be 209 | * destroyed 210 | * 211 | */ 212 | 213 | void 214 | transfer_to_array(CELL cp[], size_t cnt) 215 | { 216 | Split_Block_Node *node_p = split_block_list; 217 | unsigned idx = 0; 218 | while (cnt > 0) { 219 | cp->type = C_MBSTRN; 220 | cp->ptr = (PTR) node_p->strings[idx]; 221 | cnt--; 222 | cp++; 223 | if (++idx == SP_SIZE) { 224 | idx = 0; 225 | node_p = node_p->link; 226 | } 227 | } 228 | if (node_p != split_block_list) 229 | spb_list_shrink(); 230 | } 231 | 232 | /* like above but transfers the saved pieces to $1, $2 ... $cnt 233 | * The target CELLs may be string type so need to be destroyed 234 | * The caller has made sure the target CELLs exist 235 | * 236 | */ 237 | 238 | void 239 | transfer_to_fields(size_t cnt) 240 | { 241 | CELL *fp = &field[1]; /* start with $1 */ 242 | CELL *fp_end = &field[FBANK_SZ]; 243 | Split_Block_Node *node_p = split_block_list; 244 | unsigned idx = 0; 245 | unsigned fb_idx = 0; 246 | 247 | while (cnt > 0) { 248 | cell_destroy(fp); 249 | fp->type = C_MBSTRN; 250 | fp->ptr = (PTR) node_p->strings[idx]; 251 | cnt--; 252 | if (++idx == SP_SIZE) { 253 | idx = 0; 254 | node_p = node_p->link; 255 | } 256 | if (++fp == fp_end) { 257 | fb_idx++; 258 | fp = &fbankv[fb_idx][0]; 259 | fp_end = fp + FBANK_SZ; 260 | } 261 | } 262 | if (node_p != split_block_list) { 263 | spb_list_shrink(); 264 | } 265 | } 266 | 267 | /* 268 | * split(s, X, r) 269 | * split s into array X on r 270 | * 271 | * mawk state is EXECUTION sp points at top of eval_stack[] 272 | * 273 | * entry: sp[0] holds r 274 | * sp[-1] pts at X 275 | * sp[-2] holds s 276 | * 277 | exit : sp is 2 less, sp[0] is C_DOUBLE CELL with value equal 278 | to the number of split pieces 279 | */ 280 | CELL * 281 | bi_split(CELL *sp) 282 | { 283 | size_t cnt = 0; /* the number of pieces */ 284 | 285 | if (sp->type < C_RE) 286 | cast_for_split(sp); 287 | /* can be C_RE, C_SPACE or 
C_SNULL */ 288 | sp -= 2; 289 | if (sp->type < C_STRING) 290 | cast1_to_s(sp); 291 | 292 | if (string(sp)->len == 0) { /* nothing to split */ 293 | cnt = 0; 294 | } else { 295 | switch ((sp + 2)->type) { 296 | case C_RE: 297 | cnt = re_split(string(sp)->str, string(sp)->len, 298 | (sp + 2)->ptr); 299 | break; 300 | 301 | case C_SPACE: 302 | cnt = space_split(string(sp)->str, string(sp)->len); 303 | break; 304 | 305 | case C_SNULL: /* split on empty string */ 306 | cnt = null_split(string(sp)->str, string(sp)->len); 307 | break; 308 | 309 | default: 310 | bozo("bad splitting cell in bi_split"); 311 | } 312 | } 313 | 314 | free_STRING(string(sp)); 315 | sp->type = C_DOUBLE; 316 | sp->dval = (double) cnt; 317 | 318 | array_load((ARRAY) (sp + 1)->ptr, cnt); 319 | 320 | return sp; 321 | } 322 | -------------------------------------------------------------------------------- /split.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | split.h 4 | copyright 2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | #ifndef SPLIT_H 17 | #define SPLIT_H 18 | 19 | /* 20 | 21 | Splitting is a two step process: 22 | There is a call to a split function which loads the pieces 23 | into a private buffer and returns a count. 
24 | 25 | The caller next imediately calls 26 | transfer_to_fields(size_t) or 27 | transfer_to_array(CELL[],size_t) 28 | 29 | to get the pieces out of the anonymous private buffer 30 | 31 | We can split one of three ways: 32 | (1) By space: 33 | space_split() 34 | (2) By regular expression: 35 | re_split() 36 | (3) By "" (null -- split into characters) 37 | null_split() 38 | */ 39 | 40 | size_t space_split(const char *str, size_t slen); 41 | size_t null_split(const char *str, size_t len); 42 | size_t re_split(const char *str, size_t len, PTR re); 43 | 44 | /* the two transfer functions */ 45 | void transfer_to_fields(size_t cnt); 46 | void transfer_to_array(CELL[], size_t cnt); 47 | 48 | #endif /* SPLIT_H */ 49 | -------------------------------------------------------------------------------- /symtype.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | symtype.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | 17 | /* types related to symbols are defined here */ 18 | 19 | #ifndef SYMTYPE_H 20 | #define SYMTYPE_H 21 | 22 | 23 | /* struct to hold info about builtins */ 24 | typedef struct { 25 | const char *name ; 26 | PF_CP fp ; /* ptr to function that does the builtin */ 27 | unsigned char min_args, max_args ; 28 | /* info for parser to check correct number of arguments */ 29 | } BI_REC ; 30 | 31 | /*--------------------------- 32 | structures and types for arrays 33 | *--------------------------*/ 34 | 35 | #include "array.h" 36 | 37 | extern ARRAY Argv ; 38 | 39 | 40 | /* for parsing (i,j) in A */ 41 | typedef struct { 42 | int start ; /* offset to code_base */ 43 | int cnt ; 44 | } ARG2_REC ; 45 | 46 | /*------------------------ 47 | user defined functions 48 | ------------------------*/ 49 | 50 | typedef struct fblock { 51 | const char *name ; 52 | INST *code ; 53 | unsigned short nargs ; 54 | char *typev ; /* array of size nargs holding types */ 55 | } FBLOCK ; /* function block */ 56 | 57 | void add_to_fdump_list(FBLOCK *) ; 58 | void fdump(void) ; 59 | 60 | /*------------------------- 61 | elements of the symbol table 62 | -----------------------*/ 63 | 64 | #define ST_NONE 0 65 | #define ST_VAR 1 66 | #define ST_KEYWORD 2 67 | #define ST_BUILTIN 3 /* a pointer to a builtin record */ 68 | #define ST_ARRAY 4 /* a void * ptr to a hash table */ 69 | #define ST_FIELD 5 /* a cell ptr to a field */ 70 | #define ST_FUNCT 6 71 | #define ST_NR 7 /* NR is special */ 72 | #define ST_ENV 8 /* and so is ENVIRON */ 73 | #define ST_UNUSED 9 74 | #define ST_LOCAL_NONE 10 75 | #define ST_LOCAL_VAR 11 76 | #define ST_LOCAL_ARRAY 12 77 | 78 | #define is_local(stp) ((stp)->type>=ST_LOCAL_NONE) 79 | 80 | typedef struct { 81 | const char *name ; 82 | int type ; 83 | unsigned offset ; /* offset in stack frame for local vars */ 84 | union { 85 | CELL *cp ; 86 | int kw ; 87 | PF_CP fp ; 88 | BI_REC *bip ; 89 | ARRAY array ; 90 | 
FBLOCK *fbp ; 91 | } stval ; 92 | } SYMTAB ; 93 | 94 | /* for patching length(A) when A is local and type unknown */ 95 | typedef struct { 96 | FBLOCK* fbp ; 97 | unsigned offset ; 98 | } Local_PI ; 99 | 100 | 101 | /***************************** 102 | structures for type checking function calls 103 | ******************************/ 104 | 105 | typedef struct ca_rec { 106 | struct ca_rec *link ; 107 | short type ; 108 | short arg_num ; /* position in callee's stack */ 109 | /*--------- this data only set if we'll need to patch -------*/ 110 | /* happens if argument is an ID or type ST_NONE or ST_LOCAL_NONE */ 111 | 112 | int call_offset ; 113 | /* where the type is stored */ 114 | SYMTAB *sym_p ; /* if type is ST_NONE */ 115 | char *type_p ; /* if type is ST_LOCAL_NONE */ 116 | } CA_REC ; /* call argument record */ 117 | 118 | /* type field of CA_REC matches with ST_ types */ 119 | #define CA_EXPR ST_LOCAL_VAR 120 | #define CA_ARRAY ST_LOCAL_ARRAY 121 | 122 | typedef struct fcall { 123 | struct fcall *link ; 124 | FBLOCK *callee ; 125 | short call_scope ; 126 | short move_level ; 127 | FBLOCK *call ; /* only used if call_scope == SCOPE_FUNCT */ 128 | INST *call_start ; /* computed later as code may be moved */ 129 | CA_REC *arg_list ; 130 | short arg_cnt_checked ; 131 | unsigned line_no ; /* for error messages */ 132 | } FCALL_REC ; 133 | 134 | extern FCALL_REC *resolve_list ; 135 | 136 | void resolve_fcalls(void) ; 137 | void check_fcall(FBLOCK*,int,int,FBLOCK*,CA_REC*,unsigned) ; 138 | void relocate_resolve_list(int,int,FBLOCK*,int,unsigned,int) ; 139 | 140 | /* hash.c */ 141 | unsigned hash(const char *) ; 142 | unsigned hash2(const char*,size_t) ; 143 | SYMTAB * insert(const char *) ; 144 | SYMTAB * find(const char *) ; 145 | const char * reverse_find(int, PTR) ; 146 | SYMTAB * save_id(const char *) ; 147 | void restore_ids(void) ; 148 | 149 | /* error.c */ 150 | void type_error(SYMTAB *) ; 151 | 152 | #endif /* SYMTYPE_H */ 153 | 
-------------------------------------------------------------------------------- /test/decl-awk.out: -------------------------------------------------------------------------------- 1 | hash: function returning unsigned (extern) 2 | last_dhash: unsigned (static) 3 | A: ARRAY 4 | sval: pointer to STRING 5 | cflag: int 6 | A: ARRAY 7 | d: double 8 | cflag: int 9 | ap: pointer to ANODE 10 | signal: function returning pointer to function returning void 11 | -------------------------------------------------------------------------------- /test/fpe_test: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # tests if mawk has been compiled to correctly handle 4 | # floating point exceptions 5 | # 6 | # $Log: fpe_test,v $ 7 | # Revision 1.1.1.1 2014/09/22 17:58:28 mike 8 | # Import mawk 1.3.3 9 | # 10 | # Revision 1.3 1995/08/29 14:17:18 mike 11 | # exit 2 changes 12 | # 13 | # Revision 1.2 1994/12/18 18:51:55 mike 14 | # recognize NAN printed as ? for hpux 15 | # 16 | 17 | PATH=.:$PATH 18 | 19 | test1='BEGIN{ print 4/0 }' 20 | 21 | 22 | test2='BEGIN { 23 | x = 100 24 | do { y = x ; x *= 1000 } while ( y != x ) 25 | print "loop terminated" 26 | }' 27 | 28 | test3='BEGIN{ print log(-8) }' 29 | 30 | 31 | echo "testing division by zero" 32 | echo mawk "$test1" 33 | mawk "$test1" 34 | ret1=$? 35 | echo 36 | 37 | echo "testing overflow" 38 | echo mawk "$test2" 39 | mawk "$test2" 40 | ret2=$? 41 | echo 42 | 43 | echo "testing domain error" 44 | echo mawk "$test3" 45 | mawk "$test3" > temp$$ 46 | ret3=$? 
47 | cat temp$$ 48 | echo 49 | 50 | 51 | # the returns should all be zero or all 2 52 | # core dumps not allowed 53 | 54 | trap ' 55 | echo compilation defines for floating point are incorrect 56 | rm -f temp$$ 57 | exit 1' 0 58 | 59 | echo 60 | echo ============================== 61 | 62 | echo return1 = $ret1 63 | echo return2 = $ret2 64 | echo return3 = $ret3 65 | 66 | 67 | [ $ret1 -gt 128 ] && { echo test1 failed ; exception=1 ; } 68 | [ $ret2 -gt 128 ] && { echo test2 failed ; exception=1 ; } 69 | [ $ret3 -gt 128 ] && { echo test3 failed ; exception=1 ; } 70 | 71 | [ "$exception" = 1 ] && { rm -f core temp$$ ; exit 1 ; } 72 | 73 | 74 | same=0 75 | 76 | [ $ret1 = $ret2 ] && [ $ret2 = $ret3 ] && same=1 77 | 78 | 79 | if [ $same = 1 ] 80 | then 81 | if [ $ret1 = 0 ] 82 | then 83 | echo results consistent: ignoring floating exceptions 84 | # some versions of hpux print NAN as ? 85 | if egrep '[nN][aA][nN]|\?' temp$$ > /dev/null 86 | then : 87 | else 88 | echo "but the library is not IEEE754 compatible" 89 | echo "test 3 failed" 90 | exit 1 91 | fi 92 | else echo results consistent: trapping floating exceptions 93 | fi 94 | 95 | trap 0 96 | rm -f temp$$ 97 | exit 0 98 | 99 | else 100 | echo results are not consistent 101 | echo 'return values should all be 0 if ignoring FPEs (e.g. 
with IEEE754) 102 | or all 2 if trapping FPEs' 103 | 104 | exit 1 105 | fi 106 | 107 | -------------------------------------------------------------------------------- /test/fpetest1.awk: -------------------------------------------------------------------------------- 1 | BEGIN{ print 4/0 } 2 | -------------------------------------------------------------------------------- /test/fpetest2.awk: -------------------------------------------------------------------------------- 1 | BEGIN { 2 | x = 100 3 | do { y = x ; x *= 1000 } while ( y != x ) 4 | print "loop terminated" 5 | } 6 | -------------------------------------------------------------------------------- /test/fpetest3.awk: -------------------------------------------------------------------------------- 1 | BEGIN{ print log(-8) } 2 | -------------------------------------------------------------------------------- /test/mawktest: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This is a simple test that a new made mawk seems to 4 | # be working OK. 5 | # It's certainly not exhaustive, but the last two tests in 6 | # particular use most features. 7 | # 8 | # mawk needs to be in .. 
or $PATH 9 | 10 | dat=mawktest.dat 11 | 12 | trap 'echo mawk_test failed ; rm -f temp$$ ; exit 1' 0 13 | 14 | PATH=..:$PATH ; export PATH 15 | 16 | # find out which mawk we're testing 17 | mawk -W version 18 | 19 | 20 | ################################# 21 | echo 22 | echo testing input and field splitting 23 | 24 | mawk -f wc.awk $dat | diff - wc-awk.out || exit 25 | 26 | echo input and field splitting OK 27 | ##################################### 28 | 29 | echo 30 | echo testing regular expression matching 31 | mawk -f reg0.awk $dat > temp$$ 32 | mawk -f reg1.awk $dat >> temp$$ 33 | mawk -f reg2.awk $dat >> temp$$ 34 | 35 | diff reg-awk.out temp$$ || exit 36 | 37 | echo regular expression matching OK 38 | ####################################### 39 | 40 | echo 41 | echo testing arrays and flow of control 42 | 43 | mawk -f wfrq0.awk $dat | diff - wfrq-awk.out || exit 44 | 45 | echo array test OK 46 | ################################# 47 | 48 | echo 49 | echo testing input/output with pipes 50 | AWK=mawk ./pipetest | diff - pipetest.out 51 | 52 | echo pipe IO OK 53 | 54 | ################### 55 | 56 | echo 57 | echo testing function calls and general stress test 58 | 59 | mawk -f ../examples/decl.awk $dat | diff - decl-awk.out || exit 60 | 61 | echo general stress test passed 62 | 63 | 64 | echo 65 | echo tested mawk seems OK 66 | 67 | trap 0 68 | rm -f temp$$ 69 | exit 0 70 | -------------------------------------------------------------------------------- /test/mawktest.dat: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | extern unsigned hash() ; 5 | 6 | /* An array A is a pointer to an array of struct array, 7 | which is two hash tables in one. One for strings 8 | and one for doubles. 9 | 10 | each array is of size A_HASH_PRIME. 11 | 12 | When an index is deleted via delete A[i], the 13 | ANODE is not removed from the hash chain. A[i].cp 14 | and A[i].sval are both freed and sval is set NULL. 
15 | This method of deletion simplifies for( i in A ) loops. 16 | 17 | On the D_ANODE list, we use real deletion and move to the 18 | front on access. 19 | 20 | Separate nodes (as opposed to one type of node on two lists) 21 | to 22 | (1) d1 != d2, but sprintf(A_FMT,d1) == sprintf(A_FMT,d1) 23 | so two dnodes can point at the same anode. 24 | (2) Save a little data space(64K PC mentality). 25 | 26 | the cost is an extra level of indirection. 27 | 28 | Some care is needed so that things like 29 | A[1] = 2 ; delete A["1"] work . 30 | */ 31 | 32 | #define _dhash(d) (((int)(d)&0x7fff)%A_HASH_PRIME) 33 | #define DHASH(d) (last_dhash=_dhash(d)) 34 | static unsigned last_dhash ; 35 | 36 | /* switch =======;;;;;;hhhh */ 37 | 38 | static ANODE *find_by_sval(A, sval, cflag) 39 | ARRAY A ; 40 | STRING *sval ; 41 | int cflag ; /* create if on */ 42 | { 43 | char *s = sval->str ; 44 | unsigned h = hash(s) % A_HASH_PRIME ; 45 | register ANODE *p = A[h].link ; 46 | ANODE *q = 0 ; /* holds first deleted ANODE */ 47 | 48 | while ( p ) 49 | { 50 | if ( p->sval ) 51 | { if ( strcmp(s,p->sval->str) == 0 ) return p ; } 52 | else /* its deleted, mark with q */ 53 | if ( ! 
q ) q = p ; 54 | 55 | p = p->link ; 56 | } 57 | 58 | /* not there */ 59 | if ( cflag ) 60 | { 61 | if ( q ) p = q ; /* reuse the deleted node q */ 62 | else 63 | { p = (ANODE *)zmalloc(sizeof(ANODE)) ; 64 | p->link = A[h].link ; A[h].link = p ; 65 | } 66 | 67 | p->sval = sval ; 68 | sval->ref_cnt++ ; 69 | p->cp = (CELL *) zmalloc(sizeof(CELL)) ; 70 | p->cp->type = C_NOINIT ; 71 | } 72 | return p ; 73 | } 74 | 75 | 76 | /* on the D_ANODE list, when we find a node we move it 77 | to the front of the hash chain */ 78 | 79 | static D_ANODE *find_by_dval(A, d, cflag) 80 | ARRAY A ; 81 | double d ; 82 | int cflag ; 83 | { 84 | unsigned h = DHASH(d) ; 85 | register D_ANODE *p = A[h].dlink ; 86 | D_ANODE *q = 0 ; /* trails p for move to front */ 87 | ANODE *ap ; 88 | 89 | while ( p ) 90 | if ( p->dval == d ) 91 | { /* found */ 92 | if ( ! p->ap->sval ) /* but it was deleted by string */ 93 | { if ( q ) q->dlink = p->dlink ; 94 | else A[h].dlink = p->dlink ; 95 | zfree(p, sizeof(D_ANODE)) ; 96 | break ; 97 | } 98 | /* found */ 99 | if ( !q ) return p ; /* already at front */ 100 | else /* delete to put at front */ 101 | { q->dlink = p->dlink ; goto found ; } 102 | } 103 | else 104 | { q = p ; p = p->dlink ; } 105 | 106 | void (*signal())() ; 107 | 108 | -------------------------------------------------------------------------------- /test/okay/mawktest.err: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aksr/mawk2/949abae95ca3a814efc9625abba0d743af508b54/test/okay/mawktest.err -------------------------------------------------------------------------------- /test/okay/mawktest.out: -------------------------------------------------------------------------------- 1 | mawk 1.9.9.6, 21 Aug 2016, Copyright Michael D. 
Brennan 2 | 3 | 4 | testing input and field splitting 5 | input and field splitting OK 6 | 7 | testing regular expression matching 8 | regular expression matching OK 9 | 10 | testing arrays and flow of control 11 | array test OK 12 | 13 | testing input/output with pipes 14 | pipe IO OK 15 | 16 | testing function calls and general stress test 17 | general stress test passed 18 | 19 | tested mawk seems OK 20 | -------------------------------------------------------------------------------- /test/pipetest: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | AWK=${AWK:-../mawk} 4 | 5 | # pipes and out files and new printf and system 6 | 7 | echo '3 8 | 2 9 | 1' | $AWK 'BEGIN { 10 | input = "cat; exit 7" 11 | output = "sort" 12 | 13 | while((input | getline > 0)) { 14 | print | output 15 | } 16 | print close(input), close(output) 17 | }' 18 | 19 | $AWK 'BEGIN { 20 | file = "foo" rand() 21 | printf "%x %x %d\n", -1, 2^32, -2^32 > file 22 | print close(file) 23 | command = sprintf("cat %s; rm %s; exit 4", file, file) 24 | print system(command) 25 | }' 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /test/pipetest.out: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 7 0 5 | 0 6 | ffffffffffffffff 100000000 -4294967296 7 | 4 8 | -------------------------------------------------------------------------------- /test/reg-awk.out: -------------------------------------------------------------------------------- 1 | 3 2 | 4 3 | 1 4 | -------------------------------------------------------------------------------- /test/reg0.awk: -------------------------------------------------------------------------------- 1 | 2 | /return/ {cnt++} 3 | END{print cnt} 4 | -------------------------------------------------------------------------------- /test/reg1.awk: 
-------------------------------------------------------------------------------- 1 | 2 | /return|switch/ {cnt++} 3 | END{print cnt} 4 | -------------------------------------------------------------------------------- /test/reg2.awk: -------------------------------------------------------------------------------- 1 | 2 | /[A-Za-z_][A-Za-z0-9_]*\[.*\][ \t]*=/ {cnt++} 3 | END{print cnt} 4 | -------------------------------------------------------------------------------- /test/wc-awk.out: -------------------------------------------------------------------------------- 1 | 107 479 2 | -------------------------------------------------------------------------------- /test/wc.awk: -------------------------------------------------------------------------------- 1 | 2 | {sum += NF} 3 | END{ print NR, sum} 4 | -------------------------------------------------------------------------------- /test/wfrq-awk.out: -------------------------------------------------------------------------------- 1 | 29 p 2 | 21 A 3 | 14 ANODE 4 | 13 q 5 | 12 d 6 | 12 sval 7 | 10 if 8 | 10 the 9 | 8 dlink 10 | 8 h 11 | 8 is 12 | 7 to 13 | 6 D 14 | 6 of 15 | 5 cflag 16 | 5 deleted 17 | 5 else 18 | 5 front 19 | 5 hash 20 | 5 link 21 | -------------------------------------------------------------------------------- /test/wfrq0.awk: -------------------------------------------------------------------------------- 1 | 2 | # this program finds the twenty most freq 3 | # words in document using a heap sort at the end 4 | # 5 | # 6 | 7 | function down_heap(i, k,hold) 8 | { 9 | while ( 1 ) 10 | { 11 | if ( compare(heap[2*i], heap[2*i+1]) <= 0 ) k = 2*i 12 | else k = 2*i + 1 13 | 14 | if ( compare(heap[i],heap[k]) <= 0 ) return 15 | 16 | hold = heap[k] ; heap[k] = heap[i] ; heap[i] = hold 17 | i = k 18 | } 19 | } 20 | 21 | # compares two values of form "number word" 22 | # by number and breaks ties by word (reversed) 23 | 24 | function compare(s1, s2, t, X) 25 | { 26 | t = (s1+0) - (s2+0) # forces types to 
number 27 | 28 | if ( t == 0 ) 29 | { 30 | split(s1, X); s1 = X[2] 31 | split(s2, X); s2 = X[2] 32 | if ( s2 < s1 ) return -1 33 | return s1 < s2 34 | } 35 | 36 | return t 37 | } 38 | 39 | 40 | BEGIN { RS = "[^a-zA-Z]+" ; BIG = "999999:" } 41 | 42 | { cnt[$0]++ } 43 | 44 | END { delete cnt[ "" ] 45 | 46 | # load twenty values 47 | j = 1 48 | for( i in cnt ) 49 | { 50 | heap[j] = num_word( cnt[i] , i ) 51 | delete cnt[i] ; 52 | if ( ++j == 21 ) break ; 53 | } 54 | 55 | # make some sentinals 56 | for( i = j ; i < 43 ; i++ ) heap[i] = BIG 57 | 58 | h_empty = j # save the first empty slot 59 | # make a heap with the smallest in slot 1 60 | for( i = h_empty - 1 ; i > 0 ; i-- ) down_heap(i) 61 | 62 | # examine the rest of the values 63 | for ( i in cnt ) 64 | { 65 | j = num_word(cnt[i], i) 66 | if ( compare(j, heap[1]) > 0 ) 67 | { # its bigger 68 | # take the smallest out of the heap and readjust 69 | heap[1] = j 70 | down_heap(1) 71 | } 72 | } 73 | 74 | h_empty-- ; 75 | 76 | # what's left are the twenty largest 77 | # smallest at the top 78 | # 79 | 80 | i = 20 81 | while ( h_empty > 1 ) 82 | { 83 | buffer[i--] = heap[1] 84 | heap[1] = heap[h_empty] 85 | heap[h_empty] = BIG 86 | down_heap(1) 87 | h_empty-- 88 | } 89 | buffer[i--] = heap[1] 90 | 91 | for(j = 1 ; j <= 20 ; j++ ) print buffer[j] 92 | } 93 | 94 | 95 | function num_word(num, word) 96 | { 97 | return sprintf("%3d %s", num, word) 98 | } 99 | -------------------------------------------------------------------------------- /types.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | types.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 
11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | 18 | /* types.h */ 19 | 20 | #ifndef MAWK_TYPES_H 21 | #define MAWK_TYPES_H 22 | 23 | #include "sizes.h" 24 | 25 | 26 | /* CELL types */ 27 | enum { 28 | C_NOINIT , 29 | C_DOUBLE , 30 | C_STRING , 31 | C_STRNUM , 32 | C_MBSTRN , /*could be STRNUM, has not been checked */ 33 | C_RE , 34 | C_SPACE , /* split on space */ 35 | C_SNULL , /* split on the empty string */ 36 | C_REPL , /* a replacement string '\&' changed to & */ 37 | C_REPLV , /* a vector replacement -- broken on & */ 38 | NUM_CELL_TYPES 39 | } ; 40 | 41 | /* these defines are used to check types for two 42 | CELLs which are adjacent in memory */ 43 | 44 | #define TWO_NOINITS (2*(1<len==(s2)->len && \ 71 | memcmp((s1)->str,(s2)->str,(s1)->len)==0) 72 | 73 | /* make copy of STRING */ 74 | #define STRING_dup(s) (((STRING*)(s))->ref_cnt++, (STRING*)(s)) 75 | 76 | /* like strcmp() */ 77 | int STRING_cmp(STRING*,STRING*) ; 78 | 79 | 80 | typedef struct cell { 81 | int type ; 82 | PTR ptr ; 83 | double dval ; 84 | } CELL ; 85 | 86 | 87 | /* all builtins are passed the evaluation stack pointer and 88 | return its new value, here is the type */ 89 | 90 | typedef CELL *(*PF_CP)(CELL*) ; 91 | 92 | /* an element of code (instruction) */ 93 | typedef union { 94 | int op ; 95 | PTR ptr ; 96 | } INST ; 97 | 98 | #endif /* MAWK_TYPES_H */ 99 | -------------------------------------------------------------------------------- /version.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | version.c 4 | copyright 1991-95,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 
11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | #include "mawk.h" 18 | 19 | /* mawk 1.9 */ 20 | static const char* PATCH_STRING = ".9.6" ; 21 | static const char* DATE_STRING = "21 Aug 2016" ; 22 | 23 | static 24 | const char* const version_string = 25 | "mawk 1.9%s, %s, Copyright Michael D. Brennan\n\n" ; 26 | 27 | static const char*const fmt = "%-14s%10lu\n"; 28 | /* print VERSION and exit */ 29 | void 30 | print_version(void) 31 | { 32 | 33 | printf(version_string, PATCH_STRING, DATE_STRING) ; 34 | exit(0) ; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /zmalloc.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | zmalloc.c 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 
14 | ********************************************/ 15 | 16 | #include "mawk.h" 17 | #include "zmalloc.h" 18 | 19 | static void 20 | out_of_mem(void) 21 | { 22 | const char* out = "out of memory" ; 23 | 24 | if (mawk_state == EXECUTION) rt_error(out) ; 25 | else 26 | { 27 | /* I don't think this will ever happen */ 28 | compile_error(out) ; mawk_exit(2) ; 29 | } 30 | } 31 | 32 | void* emalloc(size_t sz) 33 | { 34 | void* ret = malloc(sz) ; 35 | if (ret == 0) out_of_mem() ; 36 | return ret ; 37 | } 38 | 39 | void* erealloc(void* p, size_t sz) 40 | { 41 | void* ret = realloc(p, sz) ; 42 | if (ret == 0) out_of_mem() ; 43 | return ret ; 44 | } 45 | 46 | /* if we are valgrinding or purifying */ 47 | 48 | #ifdef MEM_CHECK 49 | 50 | void* zmalloc(size_t sz) 51 | { 52 | return emalloc(sz) ; 53 | } 54 | 55 | void* zrealloc(void* p, size_t old, size_t new) 56 | { 57 | return erealloc(p,new) ; 58 | } 59 | 60 | void zfree(void* p, size_t sz) 61 | { 62 | free(p) ; 63 | } 64 | 65 | #else /* usual case */ 66 | /* 67 | zmalloc() gets mem from emalloc() in chunks of ZSIZE * AVAIL_SZ 68 | and cuts these blocks into smaller pieces that are multiples ZSIZE. 69 | When a piece is returned via zfree(), it goes 70 | on a linked linear list indexed by its size. The lists are 71 | an array, pool[]. 
72 | */ 73 | 74 | /* block sizes are set by this #define */ 75 | 76 | #define ZSZ (4*sizeof(long)) 77 | 78 | typedef union zblock { 79 | union zblock* link ; 80 | double align ; 81 | char filler[ZSZ] ; 82 | } ZBlock ; 83 | 84 | #define ZSIZE sizeof(ZBlock) 85 | 86 | #define bytes_to_blocks(b) (((b)+ZSIZE-1)/ZSIZE) 87 | 88 | /* memory from emalloc goes here to be partitioned into 89 | smaller pieces that end up in pool[] 90 | */ 91 | 92 | static ZBlock* avail ; 93 | static size_t amt_avail ; 94 | 95 | #define AVAIL_SZ 1024 /* number of ZBlocks to get from emalloc */ 96 | 97 | static void 98 | fill_avail(void) 99 | { 100 | avail = (ZBlock *)emalloc(ZSIZE * AVAIL_SZ) ; 101 | amt_avail = AVAIL_SZ ; 102 | } 103 | 104 | #define POOL_SZ 16 105 | static ZBlock* pool[POOL_SZ] ; 106 | /* size of biggest block in pool[] */ 107 | #define zmalloc_limit (16*ZSIZE) 108 | 109 | void* zmalloc(size_t sz) 110 | { 111 | if (sz > zmalloc_limit) { 112 | return emalloc(sz) ; 113 | } 114 | { 115 | size_t blks = bytes_to_blocks(sz) ; 116 | ZBlock* p = pool[blks-1] ; 117 | if (p) { 118 | /* get mem from pool */ 119 | pool[blks-1] = p->link ; 120 | return p ; 121 | } 122 | 123 | if (blks > amt_avail) { 124 | if (amt_avail > 0) { 125 | avail->link = pool[amt_avail-1] ; 126 | pool[amt_avail-1] = avail ; 127 | } 128 | fill_avail() ; 129 | } 130 | /* cut a piece off the avail block */ 131 | p = avail ; 132 | avail += blks ; 133 | amt_avail -= blks ; 134 | return p ; 135 | } 136 | } 137 | 138 | void zfree(void* p, size_t sz) 139 | { 140 | if (sz > zmalloc_limit) { 141 | free(p) ; 142 | } 143 | else { 144 | /* put p in pool[] */ 145 | size_t blks = bytes_to_blocks(sz) ; 146 | ZBlock* zp = (ZBlock*) p ; 147 | zp->link = pool[blks-1] ; 148 | pool[blks-1] = zp ; 149 | } 150 | } 151 | 152 | void* zrealloc(void* p, size_t old_size, size_t new_size) 153 | { 154 | if (new_size > zmalloc_limit && old_size > zmalloc_limit) { 155 | return erealloc(p,new_size) ; 156 | } 157 | else { 158 | void* ret = 
zmalloc(new_size) ; 159 | memcpy(ret, p, old_size < new_size ? old_size : new_size) ; 160 | zfree(p, old_size) ; 161 | return ret ; 162 | } 163 | } 164 | 165 | #endif 166 | -------------------------------------------------------------------------------- /zmalloc.h: -------------------------------------------------------------------------------- 1 | 2 | /******************************************** 3 | zmalloc.h 4 | copyright 1991,2014-2016 Michael D. Brennan 5 | 6 | This is a source file for mawk, an implementation of 7 | the AWK programming language. 8 | 9 | Mawk is distributed without warranty under the terms of 10 | the GNU General Public License, version 3, 2007. 11 | 12 | If you import elements of this code into another product, 13 | you agree to not name that product mawk. 14 | ********************************************/ 15 | 16 | 17 | #ifndef ZMALLOC_H 18 | #define ZMALLOC_H 19 | 20 | #include 21 | 22 | void* emalloc(size_t) ; 23 | void* erealloc(void*,size_t) ; 24 | 25 | void* zmalloc(size_t) ; 26 | void* zrealloc(void*,size_t,size_t) ; 27 | void zfree(void*,size_t) ; 28 | 29 | #define ZMALLOC(type) ((type*)zmalloc(sizeof(type))) 30 | #define ZFREE(p) zfree(p,sizeof(*(p))) 31 | 32 | 33 | #endif /* ZMALLOC_H */ 34 | --------------------------------------------------------------------------------