├── README.txt ├── clawk.asd ├── clawk.lisp ├── clawk.system ├── clawk.translations ├── clawktest.lisp ├── emp.data ├── license.txt └── packages.lisp /README.txt: -------------------------------------------------------------------------------- 1 | CLAWK is an AWK implementation embedded into Common Lisp, by Michael 2 | Parker. 3 | 4 | Michael Weber patched it to make it compile again; his version is here: 5 | 6 | http://www.foldr.org/~michaelw/lisp/clawk/ 7 | 8 | This version was created by Zach Beane to fix an SBCL compile-time 9 | warning about a bogus format string. 10 | 11 | https://github.com/sharplispers/clawk 12 | 13 | 2011-10-31 14 | -------------------------------------------------------------------------------- /clawk.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem clawk 2 | :version "4" 3 | :description "Common Lisp AWK" 4 | :maintainer "Zach Beane " 5 | :author "Michael Parker " 6 | :licence "BSD-style" 7 | :depends-on (regex) 8 | :components ((:file "packages") 9 | (:file "clawk" :depends-on ("packages")) 10 | (:file "clawktest" :depends-on ("packages" "clawk")) 11 | (:static-file "clawk.system" :pathname "clawk.system") 12 | (:static-file "clawk.translations" :pathname "clawk.translations") 13 | (:static-file "emp.data" :pathname "emp.data") 14 | (:static-file "license.txt" :pathname "license.txt")) 15 | ;; :long-description "" 16 | ) 17 | -------------------------------------------------------------------------------- /clawk.lisp: -------------------------------------------------------------------------------- 1 | ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CLAWK; Base: 10 -*- 2 | 3 | (in-package :CLAWK) 4 | 5 | 6 | ;;; 7 | ;;; change the #/.../ readmacro to generate a regex-matcher object, 8 | ;;; and use the make-load-form function to define the loader and initializer 9 | ;;; 10 | 11 | ;;; 12 | ;;; reader macro for #/.../ regex symbols 13 | ;;; 14 | 15 | (defun |#/-reader| (strm subchar arg) 16 | (declare (ignore subchar arg)) 17 | (let ((symname "")) 18 | (loop 19 | (let ((c (read-char strm))) 20 | (cond ((char= c #\/) 21 | (return-from |#/-reader| 22 | (list 'quote (intern symname *package*)))) 23 | ((char= c #\\) 24 | (let ((c2 (read-char strm))) 25 | (cond ((char= c2 #\/) 26 | (setq symname (concatenate 'string symname (string #\/)))) 27 | ((or (char= c2 #\n) (char= c2 #\N) 28 | (char= c2 #\r) (char= c2 #\R) 29 | (char= c2 #\t) (char= c2 #\T)) 30 | (setq symname (concatenate 'string symname (string c) (string c2)))) 31 | ((digit-char-p c2) 32 | (setq symname (concatenate 'string symname (string c) (string c2)))) 33 | (t (setq symname (concatenate 'string symname (string c2))))))) 34 | ((or (char= c #\Return) (char= c #\Newline)) 35 | (error "Unterminated regular expression: #/~S" symname)) 36 | (t (setq symname (concatenate 'string symname (string c))))))))) 37 | 38 | (defun install-regex-syntax () 39 | (set-dispatch-macro-character #\# #\/ #'|#/-reader|) 40 | t) 41 | 42 | #-:Symbolics 43 | (progn 44 | 45 | (defun |#`-reader| (strm subchar arg) 46 | (declare (ignore subchar arg)) 47 | (let ((cmdname-strm (make-string-output-stream))) 48 | (loop 49 | (let ((c (read-char strm))) 50 | (cond ((char= c #\`) 51 | (return-from |#`-reader| `(call-system-catching-output ,(get-output-stream-string cmdname-strm)))) 52 | ((char= c #\\) 53 | (let ((c2 (read-char strm))) 54 | (cond ((char= c2 #\`) 55 | (princ c2 cmdname-strm)) 56 | (t (princ c cmdname-strm) 57 | (princ c2 cmdname-strm))))) 58 | ((or (char= c #\Return) (char= c #\Newline)) 59 | (error "Unterminated shell command: #`~S" (get-output-stream-string cmdname-strm))) 60 | (t (princ c cmdname-strm))))))) 61 | 62 | (defun install-cmd-syntax () 63 | (set-dispatch-macro-character #\# #\` #'|#`-reader|) 64 | t) 65 | 66 | ) 67 | 68 | ;(defun test () 69 | ; (for-stream-lines (#`command.com /c dir`) 70 | ; ($print *FNR* " " $0))) 71 | 72 | 73 | 74 | #+:Lispworks 75 | (progn 76 | 77 | (defun call-system-catching-output (cmd) 78 | (let ((cmd-output (make-string-output-stream))) 79 | (sys:call-system-showing-output cmd 80 | :prefix "" 81 | :output-stream cmd-output 82 | :wait t 83 | :show-cmd nil) 84 | (make-string-input-stream (get-output-stream-string cmd-output)))) 85 | 86 | (defun get-shell-pgm () 87 | (or (lispworks:environment-variable "COMSPEC") 88 | (lispworks:environment-variable "SHELL") 89 | (lispworks:environment-variable "shell"))) 90 | 91 | ) 92 | 93 | 94 | 95 | (defgeneric get-matcher-for-pattern (pattern)) 96 | (defmethod get-matcher-for-pattern ((pattern symbol)) 97 | (let ((matcher (get pattern 'regex-matcher))) 98 | (if matcher 99 | matcher 100 | (let ((matcher (compile-str (symbol-name pattern)))) 101 | (if matcher 102 | (setf (get pattern 'regex-matcher) matcher)) 103 | matcher)))) 104 | (defmethod get-matcher-for-pattern ((pattern string)) 105 | (compile-str pattern)) 106 | (defmethod get-matcher-for-pattern ((pattern matcher)) 107 | pattern) 108 | 109 | 110 | 111 | 112 | (defvar *CURFILE*) 113 | (defvar *CURLINE* "") 114 | (defvar *FS-IS-WS* t) 115 | (regex::define-constant +WS-FIELDSEP-PAT+ "[ \\t]+") 116 | (defvar *FS* +WS-FIELDSEP-PAT+) 117 | (defvar *RSTART* 0) 118 | (defvar *RLENGTH* 0) 119 | (defvar *REND* 0) 120 | (defvar *REGS* (make-array 0)) 121 | (defvar *FIELDS* (make-array 0)) 122 | (defvar *NR* 0) ; total num recs read 123 | (defvar *FNR* 0) ; num recs read in current file 124 | (defvar *NF* 0) ; number of fields in current record 125 | (defvar *SUBSEP* (string (code-char #o34))) 126 | (defvar *OFS* " ") 127 | (defvar *ORS* "\n") 128 | (defvar *LAST-SUCCESSFUL-MATCH*) 129 | 130 | (define-symbol-macro $0 *curline*) 131 | (define-symbol-macro $1 ($n 1)) 132 | (define-symbol-macro $2 ($n 2)) 133 | (define-symbol-macro $3 ($n 3)) 134 | (define-symbol-macro $4 ($n 4)) 135 | (define-symbol-macro $5 ($n 5)) 136 | (define-symbol-macro $6 ($n 6)) 137 | (define-symbol-macro $7 ($n 7)) 138 | (define-symbol-macro $8 ($n 8)) 139 | (define-symbol-macro $9 ($n 9)) 140 | (define-symbol-macro $10 ($n 10)) 141 | (define-symbol-macro $11 ($n 11)) 142 | (define-symbol-macro $12 ($n 12)) 143 | (define-symbol-macro $13 ($n 13)) 144 | (define-symbol-macro $14 ($n 14)) 145 | (define-symbol-macro $15 ($n 15)) 146 | (define-symbol-macro $16 ($n 16)) 147 | (define-symbol-macro $17 ($n 17)) 148 | (define-symbol-macro $18 ($n 18)) 149 | (define-symbol-macro $19 ($n 19)) 150 | (define-symbol-macro $20 ($n 20)) 151 | (define-symbol-macro $NF ($n *NF*)) 152 | 153 | (define-symbol-macro $#0 ($#n 0)) 154 | (define-symbol-macro $#1 ($#n 1)) 155 | (define-symbol-macro $#2 ($#n 2)) 156 | (define-symbol-macro $#3 ($#n 3)) 157 | (define-symbol-macro $#4 ($#n 4)) 158 | (define-symbol-macro $#5 ($#n 5)) 159 | (define-symbol-macro $#6 ($#n 6)) 160 | (define-symbol-macro $#7 ($#n 7)) 161 | (define-symbol-macro $#8 ($#n 8)) 162 | (define-symbol-macro $#9 ($#n 9)) 163 | (define-symbol-macro $#10 ($#n 10)) 164 | (define-symbol-macro $#11 ($#n 11)) 165 | (define-symbol-macro $#12 ($#n 12)) 166 | (define-symbol-macro $#13 ($#n 13)) 167 | (define-symbol-macro $#14 ($#n 14)) 168 | (define-symbol-macro $#15 ($#n 15)) 169 | (define-symbol-macro $#16 ($#n 16)) 170 | (define-symbol-macro $#17 ($#n 17)) 171 | (define-symbol-macro $#18 ($#n 18)) 172 | (define-symbol-macro $#19 ($#n 19)) 173 | (define-symbol-macro $#20 ($#n 20)) 174 | (define-symbol-macro $#NF ($#n *NF*)) 175 | 176 | 177 | 178 | (define-symbol-macro %0 (%n 0)) 179 | (define-symbol-macro %1 (%n 1)) 180 | (define-symbol-macro %2 (%n 2)) 181 | (define-symbol-macro %3 (%n 3)) 182 | (define-symbol-macro %4 (%n 4)) 183 | (define-symbol-macro %5 (%n 5)) 184 | (define-symbol-macro %6 (%n 6)) 185 | (define-symbol-macro %7 (%n 7)) 186 | (define-symbol-macro %8 (%n 8)) 187 | (define-symbol-macro %9 (%n 9)) 188 | (define-symbol-macro %10 (%n 10)) 189 | (define-symbol-macro %11 (%n 11)) 190 | (define-symbol-macro %12 (%n 12)) 191 | (define-symbol-macro %13 (%n 13)) 192 | (define-symbol-macro %14 (%n 14)) 193 | (define-symbol-macro %15 (%n 15)) 194 | (define-symbol-macro %16 (%n 16)) 195 | (define-symbol-macro %17 (%n 17)) 196 | (define-symbol-macro %18 (%n 18)) 197 | (define-symbol-macro %19 (%n 19)) 198 | (define-symbol-macro %20 (%n 20)) 199 | 200 | 201 | (define-symbol-macro %#0 (%#n 0)) 202 | (define-symbol-macro %#1 (%#n 1)) 203 | (define-symbol-macro %#2 (%#n 2)) 204 | (define-symbol-macro %#3 (%#n 3)) 205 | (define-symbol-macro %#4 (%#n 4)) 206 | (define-symbol-macro %#5 (%#n 5)) 207 | (define-symbol-macro %#6 (%#n 6)) 208 | (define-symbol-macro %#7 (%#n 7)) 209 | (define-symbol-macro %#8 (%#n 8)) 210 | (define-symbol-macro %#9 (%#n 9)) 211 | (define-symbol-macro %#10 (%#n 10)) 212 | (define-symbol-macro %#11 (%#n 11)) 213 | (define-symbol-macro %#12 (%#n 12)) 214 | (define-symbol-macro %#13 (%#n 13)) 215 | (define-symbol-macro %#14 (%#n 14)) 216 | (define-symbol-macro %#15 (%#n 15)) 217 | (define-symbol-macro %#16 (%#n 16)) 218 | (define-symbol-macro %#17 (%#n 17)) 219 | (define-symbol-macro %#18 (%#n 18)) 220 | (define-symbol-macro %#19 (%#n 19)) 221 | (define-symbol-macro %#20 (%#n 20)) 222 | 223 | (defun FS () 224 | (when (stringp *FS*) 225 | (if (string= *FS* +WS-FIELDSEP-PAT+) 226 | (setq *FS-IS-WS* t) 227 | (setq *FS-IS-WS* nil)) 228 | (setq *FS* (get-matcher-for-pattern *FS*))) 229 | *FS*) 230 | 231 | 232 | 233 | 234 | ;;; 235 | ;;; STR 236 | ;;; 237 | (defgeneric str (x)) 238 | (defmethod str ((x string)) 239 | x) 240 | (defmethod str ((x (eql nil))) 241 | "") 242 | (defmethod str ((x number)) 243 | (format nil "~D" x)) 244 | 245 | 246 | 247 | ;;; 248 | ;;; NUM 249 | ;;; 250 | (defgeneric num (x)) 251 | (defmethod num ((x number)) 252 | x) 253 | (defmethod num ((x (eql nil))) 254 | 0) 255 | (defmethod num ((x string)) 256 | (let ((val (read (make-string-input-stream x)))) 257 | (if (numberp val) 258 | val 259 | 0))) 260 | 261 | ;;; 262 | ;;; INT 263 | ;;; 264 | (defgeneric int (x)) 265 | (defmethod int ((x integer)) 266 | x) 267 | (defmethod int ((x number)) 268 | (round x)) 269 | (defmethod int ((x (eql nil))) 270 | 0) 271 | (defmethod int ((x string)) 272 | (let ((val (read (make-string-input-stream x) nil 0))) 273 | (if (numberp val) 274 | (round val) 275 | 0))) 276 | 277 | ;;; 278 | ;;; SUB 279 | ;;; 280 | ;;; Substitute the first occurrence of pattern. 281 | ;;; 282 | (defun sub (pattern replacement &optional (source *CURLINE*)) 283 | "Replace the first occurrence of pattern in the source string." 284 | (let ((matcher (get-matcher-for-pattern pattern)) 285 | (srclen (length source))) 286 | (multiple-value-bind (matchedp matchstart matchlen) 287 | (regex:scan-str matcher source :start 0 :length srclen) 288 | (if matchedp 289 | (concatenate 'string (subseq source 0 matchstart) 290 | replacement 291 | (subseq source (+ matchstart matchlen) srclen)) 292 | source)))) 293 | 294 | (defun $sub (pattern replacement &optional (source *CURLINE*)) 295 | "Replace the first occurrence of pattern in the source string. 296 | Coerces its arguments to the appropriate type." 297 | (sub pattern (str replacement) (str source))) 298 | 299 | 300 | ;;; 301 | ;;; GSUB 302 | ;;; 303 | ;;; Globally substitute all occurrences of pattern. 304 | ;;; 305 | (defun gsub (pattern replacement &optional (source *CURLINE*)) 306 | "Replace all occurrences of pattern in the source string." 307 | (let* ((matcher (get-matcher-for-pattern pattern)) 308 | (total-len (length source)) 309 | (len-remaining total-len) 310 | (new-string "") 311 | (matchedp t) 312 | (prior-end 0) 313 | (match-start 0) 314 | (match-len 0)) 315 | (loop 316 | (multiple-value-setq (matchedp match-start match-len) 317 | (regex:scan-str matcher source :start match-start :length len-remaining)) 318 | (if matchedp 319 | (setf new-string (concatenate 'string new-string 320 | (subseq source prior-end match-start) 321 | replacement) 322 | prior-end (+ match-start match-len) 323 | match-start prior-end 324 | len-remaining (- total-len match-start)) 325 | (return-from gsub 326 | (concatenate 'string 327 | new-string 328 | (subseq source prior-end total-len))))))) 329 | 330 | (defun $gsub (pattern replacement &optional (source *CURLINE*)) 331 | "Replaces all occurrences of pattern in the source string. 332 | Coerces its arguments to the appropriate type." 333 | (gsub pattern (str replacement) (str source))) 334 | 335 | 336 | ;;; 337 | ;;; SPLIT 338 | ;;; 339 | ;;; Splits a string up based on an optional field separator pattern 340 | ;;; (uses *FS* as a default). If no string is supplied, it will split 341 | ;;; *CURLINE* and set *FIELDS* and the various $n variables. 342 | ;;; 343 | (defun split (&optional (source nil not-splitting-curline) 344 | (fieldsep-pattern (FS)) 345 | include-empty-leading-and-trailing-fields) 346 | "Split a string up into a list of multiple fields based on 347 | the field-separator pattern." 348 | (let ((push-empty-leading-and-trailing-segments 349 | (or include-empty-leading-and-trailing-fields 350 | (not *fs-is-ws*) 351 | (not (and (stringp fieldsep-pattern) (string= fieldsep-pattern +WS-FIELDSEP-PAT+)))))) 352 | (when (or (null source) (eq source *CURLINE*)) 353 | (setf source *CURLINE* 354 | not-splitting-curline nil)) 355 | (let* ((fieldsep-matcher (get-matcher-for-pattern fieldsep-pattern)) 356 | (fields nil) 357 | (total-len (length source)) 358 | (len-remaining (length source)) 359 | (matchedp t) 360 | (prior-end 0) 361 | (match-start 0) 362 | (match-len 0)) 363 | (loop 364 | (multiple-value-setq (matchedp match-start match-len) 365 | (regex:scan-str fieldsep-matcher source :start match-start :length len-remaining)) 366 | (cond (matchedp 367 | ;; don't push an empty string if the first thing we 368 | ;; find is a separator 369 | (when (or push-empty-leading-and-trailing-segments (> match-start 0)) 370 | (push (subseq source prior-end match-start) 371 | fields)) 372 | ;; now step over this match and continue splitting 373 | (setf prior-end (+ match-start match-len) 374 | match-start prior-end 375 | len-remaining (- total-len match-start)) ) 376 | (t 377 | ;; only push an string if we're not at the end. 378 | (when (or push-empty-leading-and-trailing-segments (< prior-end total-len)) 379 | (push (subseq source prior-end match-len) fields)) 380 | (setf fields (nreverse fields)) 381 | (unless not-splitting-curline 382 | (setf *FIELDS* (make-array (length fields) :initial-contents fields))) 383 | (return-from split fields)))))) ) 384 | 385 | 386 | (defun $split (&optional (source nil source-supplied-p) (fieldsep-pattern (FS))) 387 | "Split a string up into a list of multiple fields based on 388 | the field-separator pattern. Coerces its arguments to the appropriate type." 389 | (split (if (and source source-supplied-p) (str source) source) 390 | fieldsep-pattern)) 391 | 392 | 393 | ;;; 394 | ;;; MATCH 395 | ;;; 396 | ;;; Searches for the first occurrence of a pattern in a string. 397 | ;;; Takes an optional offset. If successful, sets *RSTART*, 398 | ;;; *RLENGTH*, *REND*, and *REGS*. 399 | ;;; 400 | (defun match (source pattern &optional (start 0)) 401 | "Scan for first occurrence of a pattern within the source string." 402 | (let ((matcher (get-matcher-for-pattern pattern))) 403 | (multiple-value-bind (matchedp start len regs) 404 | (regex:scan-str matcher source :start start :length (- (length source) start) 405 | :start-is-anchor t :end-is-anchor t) 406 | (when matchedp 407 | (setf *RSTART* start 408 | *RLENGTH* len 409 | *REND* (+ start len) 410 | *REGS* regs 411 | *LAST-SUCCESSFUL-MATCH* source)) 412 | matchedp))) 413 | 414 | (defun $match (source pattern &optional (start 0)) 415 | "Scan for first occurrence of a pattern within the source string. 416 | Coerces its arguments to the appropriate type." 417 | (match (str source) pattern (int start))) 418 | 419 | 420 | ;;; 421 | ;;; INDEX 422 | ;;; 423 | ;;; Searches for the first occurrence of a substring within a string. 424 | ;;; Takes an optional offset. 425 | ;;; 426 | (declaim (inline index)) 427 | (defun index (pat str &optional (start 0)) 428 | (search pat str :start2 start)) 429 | 430 | (defun $index (pat str &optional (start 0)) 431 | (index (str pat) (str str) (int start))) 432 | 433 | 434 | ;;; 435 | ;;; SUBSTR 436 | ;;; 437 | ;;; Extract a substring. Like subseq, but takes length instead of end. 438 | ;;; 439 | (declaim (inline substr)) 440 | (defun substr (str start len) 441 | (subseq str start (+ start len))) 442 | 443 | (defun $substr (str start len) 444 | (substr (str str) (int start) (int len))) 445 | 446 | 447 | ;;; 448 | ;;; ~ !~ /~ 449 | ;;; 450 | ;;; test if string contains the pattern 451 | ;;; 452 | (defgeneric ~ (str pat) 453 | (:documentation "Test if pattern matches the string.")) 454 | 455 | (defmethod ~ ((str string) (matcher matcher)) 456 | (multiple-value-bind (matchedp) 457 | (regex:scan-str matcher str 458 | :start 0 :length (length str) 459 | :start-is-anchor t :end-is-anchor t) 460 | matchedp)) 461 | 462 | (defmethod ~ ((str string) pat) 463 | (~ str (get-matcher-for-pattern pat))) 464 | 465 | (defmethod ~ ((matcher matcher) (str string)) 466 | (~ str matcher)) 467 | 468 | (defmethod ~ ((pat symbol) (str string)) 469 | (~ str (get-matcher-for-pattern pat))) 470 | 471 | 472 | (declaim (inline /~)) 473 | (defun /~ (str pat) 474 | "Test if pattern isn't present in the string." 475 | (not (~ str pat))) 476 | 477 | (declaim (inline !~)) 478 | (defun !~ (str pat) 479 | "Test if pattern isn't present in the string." 480 | (/~ pat str)) 481 | 482 | 483 | 484 | ;;; 485 | ;;; WITH-PATTERNS 486 | ;;; 487 | (defmacro with-patterns (pats &rest body) 488 | "Execute the body in an environment that includes the compiled patterns 489 | bound to their respective variables." 490 | #+:Symbolics (declare (zwei:indentation 1 1)) 491 | (expand-with-patterns pats `(progn ,@body))) 492 | #+:Lispworks (editor:setup-indent "with-patterns" 1 2 6) 493 | 494 | (defun expand-with-pattern (var strpat body) 495 | `(let ((,var (get-matcher-for-pattern ,strpat))) 496 | (when ,var 497 | (locally 498 | (declare (type matcher ,var)) 499 | ,body))) ) 500 | 501 | (defun expand-with-patterns (pats body) 502 | (if pats 503 | (expand-with-pattern (caar pats) (cadar pats) 504 | (expand-with-patterns (cdr pats) body)) 505 | body)) 506 | 507 | 508 | 509 | ;;; 510 | ;;; WITH-FIELDS 511 | ;;; 512 | ;;; Allows stuff like 513 | ;;; (with-fields ((a b c d &rest rest) str) 514 | ;;; ($print "Fields:" a b c d "Rest: " rest)) 515 | ;;; 516 | ;;; as well as the slightly more awk-ish 517 | ;;; (with-fields (nil str) 518 | ;;; ($print "Fields:" $1 $2 $3 $4)) 519 | ;;; which splits STR into the $ vars, 520 | ;;; 521 | ;;; or even 522 | ;;; (with-fields () 523 | ;;; ($print "Fields:" $1 $2 $3 $4)) 524 | ;;; which will split the current line into the $ vars. 525 | ;;; 526 | (defmacro with-fields ((&optional fields sourcestr (fieldsep-pattern '(FS))) &rest body) 527 | "Split the source string into fields based on the field separator, 528 | bind the field array to the fields variable." 529 | #+:Symbolics (declare (zwei:indentation 1 1)) 530 | (expand-with-fields fields sourcestr fieldsep-pattern body)) 531 | #+:Lispworks (editor:setup-indent "with-fields" 1 2 6) 532 | 533 | (defun expand-with-fields (fields sourcestr fieldsep-pattern body) 534 | (let ((tmp-splits (gensym))) 535 | (if (null fields) 536 | `(let (*FIELDS*) 537 | (let* ((,tmp-splits (split ,sourcestr ,fieldsep-pattern)) 538 | (*NF* (length ,tmp-splits))) 539 | ,@body)) 540 | `(let* ((,tmp-splits (split ,sourcestr ,fieldsep-pattern)) 541 | (*NF* (length ,tmp-splits))) 542 | (declare (special *NF*)) 543 | (destructuring-bind ,fields ,tmp-splits 544 | ,@body))))) 545 | 546 | 547 | 548 | ;;; 549 | ;;; $ 550 | ;;; 551 | ;;; Field access to a computed field 552 | ;;; 553 | (defun $n (n) 554 | "Access a field." 555 | (let ((n (int n))) 556 | (cond ((zerop n) *CURLINE*) 557 | ((and (>= n 0) (<= n (array-dimension *FIELDS* 0))) 558 | (aref *FIELDS* (1- n)))))) 559 | 560 | (defsetf $n (n) (val) 561 | (let ((tmpvar (gensym))) 562 | `(let ((,tmpvar (int ,n))) 563 | (cond ((zerop ,tmpvar) (setf *CURLINE* ,val)) 564 | ((and (>= ,tmpvar 0) (<= ,tmpvar (array-dimension *FIELDS* 0))) 565 | (setf (aref *FIELDS* (1- ,tmpvar)) ,val)))))) 566 | 567 | 568 | (declaim (inline $#n)) 569 | (defun $#n (n) 570 | "Access a field, as a number." 571 | (num ($n n))) 572 | 573 | (defsetf $#n (n) (val) 574 | `(setf ($n ,n) (str ,val))) 575 | 576 | 577 | 578 | ;;; 579 | ;;; WITH-SUBMATCHES 580 | ;;; 581 | ;;; Allows stuff like 582 | ;;; (with-submatches (a b c d) 583 | ;;; ($print "Regs:" a b c d)) 584 | ;;; 585 | ;;; which is handy in match-case clauses 586 | ;;; 587 | (defmacro with-submatches (&optional fields &rest body) 588 | "Bind the submatch variables to the corresponding strings from the registers array." 589 | #+:Symbolics (declare (zwei:indentation 1 1)) 590 | (expand-with-submatches fields body)) 591 | #+:Lispworks (editor:setup-indent "with-submatches" 1 2 6) 592 | 593 | (defun expand-with-submatches (fields body) 594 | (if fields 595 | `(destructuring-bind ,fields 596 | (make-register-list *LAST-SUCCESSFUL-MATCH* *REGS*) 597 | ,@body) 598 | `(progn ,@body))) 599 | 600 | 601 | 602 | ;;; 603 | ;;; IF-MATCH 604 | ;;; 605 | ;;; Allows stuff like 606 | ;;; (if-match ("a*b" str) 607 | ;;; ($print "Regs:" %0 %1 %2 %3 %4) 608 | ;;; ($print "No match") 609 | ;;; 610 | (defmacro if-match ((pat str &optional (pos 0)) consequent alternative) 611 | "Match the pattern to the string, and if it matches, bind the 612 | *RSTART*, *RLENGTH*, and *REGS* and evaluate the consequent, 613 | otherwise evaluate the alternative." 614 | #+:Symbolics (declare (zwei:indentation 1 1)) 615 | (expand-if-match pat str pos consequent alternative)) 616 | #+:Lispworks (editor:setup-indent "if-match" 2 2 4) 617 | 618 | 619 | #-:Symbolics 620 | (progn 621 | (defmacro once-only (variables &rest body) 622 | "Returns the code built by BODY. If any of VARIABLES 623 | might have side effects, they are evaluated once and stored 624 | in temporary variables that are then passed to BODY." 625 | (assert (every #'symbolp variables)) 626 | (let ((temps (loop repeat (length variables) collect (gensym)))) 627 | `(if (every #'side-effect-free-p (list ,@variables)) 628 | (progn ,@body) 629 | (list 'let 630 | ,`(list ,@(mapcar #'(lambda (tmp var) 631 | `(list ',tmp ,var)) 632 | temps variables)) 633 | (let ,(mapcar #'(lambda (var tmp) `(,var ',tmp)) 634 | variables temps) 635 | ,@body))))) 636 | 637 | (defun side-effect-free-p (exp) 638 | "Is EXP a constant, variable, or function, 639 | or of the form (THE type x) where x is side-effect-free." 640 | (or (constantp exp) (atom exp) 641 | (and (starts-with exp 'the) 642 | (side-effect-free-p (third exp))))) 643 | 644 | (defun starts-with (lst x) 645 | (and (consp lst) (eq (first lst) x))) 646 | ) 647 | 648 | 649 | (defun expand-if-match (pat str pos consequent alternative) 650 | (once-only (str pos) 651 | `(multiple-value-bind (matchedp *RSTART* *RLENGTH* *REGS*) 652 | (regex:match-str (get-matcher-for-pattern ,pat) 653 | ,str :start ,pos) 654 | (if matchedp 655 | (let ((*last-successful-match* ,str)) 656 | ,consequent) 657 | ,alternative)))) 658 | 659 | 660 | ;;; 661 | ;;; WITH-MATCH 662 | ;;; 663 | ;;; Allows stuff like 664 | ;;; (with-match ((a b c d &rest rest) pat str) 665 | ;;; ($print "Regs:" a b c d "Rest: " rest)) 666 | ;;; 667 | ;;; as well as the slightly less readable 668 | ;;; (with-match (nil pat str) 669 | ;;; ($print "Regs:" %1 %2 %3 %4)) 670 | ;;; which matches pat against str, and loads the submatches into the % vars, 671 | ;;; 672 | ;;; or even 673 | ;;; (with-match (nil pat) 674 | ;;; ($print "Fields:" %1 %2 %3 %4)) 675 | ;;; which will split the current line into the % vars. 676 | 677 | (defmacro with-match ((&optional fields pat sourcestr) &rest body) 678 | "Split the source string into registers based on the pattern, 679 | bind the register variables to the registers array." 680 | #+:Symbolics (declare (zwei:indentation 1 1)) 681 | (expand-with-match pat fields sourcestr body)) 682 | #+:Lispworks (editor:setup-indent "with-match" 1 2 6) 683 | 684 | (defun expand-with-match (pat fields sourcestr body) 685 | (cond ((and pat fields) 686 | `(when (match (or ,sourcestr *CURLINE*) ,pat) 687 | (destructuring-bind ,fields (make-register-list *LAST-SUCCESSFUL-MATCH* *REGS*) 688 | ,@body))) 689 | ((and pat (null fields)) 690 | `(let (*LAST-SUCCESSFUL-MATCH* *REGS*) 691 | (declare (special *LAST-SUCCESSFUL-MATCH* *REGS*)) 692 | (when (match (or ,sourcestr *CURLINE*) ,pat) 693 | ,@body))) 694 | ((and (null pat) fields (null sourcestr)) 695 | `(destructuring-bind ,fields 696 | (make-register-list *LAST-SUCCESSFUL-MATCH* *REGS*) 697 | ,@body)) 698 | ((and (null pat) (null fields) sourcestr) 699 | (error "WITH-MATCH requires a pattern to match the string ~A against" sourcestr)) 700 | ((and (null pat) (null fields) (null sourcestr)) 701 | `(progn ,@body)))) 702 | 703 | (defun make-register-list (str regs) 704 | (let ((nregs (array-dimension regs 0))) 705 | (loop for i from 0 below nregs 706 | collect (get-reg-str str regs i)))) 707 | 708 | 709 | ;;; 710 | ;;; % 711 | ;;; 712 | ;;; Register access 713 | ;;; 714 | 715 | (defun get-reg-str (str regs n) 716 | "Access a register." 717 | (let ((rstart (register-start regs n)) 718 | (rend (register-end regs n))) 719 | (if (and (numberp rstart) (numberp rend)) 720 | (subseq str rstart rend)))) 721 | 722 | (defun %n (n) 723 | "Access a register." 724 | (let ((n (int n))) 725 | (if (and (stringp *LAST-SUCCESSFUL-MATCH*) (< n (array-dimension *REGS* 0)) (>= n 0)) 726 | (get-reg-str *LAST-SUCCESSFUL-MATCH* *REGS* n)))) 727 | 728 | 729 | 730 | ;;; 731 | ;;; MATCH-CASE 732 | ;;; 733 | ;;; Allows stuff like: 734 | ;;; (match-case str 735 | ;;; (#/foo/ (format t "foo")) 736 | ;;; (#/bar/ (format t "bar")) 737 | ;;; ((#/baz/ #/qux/) (format t "either baz or qux")) 738 | ;;; (else (format t "unknown"))) 739 | ;;; 740 | ;;; The matches are done by MATCH, so the various special variables are set 741 | ;;; appropriately 742 | ;;; 743 | (defmacro match-case (strexpr &rest clauses) 744 | #+:Symbolics (declare (zwei:indentation 1 1)) 745 | (expand-match-case strexpr clauses)) 746 | #+:Lispworks (editor:setup-indent "match-case" 1 2 6) 747 | 748 | (defun expand-match-case (strexpr clauses) 749 | (let ((tmpstrsym (gensym))) 750 | `(let ((,tmpstrsym ,strexpr)) 751 | ,(expand-match-case-clauses tmpstrsym clauses)))) 752 | 753 | (defun expand-match-case-clauses (strexpr clauses) 754 | (if clauses 755 | (let ((clause (first clauses)) 756 | (other-clauses (rest clauses))) 757 | (cond ((member (car clause) '(t else otherwise)) 758 | `(progn ,@(rest clause))) 759 | ((atom (car clause)) 760 | `(if (match ,strexpr ,(car clause)) 761 | (progn ,@(cdr clause)) 762 | ,(expand-match-case-clauses strexpr other-clauses))) 763 | (t `(if (or ,@(mapcar #'(lambda (x) `(,strexpr match ,x)) clause)) 764 | (progn ,@(cdr clause)) 765 | ,(expand-match-case-clauses strexpr other-clauses))))) 766 | `())) 767 | 768 | 769 | 770 | 771 | ;;; 772 | ;;; MATCH-WHEN 773 | ;;; 774 | ;;; Takes a set of clauses that correspond to the AWK toplevel forms, 775 | ;;; but just evaluates the clauses (BEGIN clause first, then pattern 776 | ;;; clauses, then END clause), without any looping. 777 | ;;; 778 | (defmacro match-when (&rest clauses) 779 | #+:Symbolics (declare (zwei:indentation 1 1)) 780 | (let ((docs-and-decs (extract-docs-and-decs clauses)) 781 | (begin-clauses (extract-begin-clauses clauses)) 782 | (end-clauses (extract-end-clauses clauses)) 783 | (pattern-clauses (extract-pattern-clauses clauses))) 784 | `(locally ,@docs-and-decs 785 | (progn ,@(mapcan #'rest begin-clauses) 786 | ,@(mapcar #'expand-match-when-clause pattern-clauses) 787 | ,@(mapcar #'rest end-clauses))))) 788 | #+:Lispworks (editor:setup-indent "match-when" 0 2) 789 | 790 | (defun is-special-clause (clause type) 791 | (and (listp clause) 792 | (symbolp (first clause)) 793 | (string-equal (symbol-name (first clause)) type))) 794 | 795 | 796 | (defun extract-docs-and-decs (clauses) 797 | (loop for clause in clauses 798 | while (or (stringp clause) (is-special-clause clause "DECLARE")) 799 | collect clause)) 800 | 801 | (defun extract-begin-clauses (clauses) 802 | (loop for clause in clauses 803 | when (is-special-clause clause "BEGIN") 804 | collect clause)) 805 | 806 | (defun extract-end-clauses (clauses) 807 | (loop for clause in clauses 808 | when (is-special-clause clause "END") 809 | collect clause)) 810 | 811 | (defun extract-pattern-clauses (clauses) 812 | (loop for clause in clauses 813 | unless (or (not (listp clause)) 814 | (is-special-clause clause "BEGIN") 815 | (is-special-clause clause "END") 816 | (is-special-clause clause "DECLARE")) 817 | collect clause)) 818 | 819 | ; 820 | ; (t . body) --> (progn . body) 821 | ; (nil . body) --> (progn . body) 822 | ; ((quote sym) . body) --> (when (match *CURLINE* (quote sym)) . body) 823 | ; (stringlit . body) --> (when (match *CURLINE* stringlit) . body) 824 | ; (form . body) --> (when form . body) 825 | ; 826 | (defun expand-match-when-clause (clause) 827 | (cond ((null clause) nil) 828 | ((stringp (first clause)) 829 | `(when (match *CURLINE* ,(first clause)) 830 | ,@(expand-match-when-consequent (rest clause)))) 831 | ((member (first clause) '("t" "always") :test #'symbol-name-eq) 832 | (rest clause)) 833 | ((atom (first clause)) 834 | `(when ,(first clause) 835 | ,@(expand-match-when-consequent (rest clause)))) 836 | (t (let ((condition (first clause)) 837 | (consequents (rest clause))) 838 | (if (eq (first condition) 'quote) 839 | `(when (match *CURLINE* ,condition) 840 | ,@(expand-match-when-consequent consequents)) 841 | `(when ,condition 842 | ,@(expand-match-when-consequent consequents))))))) 843 | 844 | ; provide the default action if one isn't present 845 | (defun expand-match-when-consequent (consequent) 846 | (if consequent 847 | consequent 848 | '(($print *CURLINE*)))) 849 | 850 | ; sym-name-eq 851 | (defun symbol-name-eq (x y) 852 | (if (symbolp x) 853 | (if (symbolp y) 854 | (string= (symbol-name x) (symbol-name y)) 855 | (if (stringp y) 856 | (string= (symbol-name x) y))) 857 | (if (stringp x) 858 | (if (symbolp y) 859 | (string= x (symbol-name y)) 860 | (if (stringp y) 861 | (string= x y)))))) 862 | 863 | ;;; 864 | ;;; FOR-STREAM-LINES 865 | ;;; 866 | ;;; Iterate the body over the lines of the stream. Don't split the 867 | ;;; lines, but keep the current line in both *CURLINE* and $0. 868 | ;;; 869 | (defmacro for-stream-lines ((stream &optional (strmvar (gensym)) 870 | (linevar (gensym))) 871 | &rest body) 872 | #+:Symbolics (declare (zwei:indentation 1 1)) 873 | (expand-for-stream-lines strmvar linevar stream body)) 874 | #+:Lispworks (editor:setup-indent "for-stream-lines" 1 2 6) 875 | 876 | (defun expand-for-stream-lines (streamvar linevar stream body 877 | &aux (nextlbl (gensym))) 878 | `(let ((,streamvar ,stream)) 879 | (when (eq ,streamvar 't) 880 | (setq ,streamvar *standard-input*)) 881 | (unless (null ,streamvar) 882 | (let ((*CURFILE* nil) 883 | (*CURLINE* "") 884 | (*FNR* -1)) 885 | (macrolet ((next () (list 'throw ',nextlbl nil))) 886 | (prog ,(if (eq linevar '*CURLINE*) nil (list linevar)) 887 | ,nextlbl 888 | (setq ,linevar (read-line ,streamvar nil :eof)) 889 | (unless (eq ,linevar :eof) 890 | (setq *CURLINE* ,linevar 891 | $0 ,linevar) 892 | (incf *NR*) 893 | (incf *FNR*) 894 | (catch ',nextlbl 895 | ,@body) 896 | (go ,nextlbl)))))))) 897 | 898 | 899 | 900 | ;;; 901 | ;;; FOR-FILE-LINES 902 | ;;; 903 | ;;; Iterate the body over the lines of the file. Don't split the 904 | ;;; lines, but keep the current line in both *CURLINE* and $0. 905 | ;;; 906 | ;;; Need to do this with prog or labels, and macrolet (next) to jump to 907 | ;;; the read-next-line logic. 908 | ;;; 909 | (defmacro for-file-lines ((path &optional (streamvar (gensym)) 910 | (linevar (gensym))) 911 | &rest body) 912 | #+:Symbolics (declare (zwei:indentation 1 1)) 913 | (expand-for-file-lines streamvar linevar path body)) 914 | #+:Lispworks (editor:setup-indent "for-file-lines" 1 2 6) 915 | 916 | (defun expand-for-file-lines (streamvar linevar path body) 917 | `(with-open-file (,streamvar ,path 918 | :direction :input 919 | :element-type 'character 920 | :if-does-not-exist :error) 921 | ,(expand-for-stream-lines streamvar linevar streamvar body))) 922 | 923 | 924 | ;;; 925 | ;;; FOR-STREAM-FIELDS 926 | ;;; 927 | ;;; Iterate the body over the lines of the stream. Split the lines 928 | ;;; based on *FS*. Depending on what fieldspec you provide, the 929 | ;;; various $n vars may or may not be set (except for $0, which is the 930 | ;;; current line). 931 | ;;; 932 | ;;; As a special case, the value 't will use *standard-input* as the stream. 933 | ;;; 934 | (defmacro for-stream-fields ((stream &optional fieldspec 935 | (strmvar (gensym)) 936 | (linevar (gensym))) 937 | &rest body) 938 | #+:Symbolics (declare (zwei:indentation 1 1)) 939 | (expand-for-stream-fields strmvar linevar fieldspec stream body)) 940 | #+:Lispworks (editor:setup-indent "for-stream-fields" 1 2 6) 941 | 942 | (defun expand-for-stream-fields (strmvar linevar fieldspec stream body 943 | &key (curfile-name stream curfile-name-p) 944 | &aux (nextlbl (gensym))) 945 | `(let ((,strmvar ,stream)) 946 | (if (eq ,strmvar 't) 947 | (setq ,strmvar *standard-input*)) 948 | (unless (null ,strmvar) 949 | (let (,@(if curfile-name-p `((*CURFILE* ,curfile-name))) 950 | (*CURLINE* "") 951 | (*FNR* -1)) 952 | (macrolet ((next () (list 'throw ',nextlbl nil))) 953 | (prog ,(if (eq linevar '*CURLINE*) nil (list linevar)) 954 | ,nextlbl 955 | (setq ,linevar (read-line ,strmvar nil :eof)) 956 | (unless (eq ,linevar :eof) 957 | (setq *CURLINE* ,linevar 958 | $0 ,linevar) 959 | (incf *NR*) 960 | (incf *FNR*) 961 | (catch ',nextlbl 962 | ,(expand-with-fields fieldspec linevar '*FS* body)) 963 | (go ,nextlbl)))))))) 964 | 965 | 966 | 967 | ;;; 968 | ;;; FOR-FILE-FIELDS 969 | ;;; 970 | ;;; Open the filepath, then iterate the body over the lines. Split the lines 971 | ;;; based on *FS*. Depending on what fieldspec you provide, the various $n vars 972 | ;;; may or may not be set (except for $0, which is the current line). 973 | ;;; 974 | (defmacro for-file-fields ((path &optional fieldspec 975 | (strmvar (gensym)) 976 | (linevar (gensym))) 977 | &rest body) 978 | #+:Symbolics (declare (zwei:indentation 1 1)) 979 | (expand-for-file-fields strmvar linevar fieldspec path body)) 980 | #+:Lispworks (editor:setup-indent "for-file-fields" 1 2 6) 981 | 982 | (defun expand-for-file-fields (strmvar linevar fieldspec path body) 983 | `(with-open-file (,strmvar ,path 984 | :direction :input 985 | :element-type 'character 986 | :if-does-not-exist :error) 987 | ,(expand-for-stream-fields strmvar linevar fieldspec strmvar body :curfile-name path))) 988 | 989 | 990 | 991 | 992 | ;;; 993 | ;;; WHEN-STREAM-FIELDS 994 | ;;; 995 | ;;; Guts of AWK toplevel for a file, but unlike AWK this can be 996 | ;;; used anywhere. 997 | ;;; 998 | (defmacro when-stream-fields ((stream &optional fieldspec) &rest clauses) 999 | #+:Symbolics (declare (zwei:indentation 1 1)) 1000 | (let ((docs-and-decs (extract-docs-and-decs clauses)) 1001 | (begin-clauses (extract-begin-clauses clauses)) 1002 | (end-clauses (extract-end-clauses clauses)) 1003 | (pattern-clauses (extract-pattern-clauses clauses))) 1004 | `(locally ,@docs-and-decs 1005 | (progn ,@(mapcan #'rest begin-clauses) 1006 | (for-stream-fields (,stream ,fieldspec) 1007 | ,@(mapcar #'expand-match-when-clause pattern-clauses)) 1008 | ,@(mapcan #'rest end-clauses))))) 1009 | #+:Lispworks (editor:setup-indent "when-stream-fields" 1 2 6) 1010 | 1011 | (defmacro when-file-fields ((path &optional fieldspec) &rest clauses) 1012 | #+:Symbolics (declare (zwei:indentation 1 1)) 1013 | (let ((docs-and-decs (extract-docs-and-decs clauses)) 1014 | (begin-clauses (extract-begin-clauses clauses)) 1015 | (end-clauses (extract-end-clauses clauses)) 1016 | (pattern-clauses (extract-pattern-clauses clauses))) 1017 | `(locally ,@docs-and-decs 1018 | (progn ,@(mapcan #'rest begin-clauses) 1019 | (for-file-fields (,path ,fieldspec) 1020 | ,@(mapcar #'expand-match-when-clause pattern-clauses)) 1021 | ,@(mapcan #'rest end-clauses))))) 1022 | #+:Lispworks (editor:setup-indent "when-file-fields" 1 2 6) 1023 | 1024 | 1025 | 1026 | ;;; 1027 | ;;; Fakes a reasonably close equivalent to a top-level awk program. 1028 | ;;; 1029 | ;;; 1030 | ;;; This needs to be modified to run the BEGIN clauses *ONCE* before 1031 | ;;; any files are processed, and similarly run the END clauses once 1032 | ;;; after the files are processed. 1033 | ;;; 1034 | 1035 | (defmacro defawk (name (&rest parms) &rest clauses) 1036 | (let ((file-or-stream (gensym)) 1037 | (process-stream-fn (gensym)) 1038 | (strm (gensym)) 1039 | (docs-and-decs (extract-docs-and-decs clauses)) 1040 | (begin-clauses (extract-begin-clauses clauses)) 1041 | (end-clauses (extract-end-clauses clauses)) 1042 | (pattern-clauses (extract-pattern-clauses clauses))) 1043 | `(defun ,name (&rest ARGS ,@parms ,@(if (member '&key parms) '((&allow-other-keys)))) 1044 | ;; docstrings and declarations 1045 | ,@docs-and-decs 1046 | ;; shadow our globals with their good values 1047 | (let ((*CURFILE*) (*CURLINE* "") 1048 | (*FS* +WS-FIELDSEP-PAT+) 1049 | (*RSTART* 0) (*RLENGTH* 0) (*REND* 0) 1050 | (*REGS* (make-array 0)) 1051 | (*FIELDS* (make-array 0)) 1052 | (*NR* 0) (*FNR* 0) (*NF* 0) 1053 | (*SUBSEP* (string (code-char #o34))) 1054 | (*OFS* " ") (*ORS* "\n") 1055 | (*LAST-SUCCESSFUL-MATCH*)) 1056 | (declare (special *CURFILE* *CURLINE* *FS* *RSTART* *RLENGTH* 1057 | *REND* *REGS* *FIELDS* *NR* *FNR* *NF* 1058 | *SUBSEP* *OFS* *ORS* *LAST-SUCCESSFUL-MATCH*)) 1059 | (flet ((,process-stream-fn (,strm) 1060 | ,(expand-for-stream-fields strm '*CURLINE* nil strm 1061 | (mapcar 1062 | #'expand-match-when-clause 1063 | pattern-clauses)))) 1064 | ;; run BEGIN clauses 1065 | ,@(mapcan #'rest begin-clauses) 1066 | ;; process files 1067 | (dolist (,file-or-stream ARGS) 1068 | (cond ((eq ,file-or-stream 't) 1069 | (,process-stream-fn *standard-input*)) 1070 | ((and (streamp ,file-or-stream) (input-stream-p ,file-or-stream)) 1071 | (,process-stream-fn ,file-or-stream)) 1072 | ((or (pathnamep ,file-or-stream) (stringp ,file-or-stream)) 1073 | (let ((*CURFILE* ,file-or-stream)) 1074 | (declare (special *CURFILE*)) 1075 | (with-open-file (,strm ,file-or-stream 1076 | :direction :input 1077 | :element-type 'character 1078 | :if-does-not-exist :error) 1079 | (,process-stream-fn ,strm)))))) 1080 | ;; run END clauses 1081 | ,@(mapcan #'rest end-clauses)))))) 1082 | #+:LispWorks (editor:setup-indent "defawk" 1 2) 1083 | 1084 | 1085 | ;;; 1086 | ;;; misc generic functions 1087 | ;;; 1088 | (defun $+ (&rest rest) 1089 | (reduce #'+ (mapcar #'num rest))) 1090 | (defun $- (&rest rest) 1091 | (reduce #'- (mapcar #'num rest))) 1092 | (defun $* (&rest rest) 1093 | (reduce #'* (mapcar #'num rest))) 1094 | (defun $/ (&rest rest) 1095 | (reduce #'/ (mapcar #'num rest))) 1096 | (defun $rem (x y) 1097 | (rem (num x) (num y))) 1098 | (defun $exp (y) 1099 | ($exp (num y))) 1100 | (defun $expt (x y) 1101 | (expt (num x) (num y))) 1102 | (defun $atan2 (x y) 1103 | (atan (num x) (num y))) 1104 | (defun $cos (x) 1105 | (cos (num x))) 1106 | (defun $sin (x) 1107 | (sin (num x))) 1108 | (defun $int (x) 1109 | (truncate (num x))) 1110 | (defun $log (x) 1111 | (log (num x))) 1112 | (defun $sqrt (x) 1113 | (sqrt (num x))) 1114 | (defun $rand () 1115 | (random 1.0)) 1116 | 1117 | (defvar *random-states* (make-hash-table)) 1118 | 1119 | (defun $srand (x) 1120 | (let ((nx (num x))) 1121 | (let ((oldstate (gethash nx *random-states*))) 1122 | (if oldstate 1123 | (setq *random-state* oldstate) 1124 | (setf *random-state* (make-random-state) 1125 | (gethash nx *random-states*) *random-state*))))) 1126 | 1127 | (defun $++ (&rest rest) 1128 | (reduce #'(lambda (x y) (concatenate 'string x (str y))) 1129 | (mapcar #'str rest))) 1130 | 1131 | (defgeneric ! (x)) 1132 | (defmethod ! ((x number)) 1133 | (zerop x)) 1134 | (defmethod ! ((x (eql nil))) 1135 | 1) 1136 | (defmethod ! ((x string)) 1137 | (zerop (num x))) 1138 | 1139 | (defgeneric $== (x y)) 1140 | (defmethod $== (x y) 1141 | (= (num x) (num y))) 1142 | (defmethod $== ((x number) (y number)) 1143 | (= x y)) 1144 | (defmethod $== ((x number) (y string)) 1145 | (= x (num y))) 1146 | (defmethod $== ((x string) (y number)) 1147 | (= (num x) y)) 1148 | (defmethod $== ((x string) (y string)) 1149 | (string= x y)) 1150 | 1151 | (defgeneric $< (x y)) 1152 | (defmethod $< (x y) 1153 | (< (num x) (num y))) 1154 | (defmethod $< ((x number) (y number)) 1155 | (< x y)) 1156 | (defmethod $< ((x number) (y string)) 1157 | (< x (num y))) 1158 | (defmethod $< ((x string) (y number)) 1159 | (< (num x) y)) 1160 | (defmethod $< ((x string) (y string)) 1161 | (string< x y)) 1162 | 1163 | (defgeneric $> (x y)) 1164 | (defmethod $> (x y) 1165 | (> (num x) (num y))) 1166 | (defmethod $> ((x number) (y number)) 1167 | (> x y)) 1168 | (defmethod $> ((x number) (y string)) 1169 | (> x (num y))) 1170 | (defmethod $> ((x string) (y number)) 1171 | (> (num x) y)) 1172 | (defmethod $> ((x string) (y string)) 1173 | (string> x y)) 1174 | 1175 | (defgeneric $<= (x y)) 1176 | (defmethod $<= (x y) 1177 | (<= (num x) (num y))) 1178 | (defmethod $<= ((x number) (y number)) 1179 | (<= x y)) 1180 | (defmethod $<= ((x number) (y string)) 1181 | (<= x (num y))) 1182 | (defmethod $<= ((x string) (y number)) 1183 | (<= (num x) y)) 1184 | (defmethod $<= ((x string) (y string)) 1185 | (string<= x y)) 1186 | 1187 | (defgeneric $>= (x y)) 1188 | (defmethod $>= (x y) 1189 | (>= (num x) (num y))) 1190 | (defmethod $>= ((x number) (y number)) 1191 | (>= x y)) 1192 | (defmethod $>= ((x number) (y string)) 1193 | (>= x (num y))) 1194 | (defmethod $>= ((x string) (y number)) 1195 | (>= (num x) y)) 1196 | (defmethod $>= ((x string) (y string)) 1197 | (string>= x y)) 1198 | 1199 | (defgeneric $/= (x y)) 1200 | (defmethod $/= (x y) 1201 | (/= (num x) (num y))) 1202 | (defmethod $/= ((x number) (y number)) 1203 | (/= x y)) 1204 | (defmethod $/= ((x number) (y string)) 1205 | (/= x (num y))) 1206 | (defmethod $/= ((x string) (y number)) 1207 | (/= (num x) y)) 1208 | (defmethod $/= ((x string) (y string)) 1209 | (string/= x y)) 1210 | (defun != (x y) 1211 | ($/= x y)) 1212 | 1213 | 1214 | (defgeneric $max (x &rest rest)) 1215 | (defmethod $max (x &rest rest) 1216 | (reduce #'max (mapcar #'num rest) :initial-value (num x))) 1217 | (defmethod $max ((x number) &rest rest) 1218 | (reduce #'max (mapcar #'num rest) :initial-value x)) 1219 | 1220 | (defgeneric $min (x &rest rest)) 1221 | (defmethod $min (x &rest rest) 1222 | (reduce #'min (mapcar #'num rest) :initial-value (num x))) 1223 | (defmethod $min ((x number) &rest rest) 1224 | (reduce #'min (mapcar #'num rest) :initial-value x)) 1225 | 1226 | (defgeneric $zerop (x)) 1227 | (defmethod $zerop (x) 1228 | (declare (ignore x)) 1229 | nil) 1230 | (defmethod $zerop ((x number)) 1231 | (zerop x)) 1232 | (defmethod $zerop ((x (eql nil))) 1233 | t) 1234 | (defmethod $zerop ((x string)) 1235 | (not (null (or (string= x "") (string= x "0") (string= x "0.0"))))) 1236 | 1237 | (defgeneric $length (x)) 1238 | (defmethod $length (x) 1239 | (length x)) 1240 | (defmethod $length ((x number)) 1241 | (length (str x))) 1242 | (defmethod $length ((x (eql nil))) 1243 | 0) 1244 | 1245 | (defun $print (&rest rest) 1246 | (do-$fprint *standard-output* rest)) 1247 | (defun $fprint (stream &rest rest) 1248 | (do-$fprint stream rest)) 1249 | (defun do-$fprint (stream lst) 1250 | (if (string= *ORS* "\n") 1251 | (format stream "~%") 1252 | (format stream "~A" *ORS*)) 1253 | (loop for item in lst 1254 | do (format stream "~A~A" (str item) *OFS*))) 1255 | 1256 | 1257 | 1258 | 1259 | ;;; 1260 | ;;; Awk-like associative arrays (built on hashtable, obviously) 1261 | ;;; 1262 | (defun $array () 1263 | (make-hash-table :test 'equalp)) 1264 | 1265 | 1266 | ;; AWK associative-arrays have the odd characteristic that simply 1267 | ;; checking for the presence of a key adds it to the array. 1268 | (defun assoc-array-ref (tbl index) 1269 | (multiple-value-bind (val foundp) 1270 | (gethash index tbl) 1271 | (if foundp 1272 | val 1273 | (setf (gethash index tbl) "")))) 1274 | 1275 | (defsetf assoc-array-ref (tbl index) (val) 1276 | `(setf (gethash ,index ,tbl) ,val)) 1277 | 1278 | (defmacro $aref (tbl index &rest other-indices) 1279 | (if (null other-indices) 1280 | `(assoc-array-ref ,tbl (str ,index)) 1281 | `(assoc-array-ref ,tbl 1282 | (concatenate 1283 | 'string 1284 | (str ,index) 1285 | ,@(mapcan #'(lambda (x) 1286 | `(*SUBSEP* (str ,x))) 1287 | other-indices))))) 1288 | 1289 | ; equivalent to: for (x in a) ... 1290 | (defmacro $for ((keyvar in tbl) &rest body) 1291 | (declare (ignore in)) 1292 | #+:Symbolics (declare (zwei:indentation 1 1)) 1293 | `(loop for ,keyvar being the hash-keys of ,tbl 1294 | do (progn ,@body))) 1295 | #+:Lispworks (editor:setup-indent "$for" 1 2 6) 1296 | 1297 | ; equivalent to: x in a 1298 | (defun $in (key tbl) 1299 | (multiple-value-bind (val presentp) 1300 | (getf key tbl) 1301 | (declare (ignore val)) 1302 | presentp)) 1303 | 1304 | ; equivalent to either delete a[i] or delete a 1305 | (defun $delete (tbl &optional (elt nil eltp)) 1306 | (if eltp 1307 | (remhash elt tbl) 1308 | (clrhash tbl))) 1309 | 1310 | ; return the cardinality of the table 1311 | (defmethod $length ((x hash-table)) 1312 | (hash-table-count x)) 1313 | -------------------------------------------------------------------------------- /clawk.system: -------------------------------------------------------------------------------- 1 | ;;; -*- Mode: Lisp; Syntax: ANSI-Common-lisp; Package: CL-USER; Base: 10 -*- 2 | 3 | (in-package "CL-USER") 4 | 5 | (load-logical-pathname-translations "CLAWK") 6 | 7 | (mk:defsystem "CLAWK" 8 | :source-extension "lisp" 9 | :source-pathname (translate-logical-pathname "CLAWK:SRC;") 10 | :depends-on ("REGEX") 11 | :components ( 12 | (:file "packages") 13 | (:file "clawk" :depends-on ("packages")) 14 | (:file "clawktest" :depends-on ("packages" "clawk")))) 15 | 16 | 17 | 18 | (defun lc-clawk () 19 | (mk:compile-system "CLAWK")) 20 | (defun ld-clawk () 21 | (mk:load-system "CLAWK")) 22 | -------------------------------------------------------------------------------- /clawk.translations: -------------------------------------------------------------------------------- 1 | 2 | 3 | #+:Lispworks 4 | (setf (logical-pathname-translations "CLAWK") 5 | '(("CLAWK:SYS;*" "d:/lisp/*") 6 | ("CLAWK:SRC;*" "d:/lisp/clawk/*"))) 7 | 8 | -------------------------------------------------------------------------------- /clawktest.lisp: -------------------------------------------------------------------------------- 1 | ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CLAWK-TEST; Base: 10 -*- 2 | 3 | (in-package :CLAWK-TEST) 4 | 5 | 6 | ;;; 7 | ;;; Various tests, based on translations of AWK examples from the AWK 8 | ;;; book 9 | ;;; 10 | 11 | ; Chapter 1, page 1, example 1 12 | ; Print all employees that have worked this week 13 | ; 14 | ; $3 > 0 { print $1 $2 $3 } 15 | ; 16 | (defun test-1-1-1 (&optional (filename "emp.data")) 17 | (for-file-lines (filename) 18 | (with-fields ((name payrate hrsworked)) 19 | (when ($> hrsworked 0) 20 | ($print name payrate hrsworked))))) 21 | 22 | ; alternatively like this 23 | (defun test-1-1-2 (&optional (filename "emp.data")) 24 | (for-file-fields (filename (name payrate hrsworked)) 25 | (when ($> hrsworked 0) 26 | ($print name payrate hrsworked)))) 27 | 28 | ; or this 29 | (defun test-1-1-3 (&optional (filename "emp.data")) 30 | (for-file-fields (filename) 31 | (when ($> $3 0) ($print $1 $2 $3)))) 32 | 33 | ; or even like this 34 | (defawk test-1-1-4 () 35 | "Print out the employees that have worked this week." 36 | (($> $3 0) ($print $1 $2 $3))) 37 | 38 | 39 | (defun test-1-2 (&optional (filename "emp.data")) 40 | (for-file-lines (filename) 41 | (with-fields ((name payrate hrsworked)) 42 | (when ($zerop hrsworked) 43 | ($print name payrate hrsworked))))) 44 | 45 | ;;; also awk-like 46 | (defawk test-1-6 () 47 | (t ($print *NR* $0))) 48 | 49 | (defun test-1-7 (&optional (filename "emp.data")) 50 | (for-file-lines (filename) 51 | (with-fields ((name payrate hrsworked)) 52 | ($print "total pay for" name "is" ($* payrate hrsworked))))) 53 | 54 | (defun test-1-9 (&optional (filename "emp.data")) 55 | (for-file-lines (filename inf line) 56 | (with-fields ((name payrate hrsworked)) 57 | (declare (ignore name hrsworked)) 58 | (when ($>= payrate 5) 59 | ($print line))))) 60 | 61 | ; as close to the original awk as possible 62 | (defawk test-1-11-1 () 63 | ((/= *NF* 3) ($print $0 "number of fields is not equal to 3")) 64 | (($< $2 3.35) ($print $0 "rate is below minimum wage")) 65 | (($> $2 10) ($print $0 "rate exceeds $10 per hour")) 66 | (($< $3 0) ($print $0 "negative hours worked")) 67 | (($> $3 60) ($print $0 "too many hours worked")) ) 68 | 69 | ; as close to the original awk as possible 70 | (defawk test-1-11-2 () 71 | (BEGIN ($print "NAME RATE HOURS") 72 | ($print)) 73 | (t ($print $0)) ) ; empty condition 74 | 75 | (defun test-1-12-1 (&optional (filename "emp.data") &aux (emp 0)) 76 | (for-file-lines (filename) 77 | (with-fields ((name payrate hrsworked)) 78 | (declare (ignore name payrate)) 79 | (when ($> hrsworked 15) 80 | (incf emp)))) 81 | ($print emp "employees worked more than 15 hours")) 82 | 83 | (defawk test-1-12-3 (&aux (pay 0)) 84 | (t (incf pay ($* $2 $3))) 85 | (END ($print *NR* "employees") 86 | ($print "total pay is" pay) 87 | ($print "average pay is" (/ pay *NR*)))) 88 | 89 | (defun sample (filename &aux (pay 0) (nr 0)) 90 | (for-file-fields (filename (name payrate hrsworked)) 91 | (declare (ignore name)) 92 | (incf pay (* (num payrate) (num hrsworked))) 93 | (incf nr) ) 94 | (format t "~%~D employees" nr) 95 | (format t "~%total pay is ~F" pay) 96 | (format t "~%average pay is ~F" (/ pay nr)) ) 97 | 98 | (defun test-1-12-4 (&optional (filename "emp.data") &aux maxrate maxemp) 99 | (for-file-lines (filename) 100 | (with-fields ((name payrate hrsworked)) 101 | (declare (ignore hrsworked)) 102 | (when ($> payrate maxrate) 103 | (setq maxrate payrate maxemp name)))) 104 | ($print "highest hourly rate:" maxrate "for" maxemp)) 105 | 106 | (defun test-1-13-1 (&optional (filename "emp.data") &aux names) 107 | (for-file-lines (filename) 108 | (with-fields ((name payrate hrsworked)) 109 | (declare (ignore payrate hrsworked)) 110 | (setq names ($++ names name " ")))) 111 | ($print names)) 112 | 113 | (defun test-1-13-2 (&optional (filename "emp.data") &aux last) 114 | "Print last line" 115 | (for-file-lines (filename inf line) 116 | (setq last line)) 117 | ($print last)) 118 | 119 | (defun test-1-14-1 (&optional (filename "emp.data")) 120 | (for-file-lines (filename) 121 | (with-fields ((name payrate hrsworked)) 122 | (declare (ignore payrate hrsworked)) 123 | ($print name ($length name))))) 124 | 125 | (defun test-1-14-2 (filename &aux (nc 0) (nw 0)) 126 | "Count lines, words, and characters" 127 | (setq *NR* 0) ; the low-level macros don't do this for you 128 | (for-file-lines (filename inf line) 129 | (with-fields ((&rest rest)) 130 | (declare (ignore rest)) 131 | (incf nc (1+ (length line))) 132 | (incf nw *NF*))) 133 | ($print *NR* "lines," nw "words," nc "characters")) 134 | 135 | (defun test-1-14-3 (&optional (filename "emp.data") &aux (n 0) (pay 0)) 136 | (for-file-lines (filename) 137 | (with-fields ((name payrate hrsworked)) 138 | (declare (ignore name)) 139 | (when ($> payrate 6) 140 | (incf n) 141 | (incf pay ($* payrate hrsworked))))) 142 | (if (> n 0) 143 | ($print n "employees, total pay is" pay "average pay is" (/ pay n)) 144 | ($print "no employees are paid more than $6/hour"))) 145 | 146 | ; doesn't use emp.data 147 | (defun test-1-15 (filename) 148 | "Input lines: amount rate years" 149 | (for-file-lines (filename inf line) 150 | (with-fields ((&optional amount rate years)) 151 | ($print line) 152 | (loop for i from 1 by 1 153 | while ($<= i years) 154 | do (format t "~%~t~,2F" ($* amount ($expt ($+ 1 rate) i))))))) 155 | 156 | (defun test-1-16-2-1 (filename &aux lines) 157 | "Print lines in reverse order" 158 | (for-file-lines (filename inf line) 159 | (push line lines)) 160 | (loop for line in lines 161 | do ($print line))) 162 | 163 | ;; even more like the original awk, but not as quick 164 | (defawk test-1-16-2-2 (&aux (lines ($array))) 165 | "Print lines in reverse order" 166 | (t (setf ($aref lines *NR*) $0)) 167 | (END (loop for i from *NR* above 0 168 | do ($print ($aref lines i))))) 169 | 170 | (defun test-1-17-3-1 (filename) 171 | "Print the last field of every input line" 172 | (for-file-lines (filename) 173 | (with-fields ((&rest fields)) 174 | ($print (elt fields (1- *NF*)))))) 175 | 176 | ; even more like awk 177 | (defawk test-1-17-3-2 () 178 | (t ($print ($n *NF*)))) 179 | 180 | ;;;; Tests for bugs 181 | (defawk test/1 () 182 | (t 183 | (return-from test/1 (list $1 $2)))) 184 | 185 | (defawk test/2 () 186 | (BEGIN 187 | (setf *FS* ",")) 188 | (t 189 | (return-from test/2 (list $1 $2)))) 190 | 191 | #|| 192 | (assert 193 | (equal (with-input-from-string (s " one two") 194 | (test/1 s)) 195 | '("one" "two"))) 196 | 197 | (assert 198 | (equal (with-input-from-string (s "one two") 199 | (test/1 s)) 200 | '("one" "two"))) 201 | 202 | (assert 203 | (equal (with-input-from-string (s "one, two ,") 204 | (test/2 s)) 205 | '("one" " two "))) 206 | 207 | (assert 208 | (equal (with-input-from-string (s ", one, two") 209 | (test/2 s)) 210 | '("" " one"))) 211 | ||# 212 | -------------------------------------------------------------------------------- /emp.data: -------------------------------------------------------------------------------- 1 | Beth 4.00 0 2 | Dan 3.75 0 3 | Kathy 4.00 10 4 | Mark 5.00 20 5 | Mary 5.50 22 6 | Suzie 4.25 18 7 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2000,2001,2002 Kenneth Michael Parker 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. The name of the author may not be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /packages.lisp: -------------------------------------------------------------------------------- 1 | ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Base: 10 -*- 2 | 3 | (defpackage CLAWK 4 | #+:Genera (:use COMMON-LISP CLOS REGEX) 5 | #-:Genera (:use COMMON-LISP REGEX) 6 | #+:Genera (:import-from "SCL" DEFINE-SYMBOL-MACRO) 7 | (:export 8 | ;; install the #/../ reader 9 | "INSTALL-REGEX-SYNTAX" 10 | ;; convert any accepted representation of a pattern into a compiled pattern 11 | "GET-MATCHER-FOR-PATTERN" 12 | ;; Specials 13 | "*CURFILE*" "*CURLINE*" "*FS*" "*RSTART*" "*RLENGTH*" "*REND*" 14 | "*REGS*" "*FIELDS*" 15 | "*NR*" "*FNR*" "*NF*" "*SUBSEP*" 16 | "*LAST-MATCH*" "*LAST-SUCCESSFUL-MATCH*" 17 | ;; AWK-like functions 18 | "SUB" "GSUB" "SPLIT" "INDEX" "MATCH" "SUBSTR" 19 | "$SUB" "$GSUB" "$SPLIT" "$INDEX" "$MATCH" "$SUBSTR" 20 | "$SUBSTR" "~" "/~" "!~" 21 | ;; Handy macros 22 | "WITH-PATTERNS" "WITH-FIELDS" "WITH-REGS" "WITH-SUBMATCHES" 23 | "IF-MATCH" "WITH-MATCH" "MATCH-CASE" "MATCH-WHEN" 24 | "TOKENIZE" 25 | ;; iterate across a stream or file, evaluating the body for each 26 | ;; line 27 | "FOR-STREAM-LINES" "FOR-FILE-LINES" 28 | ;; iterate across a stream or file, splitting the lines and 29 | ;; evaluating the body for each line 30 | "FOR-STREAM-FIELDS" "FOR-FILE-FIELDS" 31 | ;; iterate across the lines in a stream or file, splitting the lines 32 | ;; and evaluating the AWK-like clauses for each line. 33 | "WHEN-STREAM-FIELDS" "WHEN-FILE-FIELDS" 34 | ;; define a function on a set of files that closely mimics the 35 | ;; structure of an AWK program. 36 | "DEFAWK" 37 | ;; handy generic functions 38 | "$+" "$-" "$*" "$/" "$REM" "$++" "$==" "$<" "$>" "$<=" "$>=" "$/=" 39 | "$MIN" "$MAX" "$ZEROP" "$LENGTH" 40 | ;; arithmetic functions 41 | "$ATAN2" "$COS" "$SIN" "$EXP" "$EXPT" "$INT" "$LOG" "$SQRT" "$SRAND" "$RAND" 42 | ;; AWK-like I/O 43 | "$PRINT" "$FPRINT" 44 | ;; hashtable-based "arrays" 45 | "$ARRAY" "$AREF" "$FOR" "$IN" "$DELETE" 46 | ;; Fields access. These don't follow the *..* convention, but they 47 | ;; still stand out visually, so I think the goal of that convention 48 | ;; is still met 49 | "$N" "$0" "$1" "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$9" "$10" 50 | "$11" "$12" "$13" "$14" "$15" "$16" "$17" "$18" "$19" "$20" "$NF" 51 | ;; Fields access, as numbers 52 | "$#N" "$#0" "$#1" "$#2" "$#3" "$#4" "$#5" "$#6" "$#7" "$#8" "$#9" "$#10" 53 | "$#11" "$#12" "$#13" "$#14" "$#15" "$#16" "$#17" "$#18" "$#19" "$#20" "$#NF" 54 | ;; Coercion routines, although the generic functions above reduce 55 | ;; the need for them. 56 | "STR" "NUM" "INT" 57 | ;; Register access. 58 | "%N" "%0" "%1" "%2" "%3" "%4" "%5" "%6" "%7" "%8" "%9" 59 | "%10" "%11" "%12" "%13" "%14" "%15" "%16" "%17" "%18" "%19" "%20" 60 | ;; Register access, as numbers 61 | "%#N" "%#0" "%#1" "%#2" "%#3" "%#4" "%#5" "%#6" "%#7" "%#8" "%#9" 62 | "%#10" "%#11" "%#12" "%#13" "%#14" "%#15" "%#16" "%#17" "%#18" "%#19" "%#20" 63 | )) 64 | 65 | (defpackage CLAWK-USER 66 | (:use COMMON-LISP CLAWK)) 67 | 68 | (defpackage CLAWK-TEST 69 | (:use COMMON-LISP CLAWK)) 70 | 71 | 72 | 73 | (defun delete-clawk () 74 | (delete-package :CLAWK-TEST) 75 | (delete-package :CLAWK)) 76 | --------------------------------------------------------------------------------