├── README
├── colors.awk
├── csv.awk
├── examples
    ├── cfold
    ├── colors
    ├── csv
    ├── math
    ├── options
    ├── sort
    ├── strings
    ├── sys
    └── times
├── math.awk
├── msort.awk
├── options.awk
├── psort.awk
├── qsort.awk
├── shuf.awk
├── strings.awk
├── sys.awk
└── times.awk


/README:
--------------------------------------------------------------------------------
  1 | AWK library function descriptions
  2 | 
  3 | Every function below is fully POSIX compliant, and has been tested on gawk 3
  4 | and 4, as well as nawk 20110810 and mawk 1.3.4. Interval notation has not been
  5 | used in this library, even though POSIX states that it should be supported,
  6 | as most of the current implementations still do not support it.
  7 | 
  8 | Note: mawk 1.3.3 is even less POSIX compliant than 1.3.4, and doesn't handle
  9 | POSIX character classes in regexes (like [:space:] or [:alpha:]), among other
 10 | things. It is currently the standard on ubuntu, and is most likely standard on
 11 | other debian-based linux distributions, as well. The functions below are not
 12 | guaranteed to work on versions of mawk prior to 1.3.4, although they should not
 13 | be too difficult to alter in order to do so.
 14 | 
 15 | If you are using gawk, I recommend adding the location of this repo to the
 16 | AWKPATH environment variable. This will allow you to only supply the file name
 17 | to -f and @include, instead of having to supply the actual path to the library.
 18 | 
 19 | 
 20 | The 'examples' directory includes a sample script for each library, with sample
 21 | usage of each function. While most of the examples are solely there to give
 22 | examples, the "cfold" script is fully functioning and is (in my opinion) rather
 23 | useful. It shows just how powerful these libraries can be... most of the script
 24 | is just there to parse options. These examples are written with gawk extensions.
 25 | Making them POSIX is left as an exercise to the user, if desired.
 26 | 
 27 | 
 28 | Most of the functions in this library work by themselves, with the exception of
 29 | the functions in the sort libraries (msort.awk, qsort.awk, and psort.awk) and
 30 | shuf.awk, and the max() and min() functions in strings.awk. The rest can easily
 31 | be copy/pasted into a script, and will function fine on their own. In the sort
 32 | libraries, the functions that the others depend on begin with '__', and which
 33 | functions go with which is explained in the comments.
 34 | 
 35 | 
 36 | 
 37 | 
 38 | Libraries, and the available functions within:
 39 | 
 40 | math.awk
 41 | 
 42 |   abs(number)
 43 |     returns the absolute value of "number"
 44 | 
 45 |   ceil(number)
 46 |     returns "number" rounded UP to the nearest int
 47 | 
 48 |   ceiling(multiple, number)
 49 |     returns "number" rounded UP to the nearest multiple of "multiple". integers
 50 |     only
 51 | 
 52 |   floor(multiple, number)
 53 |     returns "number" rounded DOWN to the nearest multiple of "multiple".
 54 |     integers only
 55 | 
 56 |   round(multiple, number)
 57 |     returns "number" rounded to the nearest multiple of "multiple". integers
 58 |     only
 59 | 
 60 |   rint(number)
 61 |     returns "number" rounded to the nearest integer
 62 | 
 63 |   change_base(number, start_base, end_base)
 64 |     converts "number" from "start_base" to "end_base"
 65 |     bases must be between 2 and 64. the digits greater than 9 are represented
 66 |     by the lowercase letters, the uppercase letters, @, and _, in that order.
 67 |     if "start_base" is less than or equal to 36, lowercase and uppercase letters
 68 |     may be used interchangeably to represent numbers between 10 and 35. integers
 69 |     only. returns 0 if any argument is invalid
 70 | 
 71 |   format_num(number)
 72 |     adds commas to "number" to make it more readable. for example,
 73 |     format_num(1000) will return "1,000", and format_num(123456.7890) will
 74 |     return "123,456.7890". also trims leading zeroes
 75 |     returns 0 if "number" is not a valid number
 76 | 
 77 |   str_to_num(string)
 78 |     examines "string", and returns its numeric value. if "string" begins with a
 79 |     leading 0, assumes that "string" is an octal number. if "string" begins with
 80 |     a leading "0x" or "0X", assumes that "string" is a hexadecimal number.
 81 |     otherwise, decimal is assumed.
 82 | 
 83 |   isint(string)
 84 |     returns 1 if "string" is a valid integer, otherwise 0
 85 | 
 86 |   isnum(string)
 87 |     returns 1 if "string" is a valid number, otherwise 0
 88 | 
 89 |   isprime(number)
 90 |     returns 1 if "number" is a prime number, otherwise 0. "number" must be a
 91 |     positive integer greater than one
 92 | 
 93 |   gcd(a, b)
 94 |     returns the greatest common divisor (greatest common factor) of a and b.
 95 |     both a and b must be positive integers. uses the recursive euclid algorithm.
 96 | 
 97 |   lcm(a, b)
 98 |     returns the least common multiple of a and b. both a and b must be positive
 99 |     integers.
100 | 
101 |   calc_e()
102 |     approximates e by calculating the summation from k=0 to k=50 of 1/k!
103 |     returns 10 decimal places
104 | 
105 |   calc_pi()
106 |     returns pi, with an accuracy of 10 decimal places
107 | 
108 |   calc_tau()
109 |     returns tau, with an accuracy of 10 decimal places
110 |     http://tauday.com/tau-manifesto
111 | 
112 |   deg_to_rad(degrees)
113 |     converts degrees to radians
114 | 
115 |   rad_to_deg(radians)
116 |     converts radians to degrees
117 | 
118 |   tan(expr)
119 |     returns the tangent of expr, which is in radians
120 | 
121 |   csc(expr)
122 |     returns the cosecant of expr, which is in radians
123 | 
124 |   sec(expr)
125 |     returns the secant of expr, which is in radians
126 | 
127 |   cot(expr)
128 |     returns the cotangent of expr, which is in radians
129 | 
130 | 
131 | 
132 | sys.awk
133 | 
134 |   isatty(fd)
135 |     Checks if "fd" is open on a tty. Returns 1 if so, 0 if not, and -1 if an
136 |     error occurs
137 | 
138 |   mktemp(template [, type])
139 |     creates a temporary file or directory, safely, and returns its name.
140 |     if template is not a pathname, the file will be created in ENVIRON["TMPDIR"]
141 |     if set, otherwise /tmp. the last six characters of template must be "XXXXXX",
142 |     and these are replaced with a string that makes the filename unique. type, if
143 |     supplied, is either "f", "d", or "u": for file, directory, or dry run (just
144 |     returns the name, doesn't create a file), respectively. If template is not
145 |     provided, uses "tmp.XXXXXX". Files are created u+rw, and directories u+rwx,
146 |     minus umask restrictions. returns -1 if an error occurs.
147 | 
148 | 
149 | 
150 | strings.awk
151 | 
152 |   center(string [, width])
153 |     returns "string" centered based on "width". if "width" is not provided (or 
154 |     is 0), uses the width of the terminal, or 80 if standard output is not open
155 |     on a terminal.
156 |     note: does not check the length of the string. if it's wider than the
157 |     terminal, it will not center lines other than the first. for best results,
158 |     combine with fold() (see the "cfold" script in the "examples" directory for
159 |     a script that does exactly this!)
160 | 
161 |   delete_arr(array)
162 |     deletes every element in "array"
163 | 
164 |   fold(string, sep [, width])
165 |     returns "string", wrapped, with lines broken on "sep" to "width" columns.
166 |     "sep" is a list of characters to break at, similar to IFS in a POSIX shell.
167 |     if "sep" is empty, wraps at exactly "width" characters. if "width" is not
168 |     provided (or is 0), uses the width of the terminal, or 80 if standard output
169 |     is not open on a terminal.
170 |     note: currently, tabs are squeezed to a single space. this will be fixed
171 | 
172 |   shell_esc(string)
173 |     returns the string escaped so that it can be used in a shell command
174 | 
175 |   ssub(ere, repl [, in])
176 |     behaves like sub, except returns the result and doesn't modify "in".
177 |     note: 'ere' must not use /.../ literal regex quoting
178 | 
179 |   sgsub(ere, repl [, in])
180 |     behaves like gsub, except returns the result and doesn't modify "in".
181 |     note: 'ere' must not use /.../ literal regex quoting
182 | 
183 |   lsub(str, repl [, in])
184 |     substitutes the string "repl" in place of the first instance of "str" in the
185 |     string "in" and returns the result. does not modify the original string. if
186 |     "in" is not provided, uses $0
187 | 
188 |   glsub(str, repl [, in])
189 |     behaves like lsub, except it replaces all occurrences of "str"
190 |     note: does not work in mawk when 'str' is empty
191 | 
192 |   str_to_arr(string, array)
193 |     converts string to an array, one char per element, 1-indexed
194 |     returns the array length
195 | 
196 |   extract_range(string, start, stop)
197 |     extracts fields "start" through "stop" from "string", based on FS, with the
198 |     original field separators intact. returns the extracted fields.
199 | 
200 |   fwidths(width_spec [, string])
201 |     extracts substrings from "string" according to "width_spec" from left to
202 |     right and assigns them to $1, $2, etc. also assigns the NF variable. if
203 |     "string" is not supplied, uses $0. "width_spec" is a space separated list of
204 |     numbers that specify field widths, just like GNU awk's FIELDWIDTHS variable.
205 |     if there is data left over after the last width_spec, adds it to a final
206 |     field. returns the value for NF.
207 | 
208 |   fwidths_arr(width_spec, array [, string])
209 |     the behavior is the same as that of fwidths(), except that the values are
210 |     assigned to "array", indexed with sequential integers starting with 1.
211 |     returns the length. everything else is described in fwidths() above.
212 | 
213 |   lsplit(str, arr, sep)
214 |     splits the string "str" into array elements "arr[1]", "arr[2]", .., "arr[n]",
215 |     and returns "n". all elements of "arr" are deleted before the split is
216 |     performed. the separation is done on the literal string "sep".
217 | 
218 |   ssplit(str, arr, seps [, ere])
219 |     similar to GNU awk 4's "seps" functionality for split(). splits the string
220 |     "str" into the array "arr" and the separators array "seps" on the regular
221 |     expression "ere", and returns the number of fields. the value of "seps[i]"
222 |     is the separator that appeared in front of "arr[i+1]". if "ere" is omitted or
223 |     empty, FS is used instead. if "ere" is a single space, leading whitespace in
224 |     "str" will go into the extra array element "seps[0]" and trailing whitespace
225 |     will go into the extra array element "seps[len]", where "len" is the return
226 |     value.
227 |     note: /regex/ style quoting cannot be used for "ere".
228 | 
229 |   ends_with(string, substring)
230 |     returns 1 if "string" ends with "substring", otherwise 0
231 | 
232 |   trim(string)
233 |     returns "string" with leading and trailing whitespace trimmed
234 | 
235 |   rev(string)
236 |     returns "string" backwards
237 | 
238 |   max(array [, how ])
239 |     returns the maximum value in "array", 0 if the array is empty, or -1 if an
240 |     error occurs. the optional string "how" controls the comparison mode.
241 |     requires the __mcompare() function.
242 |     valid values for "how" are:
243 |       "std"
244 |         use awk's standard rules for comparison. this is the default
245 |       "str"
246 |         force comparison as strings
247 |       "num"
248 |         force a numeric comparison
249 | 
250 |   maxi(array [, how ])
251 |     the behavior is the same as that of max(), except that the array indices are
252 |     used, not the array values. everything else is explained in max() above.
253 | 
254 |   min(array [, how ])
255 |     the behavior is the same as that of max(), except that the minimum value is
256 |     returned instead of the maximum. everything else is explained in max() above.
257 | 
258 |   mini(array [, how ])
259 |     the behavior is the same as that of min(), except that the array indices are
260 |     used instead of the array values. everything else is explained in min() and
261 |     max() above.
262 | 
263 | 
264 | 
265 | msort.awk
266 | 
267 |   msort(s, d [, how])
268 |     sorts the elements in the array "s" using awk's normal rules for comparing
269 |     values, creating a new sorted array "d" indexed with sequential integers
270 |     starting with 1. returns the length, or -1 if an error occurs. leaves the
271 |     indices of the source array "s" unchanged. the optional string "how" controls
272 |     the direction and the comparison mode. uses the merge sort algorithm, with an
273 |     insertion sort when the list size gets small enough. this is not a stable
274 |     sort. requires the __compare() and __mergesort() functions.
275 |     valid values for "how" are:
276 |       "std asc"
277 |         use awk's standard rules for comparison, ascending. this is the default
278 |       "std desc"
279 |         use awk's standard rules for comparison, descending.
280 |       "str asc"
281 |         force comparison as strings, ascending.
282 |       "str desc"
283 |         force comparison as strings, descending.
284 |       "num asc"
285 |         force a numeric comparison, ascending.
286 |       "num desc"
287 |         force a numeric comparison, descending.
288 | 
289 |   imsort(s [, how])
290 |     the behavior is the same as that of msort(), except that the array "s" is
291 |     sorted in-place. the original indices are destroyed and replaced with
292 |     sequential integers. everything else is described in msort() above.
293 | 
294 |   msorti(s, d [, how])
295 |     the behavior is the same as that of msort(), except that the array indices
296 |     are used for sorting, not the array values. when done, the new array is
297 |     indexed numerically, and the values are those of the original indices.
298 |     everything else is described in msort() above.
299 | 
300 |   imsorti(s [, how])
301 |     the behavior is the same as that of msorti(), except that the array "s" is
302 |     sorted in-place. the original indices are destroyed and replaced with
303 |     sequential integers. everything else is described in msort() and msorti()
304 |     above.
305 | 
306 |   msortv(s, d [, how])
307 |     sorts the indices in the array "s" based on the values, creating a new
308 |     sorted array "d" indexed with sequential integers starting with 1, and the
309 |     values the indices of "s". returns the length, or -1 if an error occurs.
310 |     leaves the source array "s" unchanged. the optional string "how" controls
311 |     the direction and the comparison mode. uses the merge sort algorithm, with
312 |     an insertion sort when the list size gets small enough. this is not a stable
313 |     sort. requires the __compare() and __mergesortv() functions. valid values for
314 |     "how" are explained in the msort() function above.
315 | 
316 | 
317 | 
318 | qsort.awk
319 | 
320 |   qsort(s, d [, how])
321 |     sorts the elements in the array "s" using awk's normal rules for comparing
322 |     values, creating a new sorted array "d" indexed with sequential integers
323 |     starting with 1. returns the length, or -1 if an error occurs. leaves the
324 |     indices of the source array "s" unchanged. the optional string "how" controls
325 |     the direction and the comparison mode. uses the quick sort algorithm, with a
326 |     random pivot to avoid worst-case behavior on already sorted arrays. this is
327 |     not a stable sort. requires the __compare() and __quicksort() functions.
328 |     valid values for "how" are:
329 |       "std asc"
330 |         use awk's standard rules for comparison, ascending. this is the default
331 |       "std desc"
332 |         use awk's standard rules for comparison, descending.
333 |       "str asc"
334 |         force comparison as strings, ascending.
335 |       "str desc"
336 |         force comparison as strings, descending.
337 |       "num asc"
338 |         force a numeric comparison, ascending.
339 |       "num desc"
340 |         force a numeric comparison, descending.
341 | 
342 |   iqsort(s [, how])
343 |     the behavior is the same as that of qsort(), except that the array "s" is
344 |     sorted in-place. the original indices are destroyed and replaced with
345 |     sequential integers. everything else is described in qsort() above.
346 | 
347 |   qsorti(s, d [, how])
348 |     the behavior is the same as that of qsort(), except that the array indices
349 |     are used for sorting, not the array values. when done, the new array is
350 |     indexed numerically, and the values are those of the original indices.
351 |     everything else is described in qsort() above.
352 | 
353 |   iqsorti(s [, how])
354 |     the behavior is the same as that of qsorti(), except that the array "s" is
355 |     sorted in-place. the original indices are destroyed and replaced with
356 |     sequential integers. everything else is described in qsort() and qsorti()
357 |     above.
358 | 
359 |   qsortv(s, d [, how])
360 |     sorts the indices in the array "s" based on the values, creating a new
361 |     sorted array "d" indexed with sequential integers starting with 1, and the
362 |     values the indices of "s". returns the length, or -1 if an error occurs.
363 |     leaves the source array "s" unchanged. the optional string "how" controls
364 |     the direction and the comparison mode. uses the quicksort algorithm, with a
365 |     random pivot to avoid worst-case behavior on already sorted arrays. this is
366 |     not a stable sort. requires the __compare() and __vquicksort() functions.
367 |     valid values for "how" are explained in the qsort() function above.
368 | 
369 | 
370 | 
371 | psort.awk
372 | 
373 |   psort(s, d, patts, max [, how])
374 |     sorts the values of the array "s", based on the rules below. creates a new
375 |     sorted array "d" indexed with sequential integers starting with 1. "patts"
376 |     is a compact (*non-sparse) 1-indexed array containing regular expressions.
377 |     "max" is the length of the "patts" array. returns the length of the "d"
378 |     array. valid values for "how" are explained below. uses the quicksort
379 |     algorithm, with a random pivot to avoid worst-case behavior on already sorted
380 |     arrays. requires the __pcompare() and __pquicksort() functions.
381 |      Sorting rules:
382 |      - When sorting, values matching an expression in the "patts" array will
383 |        take priority over any other values
384 |      - Each expression in the "patts" array will have priority in ascending
385 |        order by index. "patts[1]" will have priority over "patts[2]" and
386 |        "patts[3]", etc
387 |      - Values both matching the same regex will be compared as usual
388 |      - All non-matching values will be compared as usual
389 |     valid values for "how" are:
390 |       "std asc"
391 |         use awk's standard rules for comparison, ascending. this is the default
392 |       "std desc"
393 |         use awk's standard rules for comparison, descending.
394 |       "str asc"
395 |         force comparison as strings, ascending.
396 |       "str desc"
397 |         force comparison as strings, descending.
398 |       "num asc"
399 |         force a numeric comparison, ascending.
400 |       "num desc"
401 |         force a numeric comparison, descending.
402 | 
403 |   ipsort(s, patts, max [, how])
404 |     the behavior is the same as that of psort(), except that the array "s" is
405 |     sorted in-place. the original indices are destroyed and replaced with
406 |     sequential integers. everything else is described in psort() above.
407 | 
408 |   psorti(s, d, patts, max [, how])
409 |     the behavior is the same as that of psort(), except that the array indices
410 |     are used for sorting, not the array values. when done, the new array is
411 |     indexed numerically, and the values are those of the original indices.
412 |     everything else is described in psort() above.
413 | 
414 |   ipsorti(s, patts, max [, how])
415 |     the behavior is the same as that of psorti(), except that the array "s" is
416 |     sorted in-place. the original indices are destroyed and replaced with
417 |     sequential integers. everything else is described in psort() and psorti()
418 |     above.
419 | 
420 | 
421 | 
422 | shuf.awk
423 | 
424 |   shuf(s, d)
425 |     shuffles the array "s", creating a new shuffled array "d" indexed with
426 |     sequential integers starting with one. returns the length, or -1 if an error
427 |     occurs. leaves the indices of the source array "s" unchanged. uses the knuth-
428 |     fisher-yates algorithm. requires the __shuffle() function.
429 | 
430 |   ishuf(s)
431 |     the behavior is the same as that of shuf(), except the array "s" is shuffled
432 |     in-place. the original indices are destroyed and replaced with sequential
433 |     integers. everything else is described in shuf() above.
434 | 
435 |   shufi(s, d)
436 |     the behavior is the same as that of shuf(), except that the array indices
437 |     are shuffled, not the array values. when done, the new array is indexed
438 |     numerically, and the values are those of the original indices. everything
439 |     else is described in shuf() above.
440 | 
441 |   ishufi(s)
442 |     the behavior is the same as that of shufi(), except that the array "s" is
443 |     shuffled in-place. the original indices are destroyed and replaced with
444 |     sequential integers. everything else is described in shuf() and shufi()
445 |     above.
446 | 
447 | 
448 | 
449 | csv.awk
450 | 
451 |   create_line(array, max [, sep [, qualifier [, quote_type] ] ])
452 |     Generates an output line in quoted CSV format, from the contents of "array".
453 |     "array" is expected to be an indexed array (1-indexed). "max" is the highest
454 |     index to be used. "sep", if provided, is the field separator. If it is more
455 |     than one character, the first character in the string is used. By default,
456 |     it is a comma. "qualifier", if provided, is the quote character. Like "sep",
457 |     it is one character. The default value is `"'. "quote_type", if provided, is
458 |     used to determine how the output fields are quoted. Valid values are given
459 |     below. For example, the array: a[1]="foo"; a[2]="bar,quux"; a[3]="blah\"baz"
460 |     when called with create_line(a, 3), will return: "foo","bar,quux","blah""baz"
461 |     note: expects a non-sparse array. empty or unset values will become
462 |     empty fields
463 |     Valid values for "quote_type":
464 |       "t": Quote all strings, do not quote numbers. This is the default
465 |       "a": Quote all fields
466 |       "m": Only quote fields with "sep" or "qualifier" characters in them
467 | 
468 |   qsplit(string, array [, sep [, qualifier] ])
469 |     a version of split() designed for CSV-like data. splits "string" on "sep"
470 |     ("," if not provided) into array[1], array[2], ... array[n]. returns "n", or
471 |     "-1 * n" if the line is incomplete (it has an uneven number of quotes). both
472 |     "sep" and "qualifier" will use the first character in the provided string.
473 |     uses "qualifier" (" if not provided) and ignores "sep" within quoted fields.
474 |     doubled qualifiers are considered escaped, and a single qualifier character
475 |     is used in its place. for example, foo,"bar,baz""blah",quux will be split as
476 |     such: array[1] = "foo"; array[2] = "bar,baz\"blah"; array[3] = "quux";
477 | 
478 | 
479 | 
480 | options.awk
481 | 
482 |   getopts(optstring [, longopt_array ])
483 |     parses options, and deletes them from ARGV. "optstring" is of the form
484 |     "ab:c". each letter is a possible option. if the letter is followed by a
485 |     colon (:), then the option requires an argument. if an argument is not
486 |     provided, or an invalid option is given, getopts will print the appropriate
487 |     error message and return "?". returns each option as it's read, and -1 when
488 |     no options are left. "optind" will be set to the index of the next
489 |     non-option argument when finished.  "optarg" will be set to the option's
490 |     argument, when provided. if not provided, "optarg" will be empty. "optname"
491 |     will be set to the current option, as provided. getopts will delete each
492 |     option and argument that it successfully reads, so awk will be able to treat
493 |     whatever's left as filenames/assignments, as usual. if provided,
494 |     "longopt_array" is the name of an associative array that maps long options
495 |     to the appropriate short option. (do not include the hyphens on either).
496 |     sample usage can be found in the examples dir, with gawk extensions, or in
497 |     the ogrep script for a POSIX example: https://github.com/e36freak/ogrep
498 | 
499 | 
500 | 
501 | times.awk
502 | 
503 |   month_to_num(month)
504 |     converts human readable month to the decimal representation
505 |     returns the number, -1 if the month doesn't exist
506 | 
507 |   day_to_num(day)
508 |     converts human readable day to the decimal representation
509 |     returns the number, -1 if the day doesn't exist
510 |     like date +%w, sunday is 0
511 | 
512 |   hr_to_sec(timestamp)
513 |     converts HH:MM:SS to seconds, returns -1 if invalid format
514 | 
515 |   sec_to_hr(seconds)
516 |     converts seconds to HH:MM:SS
517 | 
518 |   ms_to_hr(milliseconds)
519 |     converts milliseconds to a "time(1)"-similar human readable format, such
520 |     as 1m4.356s
521 | 
522 |   add_day_suff(day_of_month)
523 |     appends the appropriate suffix to "day_of_month". for example,
524 |     add_day_suff(1) will return "1st", and add_day_suff(22) will return "22nd"
525 |     returns -1 if "day_of_month" is not a positive integer
526 | 
527 | 
528 | 
529 | colors.awk
530 |   set_cols(array)
531 |     sets the following values in "array" with tput. printing them will format
532 |     any text afterwards. colors and formats are:
533 |       bold - bold text (can be combined with a color)
534 |       black - black text
535 |       red - red text
536 |       green - green text
537 |       yellow - yellow text
538 |       blue - blue text
539 |       magenta - magenta text
540 |       cyan - cyan text
541 |       white - white text
542 |       reset - resets to default settings
543 | 
544 | 
545 | You can do whatever you want with this stuff, but a thanks is always appreciated
546 | 


--------------------------------------------------------------------------------
/colors.awk:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/awk -f
 2 | 
 3 | ## usage: set_cols(array)
 4 | ##   sets the following values in "array" with tput. printing them will format
 5 | ##   any text afterwards. "cmd" is a private local. colors and formats are:
 6 | ##     bold - bold text (can be combined with a color)
 7 | ##     black - black text
 8 | ##     red - red text
 9 | ##     green - green text
10 | ##     yellow - yellow text
11 | ##     blue - blue text
12 | ##     magenta - magenta text
13 | ##     cyan - cyan text
14 | ##     white - white text
15 | ##     reset - resets to default settings
16 | function set_cols(array,    cmd) {
17 |   # bold ("cmd" is declared local above so no global of that name is clobbered)
18 |   cmd = "tput bold";
19 |   cmd | getline array["bold"];
20 |   close(cmd);
21 |   # black
22 |   cmd = "tput setaf 0";
23 |   cmd | getline array["black"];
24 |   close(cmd);
25 |   # red
26 |   cmd = "tput setaf 1";
27 |   cmd | getline array["red"];
28 |   close(cmd);
29 |   # green
30 |   cmd = "tput setaf 2";
31 |   cmd | getline array["green"];
32 |   close(cmd);
33 |   # yellow
34 |   cmd = "tput setaf 3";
35 |   cmd | getline array["yellow"];
36 |   close(cmd);
37 |   # blue
38 |   cmd = "tput setaf 4";
39 |   cmd | getline array["blue"];
40 |   close(cmd);
41 |   # magenta
42 |   cmd = "tput setaf 5";
43 |   cmd | getline array["magenta"];
44 |   close(cmd);
45 |   # cyan
46 |   cmd = "tput setaf 6";
47 |   cmd | getline array["cyan"];
48 |   close(cmd);
49 |   # white
50 |   cmd = "tput setaf 7";
51 |   cmd | getline array["white"];
52 |   close(cmd);
53 |   # reset
54 |   cmd = "tput sgr0";
55 |   cmd | getline array["reset"];
56 |   close(cmd);
57 | }
58 | 
59 | 
60 | 
61 | # You can do whatever you want with this stuff, but a thanks is always
62 | # appreciated
63 | 


--------------------------------------------------------------------------------
/csv.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | ## usage: create_line(array, max [, sep [, qualifier [, quote_type] ] ])
  4 | ## Generates an output line in quoted CSV format, from the contents of "array".
  5 | ## "array" is expected to be an indexed array (1-indexed). "max" is the highest
  6 | ## index to be used. "sep", if provided, is the field separator. If it is more
  7 | ## than one character, the first character in the string is used. By default,
  8 | ## it is a comma. "qualifier", if provided, is the quote character. Like "sep",
  9 | ## it is one character. The default value is `"'. "quote_type", if provided, is
 10 | ## used to determine how the output fields are quoted. Valid values are given
 11 | ## below. For example, the array: a[1]="foo"; a[2]="bar,quux"; a[3]="blah\"baz"
 12 | ## when called with create_line(a, 3), will return: "foo","bar,quux","blah""baz"
 13 | ## note: expects a non-sparse array. empty or unset values will become
 14 | ## empty fields. the values stored in "array" are left unchanged
 15 | ## Valid values for "quote_type":
 16 | ##   "t": Quote all strings, do not quote numbers. This is the default
 17 | ##   "a": Quote all fields
 18 | ##   "m": Only quote fields with "sep" or "qualifier" characters in them
 19 | function create_line(arr, len, sep, q, type,    i, out, c, new, toquote, val) {
 20 |   # set "sep" if the arg was provided, using the first char
 21 |   if (length(sep)) {
 22 |     sep = substr(sep, 1, 1);
 23 |   # default
 24 |   } else {
 25 |     sep = ",";
 26 |   }
 27 | 
 28 |   # validate "type"
 29 |   if (!length(type) || type !~ /^[tam]$/) {
 30 |     type = "t";
 31 |   }
 32 | 
 33 |   # set "q" if the arg was provided, using the first char
 34 |   if (length(q)) {
 35 |     q = substr(q, 1, 1);
 36 |   # default
 37 |   } else {
 38 |     q = "\"";
 39 |   }
 40 | 
 41 |   # empty the output string
 42 |   out = "";
 43 | 
 44 |   # iterate over the array elements
 45 |   for (i=1; i<=len; i++) {
 46 |     # determine if the output string needs to be quoted
 47 |     toquote = 0;
 48 |     if (type == "t") {
 49 |       if (arr[i] ~ /[^0-9.]/ || index(arr[i], sep) || index(arr[i], q)) {
 50 |         toquote = 1;
 51 |       }
 52 |     } else if (type == "a") {
 53 |       toquote = 1;
 54 |     } else {
 55 |       if (index(arr[i], sep) || index(arr[i], q)) {
 56 |         toquote = 1;
 57 |       }
 58 |     }
 59 | 
 60 |     # create output string
 61 |     if (toquote) {
 62 |       # double each qualifier, working on a copy so "arr" is not modified
 63 |       new = "";
 64 |       val = arr[i];
 65 |       while (c = index(val, q)) {
 66 |         new = new substr(val, 1, c - 1) q q;
 67 |         val = substr(val, c + 1);
 68 |       }
 69 |       new = new val;
 70 |       # quote escaped string, add to output with sep
 71 |       out = (i > 1) ? out sep q new q : q new q;
 72 |     # no quotes needed, just add to output with sep
 73 |     } else {
 74 |       out = (i > 1) ? out sep arr[i] : arr[i];
 75 |     }
 76 |   }
 77 | 
 78 |   # return output string
 79 |   return out;
 80 | }
 81 | 
 82 | ## usage: qsplit(string, array [, sep [, qualifier] ])
 83 | ## a version of split() designed for CSV-like data. splits "string" on "sep"
 84 | ## ("," if not provided) into array[1], array[2], ... array[n]. returns "n", or
 85 | ## "-1 * n" if the line is incomplete (it has an uneven number of quotes). both
 86 | ## "sep" and "qualifier" will use the first character in the provided string.
 87 | ## uses "qualifier" (" if not provided) and ignores "sep" within quoted fields.
 88 | ## doubled qualifiers are considered escaped, and a single qualifier character
 89 | ## is used in its place. for example, foo,"bar,baz""blah",quux will be split as
 90 | ## such: array[1] = "foo"; array[2] = "bar,baz\"blah"; array[3] = "quux";
 91 | function qsplit(str, arr, sep, q,    len, cur, isin, fstart, c, ch) {
 92 |   delete arr;
 93 | 
 94 |   # set "sep" if the argument was provided, using the first char
 95 |   if (length(sep)) {
 96 |     sep = substr(sep, 1, 1);
 97 |   # otherwise, use ","
 98 |   } else {
 99 |     sep = ",";
100 |   }
101 | 
102 |   # set "q" if the argument was provided, using the first char
103 |   if (length(q)) {
104 |     q = substr(q, 1, 1);
105 |   # otherwise, use '"'
106 |   } else {
107 |     q = "\"";
108 |   }
109 | 
110 |   # the string is walked one char at a time with substr(), as splitting on
111 |   # an empty separator is not defined by POSIX
112 |   len = length(str);
113 |   # "cur" is the element of 'arr' being assigned to. it starts out empty
114 |   cur = 1;
115 |   arr[cur] = "";
116 |   # booleans: whether the iterator is in a quoted string, and whether it is
117 |   # at the very start of a field (nothing consumed since the last separator)
118 |   isin = 0;
119 |   fstart = 1;
120 |   for (c=1; c<=len; c++) {
121 |     ch = substr(str, c, 1);
122 |     if (ch == q) {
123 |       # a doubled qualifier inside quotes, or in the middle of an unquoted
124 |       # field, is an escaped literal quote. at the start of a field the first
125 |       # qualifier always opens a quoted string (allows "foo","","bar")
126 |       if ((isin || !fstart) && substr(str, c + 1, 1) == q) {
127 |         arr[cur] = arr[cur] q;
128 |         c++;
129 |       # otherwise, it's a qualifier. switch boolean
130 |       } else {
131 |         isin = ! isin;
132 |       }
133 |       fstart = 0;
134 |     # an unquoted separator starts the next (so far empty) element
135 |     } else if (ch == sep && !isin) {
136 |       arr[++cur] = "";
137 |       fstart = 1;
138 |     # otherwise, just append to the current element
139 |     } else {
140 |       arr[cur] = arr[cur] ch;
141 |       fstart = 0;
142 |     }
143 |   }
144 |   # return length, negated when a quoted string was left open
145 |   return cur * (isin ? -1 : 1);
146 | }
147 | 
148 | 
149 | 
150 | # You can do whatever you want with this stuff, but a thanks is always
151 | # appreciated
152 | 


--------------------------------------------------------------------------------
/examples/cfold:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
  4 | # the man page (since the path is relative, this assumes the lib dir is in
  5 | # AWKPATH or the same dir)
  6 | @include "strings.awk";
  7 | @include "options.awk";
  8 | 
  9 | 
 10 | # prints usage
 11 | function usage(    dest, one, two) {
 12 |   # all help text goes to stderr. "one" ends a line, "two" adds an empty line
 13 |   dest = "/dev/stderr"; one = "%s\n"; two = "%s\n\n";
 14 |   printf(two, "cfold -- [OPTIONS] [FILE...]") > dest;
 15 |   printf(two, "the '--' is required, so AWK itself doesn't read the options") > dest;
 16 |   printf(one, "Wraps input lines in each FILE (standard input if not provided), writing to") > dest;
 17 |   printf(one, "standard output. The default width is that of the terminal, or 80 columns if") > dest;
 18 |   printf(one, "standard output is not a terminal. If FILE is '-', also reads the standard") > dest;
 19 |   printf(two, "input") > dest;
 20 |   printf(one, " Options:") > dest;
 21 |   printf(one, "  -c, --center         center each line on the terminal. assumes a width of 80") > dest;
 22 |   printf(one, "                       columns if standard output is not a terminal") > dest;
 23 |   printf(one, "  -b, --break[=LIST]   break lines at spaces. LIST, if provided, is a list of") > dest;
 24 |   printf(one, "                       characters to break lines at instead of spaces. For") > dest;
 25 |   printf(one, "                       example, --break=abc will break lines on \"a\" or \"b\"") > dest;
 26 |   printf(one, "                       Note: an empty string for LIST will revert to the") > dest;
 27 |   printf(one, "                       default behavior, it is not the same as omitting -b") > dest;
 28 |   printf(one, "  -i, --ignore-breaks  convert existing single line breaks to spaces. multiple") > dest;
 29 |   printf(one, "                       line breaks like those at the end of paragraphs will be") > dest;
 30 |   printf(one, "                       truncated to a single empty line") > dest;
 31 |   printf(one, "  -w, --width WIDTH    use WIDTH columns instead of the terminal's width") > dest;
 32 |   printf(one, "  -t, --trim           trim leading and trailing whitespace from each line") > dest;
 33 |   printf(one, "                       prior to folding") > dest;
 34 |   printf(one, "  -h, --help           display this help and exit") > dest;
 35 | }
 36 | 
 37 | BEGIN {
 38 |   # option parsing happens here; first, initialize variables to defaults
 39 |   toexit = err = 0;
 40 |   tocenter = toignore = totrim = 0;
 41 |   break_chars = "";
 42 | 
 43 |   # default width is 80; on a tty, use what tput reports if it is a number
 44 |   width = 80;
 45 |   if (system("test -t 1") == 0) {
 46 |     cmd = "tput cols";
 47 |     if ((cmd | getline cols) > 0 && cols ~ /^[0-9]+$/ && cols > 0) {
 48 |       width = cols;
 49 |     }
 50 |     close(cmd);
 51 |   }
 52 | 
 53 |   # map long options to short options
 54 |   longopts["center"]        = "c";
 55 |   longopts["break"]         = "b";
 56 |   longopts["ignore-breaks"] = "i";
 57 |   longopts["width"]         = "w";
 58 |   longopts["trim"]          = "t";
 59 |   longopts["help"]          = "h";
 60 | 
 61 |   # parse the options
 62 |   while ((opt = getopts("cbiw:th", longopts)) != -1) {
 63 |     switch(opt) {
 64 |       # -c, --center
 65 |       case "c":
 66 |         tocenter = 1; break;
 67 | 
 68 |       # -b, --break (the argument is optional, so "b" has no ':' above)
 69 |       case "b":
 70 |         if (length(optarg)) {
 71 |           break_chars = optarg;
 72 |         } else {
 73 |           break_chars = " \t\n";
 74 |         }
 75 |         break;
 76 | 
 77 |       # -i, --ignore-breaks
 78 |       case "i":
 79 |         toignore = 1; break;
 80 | 
 81 |       # -w, --width
 82 |       case "w":
 83 |         # make sure optarg is an integer. the message ends with a newline
 84 |         if (optarg !~ /^[0-9]+$/) {
 85 |           printf("'%s' is not a valid argument for '%s', must be a number\n",
 86 |                  optarg, optname) > "/dev/stderr";
 87 |           err = toexit = 1;
 88 |           exit;
 89 |         }
 90 |         width = optarg;
 91 |         break;
 92 | 
 93 |       # -t, --trim
 94 |       case "t":
 95 |         totrim = 1; break;
 96 | 
 97 |       # -h, --help
 98 |       case "h":
 99 |         usage(); toexit = 1; exit;
100 | 
101 |       # error
102 |       case "?":
103 |       default:
104 |         err = toexit = 1;
105 |         exit;
106 |     }
107 |   }
108 | 
109 |   # if --ignore-breaks was used, set RS to null so that paragraphs are
110 |   # treated as one line
111 |   if (toignore) {
112 |     RS = "";
113 |   }
114 | }
115 | 
116 | ########
117 | 
118 | # if --ignore-breaks was used, print extra newline between records
119 | {
120 |   # handles each record (line, or paragraph). with --ignore-breaks, every
121 |   # record after the first gets an empty line printed in front of it
122 |   if (NR > 1 && toignore) {
123 |     print "";
124 |   }
125 | 
126 |   # --trim: strip leading/trailing whitespace by reassigning $0
127 |   if (totrim) {
128 |     $0 = trim($0);
129 |   }
130 | 
131 |   out = fold($0, break_chars, width);
132 |   if (!tocenter) {
133 |     print out;
134 |   } else {
135 |     # --center: split the folded text into lines, centering each of them
136 |     len = split(out, lines, /\n/);
137 |     i = 1;
138 |     while (i <= len) {
139 |       print center(lines[i]);
140 |       i++;
141 |     }
142 |   }
143 | }
144 | 
145 | END {
146 |   exit err;
147 | }
148 | 
149 | 
150 | 
151 | # You can do whatever you want with this stuff, but a thanks is always
152 | # appreciated
153 | 


--------------------------------------------------------------------------------
/examples/colors:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/awk -f
 2 | 
 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
 4 | # the man page (since the path is relative, this assumes the lib dir is in
 5 | # AWKPATH or the same dir)
 6 | @include "colors.awk";
 7 | 
 8 | # usage: set_cols(array)
 9 | #   sets the following values in "array" with tput. printing them will format
10 | #   any text afterwards. colors and formats are:
11 | #     bold - bold text (can be combined with a color)
12 | #     black - black text
13 | #     red - red text
14 | #     green - green text
15 | #     yellow - yellow text
16 | #     blue - blue text
17 | #     magenta - magenta text
18 | #     cyan - cyan text
19 | #     white - white text
20 | #     reset - resets to default settings
21 | BEGIN {
22 |   # set colors
23 |   set_cols(colors);
24 | 
25 |   # names of the colors to show, in output order
26 |   total = split("red black green yellow blue magenta cyan white", names, " ");
27 | 
28 |   # one line per color: the plain color, a tab, then its bold variant.
29 |   # the comma makes print put OFS between the two halves
30 |   for (n = 1; n <= total; n++) {
31 |     name = names[n];
32 |     print colors[name] name "\t", colors["bold"] "bold " name colors["reset"];
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/examples/csv:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/awk -f
 2 | 
 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
 4 | # the man page (since the path is relative, this assumes the lib dir is in
 5 | # AWKPATH or the same dir)
 6 | @include "csv.awk";
 7 | 
 8 | # usage: create_line(array, max [, sep [, qualifier [, quote_type] ] ])
 9 | # Generates an output line in quoted CSV format, from the contents of "array"
10 | # "array" is expected to be an indexed array (1-indexed). "max" is the highest
11 | # index to be used. "sep", if provided, is the field separator. If it is more
12 | # than one character, the first character in the string is used. By default,
13 | # it is a comma. "qualifier", if provided, is the quote character. Like "sep",
14 | # it is one character. The default value is `"'. "quote_type", if provided, is
15 | # used to determine how the output fields are quoted. Valid values are given
16 | # below. For example, the array: a[1]="foo"; a[2]="bar,quux"; a[3]="blah\"baz"
17 | # when called with create_line(a, 3), will return: "foo","bar,quux","blah""baz"
18 | # note: expects a non-sparse array. empty or unset values will become
19 | # empty fields
20 | # Valid values for "quote_type":
21 | #   "t": Quote all strings, do not quote numbers. This is the default
22 | #   "a": Quote all fields
23 | #   "m": Only quote fields with separator or qualifier characters in them
24 | BEGIN {
25 |   print "create_line:";
26 |   print "";
27 |   # four sample fields: plain, with a comma, empty, with a quote character
28 |   a[1] = "foo";
29 |   a[2] = "with,comma";
30 |   a[3] = "";
31 |   a[4] = "with\"quote";
32 |   # build the CSV line first, then print it
33 |   line = create_line(a, 4);
34 |   print line;
35 |   for (pad = 1; pad <= 2; pad++) print "";
36 | }
37 | 
38 | # usage: qsplit(string, array [, sep [, qualifier] ])
39 | # a version of split() designed for CSV-like data. splits "string" on "sep"
40 | # (,) if not provided, into array[1], array[2], ... array[n]. returns "n", or
41 | # "-1 * n" if the line is incomplete (it has an uneven number of quotes). both
42 | # "sep" and "qualifier" will use the first character in the provided string.
43 | # uses "qualifier" (" if not provided) and ignores "sep" within quoted fields.
44 | # doubled qualifiers are considered escaped, and a single qualifier character
45 | # is used in its place. for example, foo,"bar,baz""blah",quux will be split as
46 | # such: array[1] = "foo"; array[2] = "bar,baz\"blah"; array[3] = "quux";
47 | BEGIN {
48 |   print "qsplit:";
49 |   print "";
50 |   # the line to split: four quoted fields, one of which is empty
51 |   str = "\"foo\",\"with,comma\",\"\",\"with\"\"quote\"";
52 | 
53 |   # show the input before splitting it
54 |   print "initial string: ";
55 |   print str;
56 |   print "";
57 | 
58 |   # split into "b". a negative count flags an incomplete line, so flip the
59 |   # sign to get the number of elements (abs() in math.awk would do as well)
60 |   len = qsplit(str, b);
61 |   if (len < 0) {
62 |     len = -len;
63 |   }
64 | 
65 |   print "one element per line:"
66 |   i = 1;
67 |   while (i <= len) {
68 |     print b[i];
69 |     i++;
70 |   }
71 | }
71 | 


--------------------------------------------------------------------------------
/examples/math:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
  4 | # the man page (since the path is relative, this assumes the lib dir is in
  5 | # AWKPATH or the same dir)
  6 | @include "math.awk";
  7 | 
  8 | # usage: abs(number)
  9 | # returns the absolute value of "number"
 10 | BEGIN {
 11 |   print abs(-2.3);   # negative argument
 12 |   print abs(0);      # zero
 13 |   print abs(2.3);    # positive argument
 14 | }
 15 | 
 16 | # usage: ceil(number)
 17 | # returns "number" rounded UP to the nearest int
 18 | BEGIN {
 19 |   print ceil(2.3);    # positive argument
 20 |   print ceil(-2.3);   # negative argument
 21 | }
 22 | 
 23 | # usage: ceiling(multiple, number)
 24 | # returns "number" rounded UP to the nearest multiple of "multiple"
 25 | BEGIN {
 26 |   # nearest multiple of 5 above 6 is 10
 27 |   print ceiling(5, 6);
 28 | }
 29 | 
 30 | # usage: floor(multiple, number)
 31 | # returns "number" rounded DOWN to the nearest multiple of "multiple"
 32 | BEGIN {
 33 |   # nearest multiple of 5 below 9 is 5
 34 |   print floor(5, 9);
 35 | }
 36 | 
 37 | # usage: round(multiple, number)
 38 | # returns "number" rounded to the nearest multiple of "multiple"
 39 | BEGIN {
 40 |   # nearest multiple of 5 to 8 is 10
 41 |   print round(5, 8);
 42 | }
 43 | 
 44 | # usage: rint(number)
 45 | # returns "number" rounded to the nearest integer
 46 | BEGIN {
 47 |   # round three values to the nearest int. the commas make print join the
 48 |   print rint(1.3), rint(1.5), rint(2.8)   # results with OFS (a space by default)
 49 | }
 50 | 
 51 | # usage: change_base(number, start_base, end_base)
 52 | # converts "number" from "start_base" to "end_base"
 53 | # bases must be between 2 and 64. the digits greater than 9 are represented
 54 | # by the lowercase letters, the uppercase letters, @, and _, in that order.
 55 | # if "start_base" is less than or equal to 36, lowercase and uppercase letters may
 56 | # be used interchangeably to represent numbers between 10 and 35.
 57 | # returns 0 if any argument is invalid
 58 | BEGIN {
 59 |   # convert '3' from decimal (base 10) to binary (base 2):
 60 |   print change_base(3, 10, 2);
 61 | 
 62 |   # convert '111' in binary to decimal:
 63 |   print change_base(111, 2, 10);
 64 | 
 65 |   # convert 111 in binary to hex:
 66 |   print change_base(111, 2, 16);
 67 | 
 68 |   # convert 1f in hex to decimal (passed as a string; 1f is no awk number):
 69 |   print change_base("1f", 16, 10);
 70 | }
 71 | 
 72 | # usage: format_num(number)
 73 | # adds commas to "number" to make it more readable. for example,
 74 | # format_num(1000) will return "1,000", and format_num(123456.7891) will
 75 | # return "123,456.7891". also trims leading zeroes
 76 | # returns 0 if "number" is not a valid number
 77 | BEGIN {
 78 |   nums[1] = "1000"; nums[2] = "123456.7891";   # a whole number, a fraction
 79 |   for (k = 1; k <= 2; k++) {
 80 |     num = nums[k];
 81 |     printf("unformatted: %s\nformatted: %s\n\n", num, format_num(num));
 82 |   }
 83 | }
 84 | 
 85 | # usage: str_to_num(string)
 86 | # examines "string", and returns its numeric value. if "string" begins with a
 87 | # leading 0, assumes that "string" is an octal number. if "string" begins with
 88 | # a leading "0x" or "0X", assumes that "string" is a hexadecimal number.
 89 | # otherwise, decimal is assumed.
 90 | BEGIN {
 91 |   # decimal, octal (leading 0) and hexadecimal (leading 0x) spellings
 92 |   inputs[1] = "12"; inputs[2] = "012"; inputs[3] = "0x12";
 93 |   for (k = 1; k <= 3; k++) {
 94 |     num = inputs[k];
 95 |     printf("str_to_num(\"%s\") == %s\n", num, str_to_num(num));
 96 |   }
 97 | 
 98 |   print "";
 99 | }
100 | 
101 | # usage: isint(string)
102 | # returns 1 if "string" is a valid integer, otherwise 0
103 | BEGIN {
104 |   # candidates: an integer, a decimal fraction, and a non-numeric word
105 |   vals[1] = "3";
106 |   vals[2] = "1.34";
107 |   vals[3] = "foo";
108 | 
109 |   for (k = 1; k <= 3; k++) {
110 |     var = vals[k];
111 | 
112 |     # pick the wording from isint()'s verdict, then report
113 |     if (isint(var)) {
114 |       verdict = " is a valid integer";
115 |     } else {
116 |       verdict = " is not a valid integer";
117 |     }
118 |     print var verdict;
119 |   }
120 | }
126 | 
127 | # usage: isnum(string)
128 | # returns 1 if "string" is a valid number, otherwise 0
129 | BEGIN {
130 |   # candidates: an integer, a decimal fraction, and a non-numeric word
131 |   vals[1] = "3";
132 |   vals[2] = "1.34";
133 |   vals[3] = "foo";
134 | 
135 |   for (k = 1; k <= 3; k++) {
136 |     var = vals[k];
137 | 
138 |     # pick the wording from isnum()'s verdict, then report
139 |     if (isnum(var)) {
140 |       verdict = " is a valid number";
141 |     } else {
142 |       verdict = " is not a valid number";
143 |     }
144 |     print var verdict;
145 |   }
146 | }
152 | 
153 | # usage: isprime(number)
154 | # returns 1 if "number" is a prime number, otherwise 0. "number" must be a
155 | # positive integer
156 | BEGIN {
157 |   print "primes from 1 through 10:";
158 |   for (i=1; i<=10; i++) {
159 |     if (isprime(i)) {
160 |       printf("%s ", i);   # the number itself; "$i" would be input field i
161 |     }
162 |   }
163 |   print "";               # ends the line of primes
164 | }
165 | 
166 | # usage: gcd(a, b)
167 | # returns the greatest common divisor (greatest common factor) of a and b.
168 | # both a and b must be positive integers. uses the recursive euclid algorithm.
169 | BEGIN {   # the comma makes print put OFS between the label and the result
170 |   print "the greatest common factor of 3 and 6 is", gcd(3, 6);
171 | }
172 | 
173 | # usage: lcm(a, b)
174 | # returns the least common multiple of a and b. both a and b must be positive
175 | # integers.
176 | BEGIN {   # same layout as the gcd example: label, OFS, result
177 |   print "the least common multiple of 4 and 16 is", lcm(4, 16);
178 | }
179 | 
180 | # usage: calc_e()
181 | # approximates e by calculating the summation from k=0 to k=50 of 1/k!
182 | # returns 10 decimal places
183 | BEGIN {
184 |   # prints e; the value is concatenated straight onto the label (no OFS)
185 |   print "e is approximately " calc_e();
186 | }
187 | 
188 | # usage: calc_pi()
189 | # returns pi, with an accuracy of 10 decimal places
190 | BEGIN {
191 |   # prints pi, concatenated onto the label in the same way
192 |   print "pi is approximately " calc_pi();
193 | }
194 | 
195 | # usage: calc_tau()
196 | # returns tau, with an accuracy of 10 decimal places
197 | # http://tauday.com/tau-manifesto
198 | BEGIN {
199 |   # prints tau, concatenated onto the label in the same way
200 |   print "pi is wrong! tau is approximately " calc_tau();
201 | }
202 | 
203 | # usage: deg_to_rad(degrees)
204 | # converts degrees to radians
205 | BEGIN {
206 |   # convert 90 degrees to radians; the result sits between the two labels
207 |   print "90 degrees is " deg_to_rad(90) " radians";
208 | }
209 | 
210 | # usage: rad_to_deg(radians)
211 | # converts radians to degrees
212 | BEGIN {
213 |   # convert pi radians to degrees, using calc_pi() as the input value
214 |   print "pi radians is " rad_to_deg(calc_pi()) " degrees";
215 | }
216 | 
217 | # usage: tan(expr)
218 | # returns the tangent of expr, which is in radians
219 | BEGIN {
220 |   # print the tangent of pi radians, with calc_pi() supplying the angle
221 |   print "the tan of pi radians is " tan(calc_pi());
222 | }
223 | 
224 | # usage: csc(expr)
225 | # returns the cosecant of expr, which is in radians
226 | BEGIN {
227 |   # print the cosecant of pi radians. NOTE(review): csc is undefined at pi, so this presumably prints a huge magnitude - confirm
228 |   print "the csc of pi radians is " csc(calc_pi());
229 | }
230 | 
231 | # usage: sec(expr)
232 | # returns the secant of expr, which is in radians
233 | BEGIN {
234 |   # print the secant of pi radians
235 |   print "the sec of pi radians is " sec(calc_pi());
236 | }
237 | 
238 | # usage: cot(expr)
239 | # returns the cotangent of expr, which is in radians
240 | BEGIN {
241 |   # print the cotangent of pi radians. NOTE(review): cot is undefined at pi as well - confirm the output is meaningful
242 |   print "the cot of pi radians is " cot(calc_pi());
243 | }
244 | 


--------------------------------------------------------------------------------
/examples/options:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/gawk -f
  2 | 
  3 | # example usage of getopts() from http://github.com/e36freak/awk-libs
  4 | 
  5 | # include the lib with getopts. the path to the lib is not needed here if the
  6 | # directory is in AWKPATH, on gawk (this examples assumes it is, or that the
  7 | # lib is in the current working dir). otherwise, use the path to the lib, or
  8 | # copy and paste the whole function here.
  9 | @include "options.awk";
 10 | 
 11 | # prints usage information
 12 | # to see this, make sure you use ./script -- -h. without the '--', awk will
 13 | # treat the -h as an argument to awk itself, not the script
 14 | function usage(    dest, one, two) {
 15 |   # the help text goes to standard error. "one" ends a line, "two" ends a
 16 |   # line and leaves an empty line after it (used after each paragraph)
 17 |   dest = "/dev/stderr"; one = "%s\n"; two = "%s\n\n";
 18 |   printf(two, "example usage of getopts() from http://github.com/e36freak/awk-libs") > dest;
 19 |   printf(two, "awk_getopts -- [OPTIONS] [FILE(s)]") > dest;
 20 |   printf(one, "the \"--\" is required so that options are parsed by the script, and not") > dest;
 21 |   printf(two, "awk itself") > dest;
 22 |   printf(one, " Options:") > dest;
 23 |   printf(one, "  -h, --help         Display this help and exit") > dest;
 24 |   printf(one, "  -a, --arg ARG      Option that requires an argument. prints the arg") > dest;
 25 |   printf(one, "  -b, --blah         Option that doesn't require an arg. prints \"hello world\"") > dest;
 26 |   printf(one, "  -i, --info         Displays optind and getopts' return value for each") > dest;
 27 |   printf(one, "                     iteration from when the option is used, onward. This is to") > dest;
 28 |   printf(one, "                     help give some insight into how the function works") > dest;
 29 |   printf(one, "  -p, --print[=ARG]  This is an example of an OPTIONAL argument. if ARG is") > dest;
 30 |   printf(one, "                     provided (and non-empty), it will be printed. Otherwise,") > dest;
 31 |   printf(two, "                     the string \"no arg\" will be printed.") > dest;
 32 |   printf(one, "This program then prints the name of each file passed, and the total number") > dest;
 33 |   printf(one, "of files at the end") > dest;
 34 | }
 35 | 
 36 | # we do the option parsing in the BEGIN block, of course
 37 | BEGIN {
 38 |   # 'longopts' is an associative array mapping every long option to its
 39 |   # short equivalent. the name is arbitrary, and the array may be omitted
 40 |   # entirely when only short options are wanted
 41 |   longopts["help"] = "h";   # maps --help to -h
 42 |   longopts["arg"] = "a";    # --arg to -a
 43 |   longopts["blah"] = "b";   # --blah to -b
 44 |   longopts["info"] = "i";   # --info to -i
 45 |   longopts["print"] = "p";  # and --print to -p
 46 | 
 47 |   # the parsing loop. -a takes a required argument, hence the ':' after it
 48 |   # in "optstring". 'p' gets no ':', as its argument is OPTIONAL. 'longopts'
 49 |   # goes in as the second argument
 50 |   while ((opt = getopts("ha:bip", longopts)) != -1) {
 51 |     # each option is handled by one branch of an if/else chain, which does
 52 |     # the same job as gawk's switch() while also being understood by other
 53 |     # awks. the long and the short form of an option both arrive as the
 54 |     # short letter
 55 |     if (opt == "h") {
 56 |       # -h, --help
 57 |       usage();
 58 |       toexit = 1;
 59 |       exit;
 60 | 
 61 |     } else if (opt == "a") {
 62 |       # -a, --arg. the required argument is in 'optarg', whichever way it
 63 |       # was given: -aARG, -a ARG, --arg=ARG or --arg ARG
 64 |       print optarg;
 65 | 
 66 |     } else if (opt == "b") {
 67 |       # -b, --blah
 68 |       print "hello world";
 69 | 
 70 |     } else if (opt == "i") {
 71 |       # -i, --info
 72 |       info = 1;
 73 | 
 74 |     } else if (opt == "p") {
 75 |       # -p, --print. 'optarg' is non-empty only if ARG was provided
 76 |       if (length(optarg)) {
 77 |         print optarg;
 78 |       } else {
 79 |         print "no arg";
 80 |       }
 81 | 
 82 |     } else {
 83 |       # "?" (getopts' error return), or anything else unexpected
 84 |       err = toexit = 1;
 85 |       exit;
 86 |     }
 87 | 
 88 |     # if info is on...
 89 |     if (info) {
 90 |       printf("optind is now: %d, and opt (getopts' return value) is now: %s\n",
 91 |              optind, opt) > "/dev/stderr";
 92 |     }
 93 |   }
 94 | 
 95 |   # again, if info is on
 96 |   if (info) {
 97 |     print "finished processing arguments" > "/dev/stderr";
 98 | 
 99 |     # 'optind' is now the index of the first non-option argument in ARGV,
100 |     # should it be needed. the options before it WILL have been deleted
101 |     # from ARGV at this point
102 |     printf("optind is now: %d, ", optind) > "/dev/stderr";
103 | 
104 |     # opt is -1 by now, since there are no options left to process
105 |     printf("and opt is now: %s\n", opt) > "/dev/stderr";
106 |   }
107 | }
118 | 
119 | # just an example block that reads from the file(s) given
120 | {
121 |   # count the file, print its name, and move straight on to the next one
122 |   files++;
123 |   print FILENAME;
124 |   nextfile;
125 | }
126 | 
127 | # END block. awk still runs it after 'exit' is called in BEGIN, so exiting
128 | # again is needed to skip the summary. 'toexit' is kept apart from 'err' so
129 | # that -h|--help can stop the script without making it exit >0.
130 | # otherwise, the number of files read is printed
131 | END {
132 |   if (!toexit) {
133 |     print files;
134 |   } else {
135 |     exit err;
136 |   }
137 | }
138 | 


--------------------------------------------------------------------------------
/examples/sort:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
  4 | # the man page (since the path is relative, this assumes the lib dir is in
  5 | # AWKPATH or the same dir)
  6 | @include "qsort.awk";
  7 | @include "shuf.awk";
  8 | # usage: qsort(s, d [, how])
  9 | # sorts the elements in the array "s" using awk's normal rules for comparing
 10 | # values, creating a new sorted array "d" indexed with sequential integers
 11 | # starting with 1. returns the length, or -1 if an error occurs. leaves the
 12 | # indices of the source array "s" unchanged. the optional string "how" controls
 13 | # the direction and the comparison mode. uses the quick sort algorithm, with a
 14 | # random pivot to avoid worst-case behavior on already sorted arrays. requires
 15 | # the __compare() and __quicksort() functions.
 16 | # valid values for "how" are:
 17 | #   "std asc"
 18 | #     use awk's standard rules for comparison, ascending. this is the default
 19 | #   "std desc"
 20 | #     use awk's standard rules for comparison, descending.
 21 | #   "str asc"
 22 | #     force comparison as strings, ascending.
 23 | #   "str desc"
 24 | #     force comparison as strings, descending.
 25 | #   "num asc"
 26 | #     force a numeric comparison, ascending.
 27 | #   "num desc"
 28 | #     force a numeric comparison, descending.
 29 | BEGIN {
 30 |   # fill a[10] down to a[1], each element holding its own index
 31 |   i = 10;
 32 |   while (i > 0) {
 33 |     a[i] = i;
 34 |     i--;
 35 |   }
 36 | 
 37 |   # the sorted copy goes into "b", numerically ascending
 38 |   len = qsort(a, b, "num asc");
 39 | 
 40 |   # print the sorted values one per line, then an empty line
 41 |   i = 1;
 42 |   while (i <= len) print b[i++];
 43 |   print "";
 44 | }
 45 | 
 46 | # usage: iqsort(s [, how])
 47 | # the behavior is the same as that of qsort(), except that the array "s" is
 48 | # sorted in-place. the original indices are destroyed and replaced with
 49 | # sequential integers. everything else is described in qsort() above.
 50 | BEGIN {
 51 |   # fill a[10] down to a[1], each element holding its own index
 52 |   i = 10;
 53 |   while (i > 0) {
 54 |     a[i] = i;
 55 |     i--;
 56 |   }
 57 | 
 58 |   # "a" itself is sorted this time, numerically ascending
 59 |   len = iqsort(a, "num asc");
 60 | 
 61 |   # print the sorted values one per line, then an empty line
 62 |   i = 1;
 63 |   while (i <= len) print a[i++];
 64 |   print "";
 65 | }
 66 | 
 67 | # usage: qsorti(s, d [, how])
 68 | # the behavior is the same as that of qsort(), except that the array indices
 69 | # are used for sorting, not the array values. when done, the new array is
 70 | # indexed numerically, and the values are those of the original indices.
 71 | # everything else is described in qsort() above.
 72 | BEGIN {
 73 |   # referencing a[i] is enough to make sure indices 10 down to 1 exist
 74 |   i = 10;
 75 |   while (i > 0) {
 76 |     a[i];
 77 |     i--;
 78 |   }
 79 | 
 80 |   # the indices of "a" are sorted into "b", numerically ascending
 81 |   len = qsorti(a, b, "num asc");
 82 | 
 83 |   # print the sorted indices one per line, then an empty line
 84 |   i = 1;
 85 |   while (i <= len) print b[i++];
 86 |   print "";
 87 | }
 88 | 
 89 | # usage: iqsorti(s [, how])
 90 | # the behavior is the same as that of qsorti(), except that the array "s" is
 91 | # sorted in-place. the original indices are destroyed and replaced with
 92 | # sequential integers. everything else is described in qsort() and qsorti()
 93 | # above.
 94 | BEGIN {
 95 |   # referencing a[i] is enough to make sure indices 10 down to 1 exist
 96 |   i = 10;
 97 |   while (i > 0) {
 98 |     a[i];
 99 |     i--;
100 |   }
101 | 
102 |   # the indices are sorted within "a" itself, numerically ascending
103 |   len = iqsorti(a, "num asc");
104 | 
105 |   # print the sorted indices one per line, then an empty line
106 |   i = 1;
107 |   while (i <= len) print a[i++];
108 |   print "";
109 | }
110 | 
111 | # usage: qsortv(s, d [, how])
112 | # sorts the indices in the array "s" based on the values, creating a new
113 | # sorted array "d" indexed with sequential integers starting with 1, and the
114 | # values the indices of "s". returns the length, or -1 if an error occurs.
115 | # leaves the source array "s" unchanged. the optional string "how" controls
116 | # the direction and the comparison mode. uses the quicksort algorithm, with a
117 | # random pivot to avoid worst-case behavior on already sorted arrays. requires
118 | # the __compare() and __vquicksort() functions. valid values for "how" are
119 | # explained in the qsort() function above.
120 | BEGIN {
121 |   # a[1] .. a[10] receive the values 10 down to 1
122 |   j = 10;
123 |   i = 1;
124 |   while (i <= 10) {
125 |     a[i++] = j--;
126 |   }
127 | 
128 |   # "b" receives the indices of "a", ordered by their numeric values
129 |   len = qsortv(a, b, "num asc");
130 | 
131 |   # print each index next to the value it refers to, then an empty line
132 |   for (i = 1; i <= len; i++) {
133 |     idx = b[i];
134 |     print idx, a[idx];
135 |   }
136 |   print "";
137 | }
137 | 
138 | 
139 | 
140 | # usage: shuf(s, d)
141 | # shuffles the array "s", creating a new shuffled array "d" indexed with
142 | # sequential integers starting with one. returns the length, or -1 if an error
143 | # occurs. leaves the indices of the source array "s" unchanged. uses the knuth-
144 | # fisher-yates algorithm. requires the __shuffle() function.
145 | BEGIN {
146 |   # populate a[1] .. a[10]; "i" counts up so that the loop terminates
147 |   for (i=1; i<=10; i++) {
148 |     a[i] = i;
149 |   }
150 | 
151 |   # shuffle "a" into the new array "b"
152 |   len = shuf(a, b);
153 | 
154 |   # dump the shuffled values, one per line
155 |   for (i=1; i<=len; i++) {
156 |     print b[i];
157 |   }
158 | 
159 |   print "";
160 | }
161 | 
162 | # usage: ishuf(s)
163 | # the behavior is the same as that of shuf(), except the array "s" is shuffled
164 | # in-place. the original indices are destroyed and replaced with sequential
165 | # integers. everything else is described in shuf() above.
166 | BEGIN {
167 |   # populate a[1] .. a[10]; "i" counts up so that the loop terminates
168 |   for (i=1; i<=10; i++) {
169 |     a[i] = i;
170 |   }
171 | 
172 |   # shuffle in place
173 |   len = ishuf(a);
174 | 
175 |   # dump the shuffled values, one per line
176 |   for (i=1; i<=len; i++) {
177 |     print a[i];
178 |   }
179 | 
180 |   print "";
181 | }
182 | 
183 | # usage: shufi(s, d)
184 | # the behavior is the same as that of shuf(), except that the array indices
185 | # are shuffled, not the array values. when done, the new array is indexed
186 | # numerically, and the values are those of the original indices. everything
187 | # else is described in shuf() above.
188 | BEGIN {
189 |   # populate array with the indices (and values) 1 through 10
190 |   for (i=1; i<=10; i++) {
191 |     a[i] = i;
192 |   }
193 | 
194 |   # shuffle the indices of "a" into the new array "b"
195 |   len = shufi(a, b);
196 | 
197 |   # dump the shuffled indices, one per line
198 |   for (i=1; i<=len; i++) {
199 |     print b[i];
200 |   }
201 | 
202 |   print "";
203 | }
204 | 
205 | # usage: ishufi(s)
206 | # the behavior is the same as that of shufi(), except that the array "s" is
207 | # shuffled in-place. the original indices are destroyed and replaced with
208 | # sequential integers. everything else is described in shuf() and shufi()
209 | # above.
210 | BEGIN {
211 |   # populate array with the indices (and values) 1 through 10
212 |   for (i=1; i<=10; i++) {
213 |     a[i] = i;
214 |   }
215 | 
216 |   # shuffle indices in place; "a" itself is reindexed from 1 to len
217 |   len = ishufi(a);
218 | 
219 |   # dump the shuffled indices, one per line
220 |   for (i=1; i<=len; i++) {
221 |     print a[i];
222 |   }
223 | 
224 |   print "";
225 | }
226 | 


--------------------------------------------------------------------------------
/examples/strings:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
  4 | # the man page (since the path is relative, this assumes the lib dir is in
  5 | # AWKPATH or the same dir)
  6 | @include "strings.awk";
  7 | 
  8 | # usage: center(string [, width])
  9 | # returns "string" centered based on "width". if "width" is not provided (or 
 10 | # is 0), uses the width of the terminal, or 80 if standard output is not open
 11 | # on a terminal.
 12 | # note: does not check the length of the string. if it's wider than the
 13 | # terminal, it will not center lines other than the first. for best results,
 14 | # combine with fold().
 15 | BEGIN {
 16 |   print center("this string is centered"); # no width given: terminal width or 80
 17 | 
 18 |   print "";
 19 | }
 20 | 
 21 | # usage: fold(string, sep [, width])
 22 | # returns "string", wrapped, with lines broken on "sep" to "width" columns.
 23 | # "sep" is a list of characters to break at, similar to IFS in a POSIX shell.
 24 | # if "sep" is empty, wraps at exactly "width" characters. if "width" is not
 25 | # provided (or is 0), uses the width of the terminal, or 80 if standard output
 26 | # is not open on a terminal.
 27 | # note: currently, tabs are squeezed to a single space. this will be fixed
 28 | BEGIN {
 29 |   # folds the alphabet on vowels, to 12 and 15 characters
 30 |   alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 31 | 
 32 |   print "alphabet, folded to 12 characters on vowels:";
 33 |   print "";
 34 |   print fold(alphabet, "AEIOU", 12); # "AEIOU" is the list of break characters
 35 |   print "";
 36 | 
 37 |   print "alphabet, folded to 15 characters on vowels:";
 38 |   print "";
 39 |   print fold(alphabet, "AEIOU", 15); # same break characters, wider lines
 40 |   print "";
 41 | }
 42 | 
 43 | # usage: ssub(ere, repl [, in])
 44 | # behaves like sub, except returns the result and doesn't modify the original
 45 | BEGIN {
 46 |   string = "this is some string";
 47 | 
 48 |   print "ssub: ";
 49 |   print "";
 50 | 
 51 |   print string; # before
 52 |   print ssub("str.*", "replaced &", string); # the returned result
 53 |   print string; # printed again to show that the original is not modified
 54 | 
 55 |   print "";
 56 | }
 57 | 
 58 | # usage: sgsub(ere, repl [, in])
 59 | # behaves like gsub, except returns the result and doesn't modify the original
 60 | BEGIN {
 61 |   string = "this is some search string search"; # contains "search" twice
 62 | 
 63 |   print "sgsub: ";
 64 |   print "";
 65 | 
 66 |   print string; # before
 67 |   print sgsub("search", "replace", string); # the returned result
 68 |   print string; # printed again to show that the original is not modified
 69 | 
 70 |   print "";
 71 | }
 72 | 
 73 | # usage: lsub(str, repl [, in])
 74 | # substitutes the string "repl" in place of the first instance of "str" in the
 75 | # string "in" and returns the result. does not modify the original string.
 76 | # if "in" is not provided, uses $0.
 77 | BEGIN {
 78 |   string = "string with special .* characters";
 79 | 
 80 |   print "lsub: ";
 81 |   print "";
 82 | 
 83 |   print string; # before
 84 |   print lsub(".*", "literal", string); # ".*" is searched for as plain text
 85 |   print string; # printed again to show that the original is not modified
 86 | 
 87 |   print "";
 88 | }
 89 | 
 90 | # usage: glsub(str, repl [, in])
 91 | # behaves like lsub, except it replaces all occurrences of "str"
 92 | BEGIN {
 93 |   string = "string with .* special .* characters"; # contains ".*" twice
 94 | 
 95 |   print "glsub: ";
 96 |   print "";
 97 | 
 98 |   print string; # before
 99 |   print glsub(".*", "literal", string); # ".*" is searched for as plain text
100 |   print string; # printed again to show that the original is not modified
101 | 
102 |   print "";
103 | }
104 | 
105 | # usage: shell_esc(string)
106 | # returns the string escaped so that it can be used in a shell command
107 | BEGIN {
108 |   file = "some 'filename' with * special characters to be used in system()";
109 | 
110 |   print "shell escape:"
111 |   print "";
112 |   print "first is normal, second escaped: ";
113 |   print file; # the raw string, with its quotes, spaces and "*"
114 |   print shell_esc(file); # the same string as returned by shell_esc()
115 |   print "";
116 | }
117 | 
118 | # usage: str_to_arr(string, array)
119 | # converts string to an array, one char per element, 1-indexed
120 | # returns the array length
121 | BEGIN {
122 |   string = "some string";
123 | 
124 |   print "str_to_arr:";
125 |   print "";
126 | 
127 |   print "initial string: ";
128 |   print string;
129 |   print "";
130 | 
131 |   print "one character per line:"
132 |   len = str_to_arr(string, array);
133 |   i = 1; # walk array[1..len], one character per element
134 |   while (i <= len) {
135 |     print array[i++];
136 |   }
137 | 
138 |   print "";
139 | }
140 | 
141 | # usage: extract_range(string, start, stop)
142 | # extracts fields "start" through "stop" from "string", based on FS, with the
143 | # original field separators intact. returns the extracted fields.
144 | BEGIN {
145 |   str = "foo bar   baz\tblah  quux"; # single spaces, runs of spaces, and a tab
146 |   FS = " ";
147 | 
148 |   printf("extract_range(str, 1, 3): %s\n", extract_range(str, 1, 3));
149 |   printf("extract_range(str, 2, NF): %s\n", extract_range(str, 2, NF)); # NOTE(review): NF is normally 0 inside BEGIN - confirm intent
150 |   printf("extract_range(str, 3, 8): %s\n", extract_range(str, 3, 8)); # "stop" is past the last field
151 | 
152 |   print "";
153 | }
154 | 
155 | # usage: fwidths(width_spec [, string])
156 | # extracts substrings from "string" according to "width_spec" from left to
157 | # right and assigns them to $1, $2, etc. also assigns the NF variable. if
158 | # "string" is not supplied, uses $0. "width_spec" is a space separated list of
159 | # numbers that specify field widths, just like GNU awk's FIELDWIDTHS variable.
160 | # if there is data left over after the last width_spec, adds it to a final
161 | # field. returns the value for NF.
162 | BEGIN {
163 |   str = "1234567890";
164 |   fwidths("3 2 5", str); # assigns the fields and NF from the widths 3, 2, 5
165 |   i = 1;
166 |   while (i <= NF) {
167 |     print $i;
168 |     i++;
169 |   }
170 |   print "";
171 | }
172 | 
173 | # usage: fwidths_arr(width_spec, array [, string])
174 | # the behavior is the same as that of fwidths(), except that the values are
175 | # assigned to "array", indexed with sequential integers starting with 1.
176 | # returns the length. everything else is described in fwidths() above.
177 | BEGIN {
178 |   str = "1234567890";
179 |   len = fwidths_arr("2 1 4 3", a, str); # fields go into a[1..len], not $1..$NF
180 | 
181 |   for (i=1; i<=len; i++) {
182 |     print a[i];
183 |   }
184 | 
185 |   print "";
186 | }
187 | 
188 | # usage: lsplit(str, arr, sep)
189 | # splits the string "str" into array elements "arr[1]", "arr[2]", .., "arr[n]",
190 | # and returns "n". all elements of "arr" are deleted before the split is
191 | # performed. the separation is done on the literal string "sep".
192 | BEGIN {
193 |   string = "foo.bar.baz"
194 |   sep = "."; # taken literally by lsplit(), not as a regex
195 |   printf("original: <%s>\nsep: <%s>\n", string, sep);
196 |   
197 |   len = lsplit(string, a, sep);
198 |   print "  after: len = lsplit(string, a, sep)";
199 |   printf("    len: %d\n", len);
200 |   for (i=1; i<=len; i++) { # one line per resulting element
201 |     printf("    a[%d]: <%s>\n", i, a[i]);
202 |   }
203 | 
204 |   print "";
205 | }
206 | 
207 | # usage: ssplit(str, arr, seps [, ere])
208 | # similar to GNU awk 4's "seps" functionality for split(). splits the string
209 | # "str" into the array "arr" and the separators array "seps" on the regular
210 | # expression "ere", and returns the number of fields. the value of "seps[i]"
211 | # is the separator that appeared in front of "arr[i+1]". if "ere" is omitted or
212 | # empty, FS is used instead. if "ere" is a single space, leading whitespace in
213 | # "str" will go into the extra array element "seps[0]" and trailing whitespace
214 | # will go into the extra array element "seps[len]", where "len" is the return
215 | # value.
216 | # note: /regex/ style quoting cannot be used for "ere".
217 | BEGIN {
218 |   string = " one  two   three    "; # leading, inner and trailing whitespace
219 |   printf("original: <%s>\n", string);
220 | 
221 |   len = ssplit(string, a, s, " "); # fields go to "a", separators to "s"
222 |   print "  after: len = ssplit(string, a, s, \" \")";
223 |   printf("    len: %d\n    s[0]: <%s>\n", len, s[0]); # s[0]: leading whitespace
224 |   for (i=1; i<=len; i++) { # each field with the separator that follows it
225 |     printf("    a[%d]: <%s>, s[%d]: <%s>\n", i, a[i], i, s[i]);
226 |   }
227 |   
228 |   print "";
229 | }
230 | 
231 | # usage: ends_with(string, substring)
232 | # returns 1 if "string" ends with "substring", otherwise 0
233 | BEGIN {
234 |   string = "foobar"; s = "bar"; # first case: "bar" is at the end
235 |   if (ends_with(string, s)) {
236 |     printf("%s ends with %s\n", string, s);
237 |   } else {
238 |     printf("%s does not end with %s\n", string, s);
239 |   }
240 | 
241 |   string = "foobarfoo"; s = "bar"; # second case: "bar" is only in the middle
242 |   if (ends_with(string, s)) {
243 |     printf("%s ends with %s\n", string, s);
244 |   } else {
245 |     printf("%s does not end with %s\n", string, s);
246 |   }
247 | 
248 |   print "";
249 | }
250 | 
251 | # usage: trim(string)
252 | # returns "string" with leading and trailing whitespace trimmed
253 | BEGIN {
254 |   string = "      whitespace      ";
255 | 
256 |   print "normal: <" string ">"; # the angle brackets make the padding visible
257 |   print "after trim(): <" trim(string) ">";
258 | 
259 |   print "";
260 | }
261 | 
262 | # usage: rev(string)
263 | # returns "string" backwards
264 | BEGIN {
265 |   string = "forwards";
266 | 
267 |   print "normal: " string; # the string as assigned above
268 |   print "after rev(): " rev(string); # the value returned by rev()
269 | }
270 | 
271 | # usage: max(array [, how ])
272 | # returns the maximum value in "array", 0 if the array is empty, or -1 if an
273 | # error occurs. the optional string "how" controls the comparison mode.
274 | # requires the __mcompare() function.
275 | # valid values for "how" are:
276 | #   "std"
277 | #     use awk's standard rules for comparison. this is the default
278 | #   "str"
279 | #     force comparison as strings
280 | #   "num"
281 | #     force a numeric comparison
282 | BEGIN {
283 |   # fill a[0] through a[9] with random integers between 0 and 9
284 |   i = 0;
285 |   while (i < 10) {
286 |     a[i++] = int(rand() * 10);
287 |   }
288 |   # print the largest value, compared numerically
289 |   print "max: " max(a, "num");
290 | }
291 | 
292 | # usage: maxi(array [, how ])
293 | # the behavior is the same as that of max(), except that the array indices are
294 | # used, not the array values. everything else is explained in max() above.
295 | BEGIN {
296 |   # empty "a" first (the blocks above leave the indices 0-9 behind in it),
297 |   # then create up to ten random indices between 0 and 9
298 |   split("", a);
299 |   for (i=0; i<10; i++) a[int(rand() * 10)];
300 | 
301 |   # print the max
302 |   print "max: " maxi(a, "num");
303 | }
304 | 
305 | # usage: min(array [, how ])
306 | # the behavior is the same as that of max(), except that the minimum value is
307 | # returned instead of the maximum. everything else is explained in max() above.
308 | BEGIN {
309 |   # fill a[0] through a[9] with random integers between 0 and 9
310 |   i = 0;
311 |   while (i < 10) {
312 |     a[i++] = int(rand() * 10);
313 |   }
314 |   # print the smallest value, compared numerically
315 |   print "min: " min(a, "num");
316 | }
317 | 
318 | # usage: mini(array [, how ])
319 | # the behavior is the same as that of min(), except that the array indices are
320 | # used instead of the array values. everything else is explained in min() and
321 | # max() above.
322 | BEGIN {
323 |   # empty "a" first (the blocks above leave the indices 0-9 behind in it),
324 |   # then create up to ten random indices between 0 and 9
325 |   split("", a);
326 |   for (i=0; i<10; i++) a[int(rand() * 10)];
327 | 
328 |   # print the min
329 |   print "min: " mini(a, "num");
330 | }
331 | 


--------------------------------------------------------------------------------
/examples/sys:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/awk -f
 2 | 
 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
 4 | # the man page (since the path is relative, this assumes the lib dir is in
 5 | # AWKPATH or the same dir)
 6 | @include "sys.awk";
 7 | 
 8 | # usage: isatty(fd)
 9 | # Checks if "fd" is open on a tty. Returns 1 if so, 0 if not, and -1 if an
10 | # error occurs
11 | BEGIN {
12 |   if (isatty(0)) { # file descriptor 0
13 |     print "stdin is open on a tty";
14 |   } else {
15 |     print "stdin is not open on a tty";
16 |   }
17 | 
18 |   if (isatty(1)) { # file descriptor 1
19 |     print "stdout is open on a tty";
20 |   } else {
21 |     print "stdout is not open on a tty";
22 |   }
23 | 
24 |   if (isatty(2)) { # file descriptor 2
25 |     print "stderr is open on a tty";
26 |   } else {
27 |     print "stderr is not open on a tty";
28 |   }
29 | }
30 | 
31 | # usage: mktemp(template [, type])
32 | # creates a temporary file or directory, safely, and returns its name.
33 | # if template is not a pathname, the file will be created in ENVIRON["TMPDIR"]
34 | # if set, otherwise /tmp. the last six characters of template must be "XXXXXX",
35 | # and these are replaced with a string that makes the filename unique. type, if
36 | # supplied, is either "f", "d", or "u": for file, directory, or dry run (just
37 | # returns the name, doesn't create a file), respectively. If template is not
38 | # provided, uses "tmp.XXXXXX". Files are created u+rw, and directories u+rwx,
39 | # minus umask restrictions. returns -1 if an error occurs.
40 | BEGIN {
41 |   print mktemp("foo.XXXXXX", "u"); # "u": dry run, only the name is returned
42 |   print mktemp("./bar.XXXXXX"); # no type given; the template is a pathname
43 |   print mktemp("./dir.XXXXXX", "d"); # "d": a directory
44 |   print mktemp("broken.XXX", "u"); # does not end in six X's; presumably shows the -1 error return
45 | }
46 | 


--------------------------------------------------------------------------------
/examples/times:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/awk -f
 2 | 
 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see
 4 | # the man page (since the path is relative, this assumes the lib dir is in
 5 | # AWKPATH or the same dir)
 6 | @include "times.awk";
 7 | 
 8 | # usage: month_to_num(month)
 9 | # converts human readable month to the decimal representation
10 | # returns the number, -1 if the month doesn't exist
11 | BEGIN {
12 |   print "month_to_num(\"January\"): " month_to_num("January"); # full name
13 |   print "month_to_num(\"Jan\"): " month_to_num("Jan"); # abbreviated name
14 | 
15 |   print "";
16 | }
17 | 
18 | # usage: day_to_num(day)
19 | # converts human readable day to the decimal representation
20 | # returns the number, -1 if the day doesn't exist
21 | # like date +%w, sunday is 0
22 | BEGIN {
23 |   print "day_to_num(\"Monday\"): " day_to_num("Monday"); # full name
24 |   print "day_to_num(\"Mon\"): " day_to_num("Mon"); # abbreviated name
25 | 
26 |   print "";
27 | }
28 | 
29 | # usage: hr_to_sec(timestamp)
30 | # converts HH:MM:SS or MM:SS to seconds
31 | # returns -1 if invalid format
32 | BEGIN {
33 |   print "hr_to_sec(\"00:05:03\"): " hr_to_sec("00:05:03"); # an HH:MM:SS timestamp
34 | 
35 |   print "";
36 | }
37 | 
38 | # usage: sec_to_hr(seconds)
39 | # converts seconds to HH:MM:SS
40 | BEGIN {
41 |   print "sec_to_hr(500): " sec_to_hr(500); # 500 seconds, shown as HH:MM:SS
42 | 
43 |   print "";
44 | }
45 | 
46 | # usage: ms_to_hr(milliseconds)
47 | # converts milliseconds to a "time(1)"-similar human readable format, such
48 | # as 1m4.356s
49 | BEGIN {
50 |   print "ms_to_hr(116529): " ms_to_hr(116529); # 116529 milliseconds
51 | 
52 |   print "";
53 | }
54 | 
55 | # usage: add_day_suff(day_of_month)
56 | # appends the appropriate suffix to "day_of_month". for example,
57 | # add_day_suff(1) will return "1st", and add_day_suff(22) will return "22nd"
58 | # returns -1 if "day_of_month" is not a positive integer
59 | BEGIN {
60 |   print "adding suffixes for days 1-31:";
61 |   for (d=1; 31>=d; ++d) { # every possible day of the month
62 |     printf("add_day_suff(%s) -> %s\n", d, add_day_suff(d));
63 |   }
64 | }
65 | 


--------------------------------------------------------------------------------
/math.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | ## usage: abs(number)
  4 | ## returns the magnitude of "number", i.e. "number" without its minus sign
  5 | function abs(num) {
  6 |   return num >= 0 ? num : -num;
  7 | }
  8 | 
  9 | ## usage: ceil(number)
 10 | ## returns the smallest integer that is not less than "number"
 11 | function ceil(num) {
 12 |   # int() truncates toward zero, which already rounds negative numbers up;
 13 |   # only a non-negative number with a fractional part needs one added
 14 |   if (num >= 0 && num != int(num)) {
 15 |     return int(num) + 1;
 16 |   }
 17 |   return int(num); }
 18 | 
 19 | ## usage: ceiling(multiple, number)
 20 | ## returns "number" rounded UP to the nearest multiple of "multiple"
 21 | function ceiling(mult, num,    r) {
 22 |   return (r = num % mult) > 0 ? num + (mult - r) : num - r; # r <= 0: drop it
 23 | }
 24 | 
 25 | ## usage: change_base(number, start_base, end_base)
 26 | ## converts "number" from "start_base" to "end_base"
 27 | ## bases must be between 2 and 64. the digits greater than 9 are represented
 28 | ## by the lowercase letters, the uppercase letters, @, and _, in that order.
 29 | ## if ibase is less than or equal to 36, lowercase and uppercase letters may
 30 | ## be used interchangeably to represent numbers between 10 and 35.
 31 | ## returns 0 if any argument is invalid
 32 | function change_base(num, ibase, obase,
 33 |                      chars, c, l, i, j, cur, b10, f, fin, isneg) {
 34 |   # convert number to lowercase if ibase <= 36
 35 |   if (ibase <= 36) {
 36 |     num = tolower(num);
 37 |   }
 38 | 
 39 |   # determine if number is negative. if so, set isneg=1 and remove the '-'
 40 |   if (sub(/^-/, "", num)) {
 41 |     isneg = 1;
 42 |   }
 43 | 
 44 |   # determine if inputs are valid. every character of the 64 digit alphabet
 45 |   # is accepted here; the per-digit range check is done further down
 46 |   if (num ~ /[^0-9A-Za-z@_]/ || ibase != int(ibase) || obase != int(obase) ||
 47 |       ibase < 2 || ibase > 64 || obase < 2 || obase > 64) {
 48 |     return 0;
 49 |   }
 50 | 
 51 |   # build the digit table: chars[letter] is the value of a digit above 9 and
 52 |   # chars[value] is its letter. it is built unconditionally, so that a letter
 53 |   # in a small base fails the range check below instead of counting as 0
 54 |   c = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@_";
 55 |   l = length(c);
 56 | 
 57 |   j = 10;
 58 |   for (i=1; i<=l; i++) {
 59 |     cur = substr(c, i, 1);
 60 |     chars[cur] = j;
 61 |     chars[j] = cur;
 62 | 
 63 |     j++;
 64 |   }
 65 | 
 66 |   # convert to base 10
 67 |   if (ibase != 10) {
 68 |     l = length(num);
 69 |     j = b10 = 0;
 70 |     for (i=l; i>0; i--) {
 71 |       c = substr(num, i, 1);
 72 | 
 73 |       # if char is a non-digit convert to dec
 74 |       if (c !~ /^[0-9]$/) {
 75 |         c = chars[c];
 76 |       }
 77 | 
 78 |       # check to make sure value isn't too great for base
 79 |       if (+c >= ibase) {
 80 |         return 0;
 81 |       }
 82 | 
 83 |       b10 += c * (ibase ^ j++);
 84 |     }
 85 |   } else if (num ~ /[^0-9]/) {
 86 |     return 0; # letters, @ and _ are never valid base 10 digits
 87 |   } else {
 88 |     b10 = num; # num is already base 10
 89 |   }
 90 | 
 91 |   # convert from base 10 to obase
 92 |   if (obase != 10) {
 93 |     # build number backwards
 94 |     j = 0;
 95 |     do {
 96 |       f[++j] = (c = b10 % obase) > 9 ? chars[c] : c;
 97 |       b10 = int(b10 / obase);
 98 |     } while (b10);
 99 | 
100 |     # reverse number
101 |     fin = f[j];
102 |     for (i=j-1; i>0; i--) {
103 |       fin = fin f[i];
104 |     }
105 |   } else {
106 |     # num has already been converted to base 10
107 |     fin = b10;
108 |   }
109 | 
110 |   # add '-' if number was negative
111 |   if (isneg) {
112 |     fin = "-" fin;
113 |   }
114 | 
115 |   return fin;
116 | }
117 | 
118 | ## usage: format_num(number)
119 | ## adds commas to "number" to make it more readable. for example,
120 | ## format_num(1000) will return "1,000", and format_num("123456.7890") will
121 | ## return "123,456.7890". also trims redundant leading zeroes ("007" -> "7")
122 | ## returns 0 if "number" is not a valid number
123 | function format_num(num,    is_float, b, e, i, len, r, out) {
124 |   # trim leading zeroes, but keep the last zero in front of the decimal point
125 |   # (or the end), so that "0" and "0.5" are not reduced to "" and ".5"
126 |   while (num ~ /^0[0-9]/) num = substr(num, 2);
127 | 
128 |   # make sure "num" is a valid number
129 |   if (num ~ /[^0-9.]/ || num ~ /\..*\./) {
130 |     return 0;
131 |   }
132 | 
133 |   # if "num" is not an int, split it into pre and post decimal parts. use
134 |   # sub() instead of int(), which can be funny for float arithmetic results
135 |   if (num ~ /\./) {
136 |     is_float = 1; # flag "num" as a float
137 |     b = e = num;
138 |     sub(/\..*/, "", b);
139 |     sub(/.*\./, "", e);
140 | 
141 |   # otherwise, just assign the number to "b"
142 |   } else {
143 |     is_float = 0;
144 |     b = num;
145 |   }
146 | 
147 |   len = length(b);
148 | 
149 |   # only do anything if the pre-decimal section is at least 3 digits long
150 |   if (len < 3) {
151 |     return num;
152 |   }
153 | 
154 |   # start by assigning the last 3 pre-decimal digits to out
155 |   out = substr(b, len - 2);
156 | 
157 |   # loop backwards over each grouping of 3 numbers after that, prepending
158 |   # each to out (with a comma)
159 |   for (i=len-5; i>0; i-=3) {
160 |     out = substr(b, i, 3) "," out;
161 |   }
162 | 
163 |   # if the length is not a multiple of 3, prepend the remaining digits
164 |   if (r = len % 3) {
165 |     out = substr(b, 1, r) "," out;
166 |   }
167 | 
168 |   # if number was a float, add the post-decimal digits back on
169 |   if (is_float) {
170 |     out = out "." e;
171 |   }
172 | 
173 |   # return the formatted number
174 |   return out;
175 | }
176 | 
177 | ## usage: str_to_num(string)
178 | ## examines "string", and returns its numeric value. if "string" begins with a
179 | ## leading 0, assumes that "string" is an octal number. if "string" begins with
180 | ## a leading "0x" or "0X", assumes that "string" is a hexadecimal number.
181 | ## otherwise, decimal is assumed.
182 | function str_to_num(str,    base, isneg, l, i, j, chars, c, num) {
183 |   # convert to all lowercase
184 |   str = tolower(str);
185 | 
186 |   # determine if number is negative. if so, set isneg=1 and remove the '-'
187 |   if (sub(/^-/, "", str)) {
188 |     isneg = 1;
189 |   }
190 | 
191 |   # examine the string, to determine the base and trim said base information
192 |   if (sub(/^0x/, "", str)) {
193 |     base = 16;
194 |   } else if (sub(/^0/, "", str)) {
195 |     base = 8;
196 |   } else {
197 |     base = 10;
198 |   }
199 | 
200 |   # trim everything from the first non-number character to the end
201 |   if (base == 16) {
202 |     sub(/[^[:xdigit:]].*/, "", str);
203 |   } else {
204 |     sub(/[^[:digit:]].*/, "", str);
205 |   }
206 | 
207 |   # if the base is octal, but there's a number >= 8, set it to decimal instead
208 |   if (base == 8 && str ~ /[89]/) {
209 |     base = 10;
210 |   }
211 | 
212 |   # don't need to convert if the base is 10
213 |   if (base == 10) {
214 |     return isneg ? -str : +str;
215 |   }
216 | 
217 |   # set letters for hex
218 |   if (base == 16) {
219 |     chars["a"] = 10; chars["b"] = 11; chars["c"] = 12;
220 |     chars["d"] = 13; chars["e"] = 14; chars["f"] = 15;
221 |   }
222 | 
223 |   # convert to base 10, walking the digits from least to most significant
224 |   l = length(str);
225 | 
226 |   j = num = 0;
227 |   for (i=l; i>0; i--) {
228 |     c = substr(str, i, 1);
229 | 
230 |     # if char is a non-digit convert to dec
231 |     if (c !~ /^[0-9]$/) {
232 |       c = chars[c];
233 |     }
234 | 
235 |     num += c * (base ^ j++);
236 |   }
237 |   
238 |   # return the number
239 |   return isneg ? -num : +num;
240 | }
241 | 
242 | ## usage: floor(multiple, number)
243 | ## returns "number" rounded DOWN to the nearest multiple of "multiple"
244 | function floor(mult, num,    r) {
245 |   return (r = num % mult) < 0 ? num - r - mult : num - r; # r < 0: one step down
246 | }
247 | 
248 | ## usage: round(multiple, number)
249 | ## returns "number" rounded to the nearest multiple of "multiple"
250 | function round(mult, num,    r) {
251 |   # awk's % is negative for a negative "num"; normalize it into [0, mult)
252 |   if ((r = num % mult) < 0) {
253 |     r += mult;
254 |   }
255 |   return r < mult / 2 ? num - r : num + (mult - r); # exact halves round up
256 | }
257 | 
258 | ## usage: rint(number)
259 | ## returns the integer nearest to "number"; an exact half goes to the higher one
260 | function rint(num,    n) {
261 |   n = int(num); # truncated toward zero
262 |   if (num < 0) {
263 |     return (num - n < -.5) ? n - 1 : n;
264 |   }
265 |   return (num - n >= .5) ? n + 1 : n;
266 | }
267 | 
268 | ## usage: isint(string)
269 | ## returns 1 if "string" is written as an integer, otherwise 0
270 | function isint(str) {
271 |   # an optional leading minus, followed by one or more decimal digits
272 |   if (str ~ /^-?[0-9]+$/) {
273 |     return 1;
274 |   }
275 |   return 0;
276 | }
277 | 
278 | ## usage: isnum(string)
279 | ## returns 1 if "string" is a valid number, otherwise 0
280 | function isnum(str) {
281 |   # use a regex comparison because 'str == str + 0' has issues with some floats.
282 |   # at least one digit is required, so that "." and "-." are not accepted
283 |   if (str !~ /^-?[0-9.]+$/ || str ~ /\..*\./ || str !~ /[0-9]/) {
284 |     return 0;
285 |   }
286 |   return 1;
287 | }
288 | 
289 | ## usage: isprime(number)
290 | ## returns 1 if "number" is a prime number, otherwise 0. anything that is not
291 | ## an integer greater than one is reported as not prime
292 | function isprime(num,    i, s) {
293 |   # reject 0, 1, and anything not written as a non-negative integer
294 |   if (num !~ /^[0-9]+$/ || num <= 1) {
295 |     return 0;
296 |   }
297 | 
298 |   # only 2 and 3 are left below 4, and both are prime
299 |   if (num <= 3) {
300 |     return 1;
301 |   }
302 | 
303 |   # multiples of 2 and of 3 are composite
304 |   if (num % 2 == 0 || num % 3 == 0) {
305 |     return 0;
306 |   }
307 | 
308 |   # trial division by the odd numbers from 5 up to the square root
309 |   # (a fermat's little theorem test overflowed for primes above 1021)
310 |   s = sqrt(num);
311 |   i = 5;
312 |   while (i <= s) {
313 |     if (num % i == 0) return 0;
314 |     i += 2;
315 |   }
316 | 
317 |   return 1;
318 | }
319 | 
320 | ## usage: gcd(a, b)
321 | ## returns the greatest common divisor (greatest common factor) of a and b.
322 | ## both a and b must be positive integers. uses the recursive euclid algorithm.
323 | function gcd(a, b,    f) {
324 |   # "f" is only set by the recursive calls; the outermost call validates that
325 |   # both numbers are positive ints
326 |   if (!f && (a !~ /^[0-9]+$/ || !a || b !~ /^[0-9]+$/ || !b)) {
327 |     return 0;
328 |   }
329 | 
330 |   # base case: nothing left to divide by, so "a" is the answer. return the
331 |   # absolute value
332 |   if (!b) {
333 |     return a < 0 ? -a : a;
334 |   }
335 | 
336 |   # euclid step: gcd(a, b) equals gcd(b, a mod b)
337 |   return gcd(b, a % b, 1);
338 | }
339 | 
340 | ## usage: lcm(a, b)
341 | ## returns the least common multiple of a and b. both a and b must be positive
342 | ## integers.
343 | function lcm(a, b,    m, l, t) {
344 |   # check to make sure both numbers are positive ints
345 |   if (a !~ /^[0-9]+$/ || !a || b !~ /^[0-9]+$/ || !b) {
346 |     return 0;
347 |   }
348 |   # euclid's algorithm: when the loop ends, "m" is the greatest common divisor
349 |   # of a and b. this replaces testing every multiple of a one by one
350 |   m = a + 0; l = b + 0;
351 |   while (l) { t = m % l; m = l; l = t; }
352 |   return a / m * b; # a / m is exact, because m divides a
353 | }
354 | 
355 | ## usage: calc_e()
356 | ## approximates e by calculating the summation from k=0 to k=50 of 1/k!
357 | ## returns 10 decimal places
358 | function calc_e(lim,    e, k, i, f) {
359 |   # k = 0 term: 1/0! = 1. "f" carries k! from one term to the next
360 |   e = f = 1;
361 | 
362 |   for (k=1; k<=50; k++) {
363 |     # k! = (k-1)! * k; the factors are multiplied in the same order as when
364 |     # every factorial is recomputed from scratch, so the sum is unchanged
365 |     f = f * k;
366 |     # add to e
367 |     e += 1 / f;
368 |   }
369 | 
370 |   return sprintf("%0.10f", e);
371 | }
372 | 
373 | 
374 | ## usage: calc_pi()
375 | ## returns pi as a string, rounded to 10 decimal places
376 | function calc_pi(    pi) {
377 |   pi = atan2(0, -1); return sprintf("%0.10f", pi);
378 | }
379 | 
380 | ## usage: calc_tau()
381 | ## returns tau (two times pi) as a string, rounded to 10 decimal places
382 | function calc_tau(    tau) {
383 |   tau = 2 * atan2(0, -1); return sprintf("%0.10f", tau);
384 | }
385 | 
386 | ## usage: deg_to_rad(degrees)
387 | ## returns the angle "degrees" expressed in radians
388 | function deg_to_rad(deg,    tau) {
389 |   # a full turn (360 degrees) is 8 * atan2(1, 1) radians, because atan2(1, 1)
390 |   # is an eighth of a turn
391 |   return deg / 360 * (8 * atan2(1, 1));
392 | }
393 | 
394 | ## usage: rad_to_deg(radians)
395 | ## returns the angle "radians" expressed in degrees
396 | function rad_to_deg(rad,    tau) {
397 |   # a full turn (360 degrees) is 8 * atan2(1, 1) radians, because atan2(1, 1)
398 |   # is an eighth of a turn
399 |   return rad / (8 * atan2(1, 1)) * 360;
400 | }
401 | 
402 | ## usage: tan(expr)
403 | ## returns the tangent of expr, which is in radians. computed as sin/cos
404 | function tan(ang) {
405 |   return sin(ang)/cos(ang);
406 | }
407 | 
408 | ## usage: csc(expr)
409 | ## returns the cosecant of expr, which is in radians. computed as 1/sin
410 | function csc(ang) {
411 |   return 1/sin(ang); # NOTE(review): divides by zero when sin(ang) is 0, e.g. ang = 0
412 | }
413 | 
414 | ## usage: sec(expr)
415 | ## returns the secant of expr, which is in radians. computed as 1/cos
416 | function sec(ang) {
417 |   return 1/cos(ang);
418 | }
419 | 
420 | ## usage: cot(expr)
421 | ## returns the cotangent of expr, which is in radians. computed as cos/sin
422 | function cot(ang) {
423 |   return cos(ang)/sin(ang); # NOTE(review): divides by zero when sin(ang) is 0, e.g. ang = 0
424 | }
425 | 
426 | 
427 | 
428 | # You can do whatever you want with this stuff, but a thanks is always
429 | # appreciated
430 | 


--------------------------------------------------------------------------------
/msort.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # comparison function
  4 | # usage: __compare(a, b, how)
  5 | # compares "a" and "b" based on "how", returning 0 for false and 1 for true.
  6 | # "how" names both the comparison mode and the direction; it is one of
  7 | #   "std asc", "std desc" - awk's standard comparison rules
  8 | #   "str asc", "str desc" - compare as strings
  9 | #   "num asc", "num desc" - compare as numbers
 10 | # for any other value of "how", no explicit return value is given.
 11 | # required for all of the msort() functions below
 12 | function __compare(a, b, how) {
 13 |   # awk's standard comparison rules
 14 |   if (how == "std asc")  return a < b;
 15 |   if (how == "std desc") return a > b;
 16 | 
 17 |   # prefixing both sides with a letter forces a string comparison
 18 |   if (how == "str asc")  return "a" a < "a" b;
 19 |   if (how == "str desc") return "a" a > "a" b;
 20 | 
 21 |   # a unary plus on both sides forces a numeric comparison
 22 |   if (how == "num asc")  return +a < +b;
 23 |   if (how == "num desc") return +a > +b;
 24 | 
 25 |   # any other "how" falls through to here
 26 | }
 27 | 
 28 | # actual sorting function
 29 | # usage: __mergesort(array, len, how)
 30 | # sorts the values in "array" in-place, from indices 1 to "len", based
 31 | # on the comparison mode "how" (see the msort() description). returns "len".
 32 | # required for all of the msort() functions below
 33 | function __mergesort(array, len, how,
 34 |                      tmpa, alen, a, tmpb, blen, b, half, cur, pos, tmp, i) {
 35 |   # if there are 10 elements or less, use an insertion sort and return
 36 |   if (len <= 10) {
 37 |     # loop over each item, starting with the second
 38 |     for (cur=2; cur<=len; cur++) {
 39 |       pos = cur;
 40 |       # shift the item down the list into position
 41 |       while (pos > 1 && __compare(array[pos], array[pos-1], how)) {
 42 |         tmp = array[pos];
 43 |         array[pos] = array[pos-1];
 44 |         array[pos-1] = tmp;
 45 |         pos--;
 46 |       }
 47 |     }
 48 | 
 49 |     # return
 50 |     return len;
 51 |   }
 52 | 
 53 |   # determine the halfway point of the indices
 54 |   half = int(len / 2);
 55 | 
 56 |   # create temp arrays of the two halves. "i" is declared as a local above, so
 57 |   # that a global "i" in the calling script is not overwritten by these loops
 58 |   a = 0;
 59 |   for (i=1; i<=half; i++) {
 60 |     tmpa[++a] = array[i];
 61 | 
 62 |     # remove the index from the original array
 63 |     delete array[i];
 64 |   }
 65 |   b = 0;
 66 |   for (i=half+1; i<=len; i++) {
 67 |     tmpb[++b] = array[i];
 68 | 
 69 |     # remove the index from the original array
 70 |     delete array[i];
 71 |   }
 72 | 
 73 |   # sort the two halves with recursive calls
 74 |   alen = __mergesort(tmpa, a, how);
 75 |   blen = __mergesort(tmpb, b, how);
 76 | 
 77 |   # merge the two halves back into "array", refilling it from index 1
 78 |   len = 0;
 79 |   a = b = 1;
 80 |   # loop while there is still an element in either array
 81 |   while (a <= alen || b <= blen) {
 82 |     # a sorts first
 83 |     if (a <= alen && (b > blen || __compare(tmpa[a], tmpb[b], how))) {
 84 |       array[++len] = tmpa[a];
 85 |       delete tmpa[a++]; # remove the index from the temp array
 86 | 
 87 |     # b sorts first
 88 |     } else {
 89 |       array[++len] = tmpb[b];
 90 |       delete tmpb[b++]; # remove the index from the temp array
 91 |     }
 92 |   }
 93 | 
 94 |   # return the length
 95 |   return len;
 96 | }
 97 | 
 98 | # actual sorting function for the msortv() function
 99 | # usage: __mergesortv(array, values, len, how)
100 | # sorts the entries of "array" on the parallel entries of "values", from
101 | # indices 1 through "len", based on the comparison mode "how" (see the msortv()
102 | # description). returns "len". required for the msortv() function below
103 | function __mergesortv(array, values, len, how,
104 |                       tmpa, tmpva, alen, a, tmpb, tmpvb, blen, b,
105 |                       half, cur, pos, tmp, i) {
106 |   # if there are 10 elements or less, use an insertion sort and return
107 |   if (len <= 10) {
108 |     # loop over each item, starting with the second
109 |     for (cur=2; cur<=len; cur++) {
110 |       pos = cur;
111 |       # swap the item toward the front, keeping both arrays in step
112 |       while (pos > 1 && __compare(values[pos], values[pos-1], how)) {
113 |         tmp = array[pos];
114 |         array[pos] = array[pos-1];
115 |         array[pos-1] = tmp;
116 |         tmp = values[pos];
117 |         values[pos] = values[pos-1];
118 |         values[pos-1] = tmp;
119 | 
120 |         pos--;
121 |       }
122 |     }
123 | 
124 |     # the length is unchanged by the insertion sort
125 |     return len;
126 |   }
127 | 
128 |   # determine the halfway point of the indices
129 |   half = int(len / 2);
130 | 
131 |   # create temp arrays of the two halves (the loop index "i" is a local)
132 |   a = 0;
133 |   for (i=1; i<=half; i++) {
134 |     tmpa[++a] = array[i];
135 |     tmpva[a] = values[i];
136 | 
137 |     # remove the index from the original array
138 |     delete array[i];
139 |   }
140 |   b = 0;
141 |   for (i=half+1; i<=len; i++) {
142 |     tmpb[++b] = array[i];
143 |     tmpvb[b] = values[i];
144 | 
145 |     # remove the index from the original array
146 |     delete array[i];
147 |   }
148 | 
149 |   # sort the two halves with recursive calls
150 |   alen = __mergesortv(tmpa, tmpva, a, how);
151 |   blen = __mergesortv(tmpb, tmpvb, b, how);
152 | 
153 |   # merge the two halves back into "array"/"values", rebuilding "len"
154 |   len = 0;
155 |   a = b = 1;
156 |   # loop while there is still an element in either array
157 |   while (a <= alen || b <= blen) {
158 |     # a sorts first, or b is already exhausted
159 |     if (a <= alen && (b > blen || __compare(tmpva[a], tmpvb[b], how))) {
160 |       array[++len] = tmpa[a];
161 |       values[len] = tmpva[a];
162 |       delete tmpva[a];
163 |       delete tmpa[a++]; # remove the index from the temp array
164 | 
165 |     # b sorts first (also taken on ties, so the sort is not stable)
166 |     } else {
167 |       array[++len] = tmpb[b];
168 |       values[len] = tmpvb[b];
169 |       delete tmpvb[b];
170 |       delete tmpb[b++]; # remove the index from the temp array
171 |     }
172 |   }
173 | 
174 |   # return the length
175 |   return len;
176 | }
177 | 
178 | 
179 | 
180 | ## usage: msort(s, d [, how])
181 | ## copies the values of the array "s" into the array "d", indexed with
182 | ## sequential integers starting with 1, and sorts "d". returns the length, or
183 | ## -1 if "how" is not one of the values below. the source array "s" is only
184 | ## read, never modified. the optional string "how" controls the direction and
185 | ## the comparison mode. uses the merge sort algorithm, with an insertion sort
186 | ## when the list size gets small enough. this is not a stable sort.
187 | ## requires the __compare() and __mergesort() functions.
188 | ## valid values for "how" are:
189 | ##   "std asc"
190 | ##     use awk's standard rules for comparison, ascending. this is the default
191 | ##   "std desc"
192 | ##     use awk's standard rules for comparison, descending.
193 | ##   "str asc"
194 | ##     force comparison as strings, ascending.
195 | ##   "str desc"
196 | ##     force comparison as strings, descending.
197 | ##   "num asc"
198 | ##     force a numeric comparison, ascending.
199 | ##   "num desc"
200 | ##     force a numeric comparison, descending.
201 | function msort(array, out, how,    n, key) {
202 |   # an empty or missing "how" selects the default mode
203 |   if (!length(how)) {
204 |     how = "std asc";
205 | 
206 |   # anything else has to name a known mode and direction
207 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
208 |     return -1;
209 |   }
210 | 
211 |   # copy every value of the source into "out" under the next free
212 |   # sequential index
213 |   n = 0;
214 |   for (key in array) {
215 |     n++;
216 |     out[n] = array[key];
217 |   }
218 | 
219 |   # hand the packed copy to the merge sort and pass its return value
220 |   # (the element count) straight back to the caller
221 |   return __mergesort(out, n, how);
222 | }
223 | 
224 | ## usage: imsort(s [, how])
225 | ## in-place variant of msort(): the values of "s" are sorted within "s"
226 | ## itself. the original indices are destroyed and replaced with sequential
227 | ## integers starting with 1. everything else is described in msort() above.
228 | function imsort(array, how,    packed, n, key) {
229 |   # an empty or missing "how" selects the default mode
230 |   if (!length(how)) {
231 |     how = "std asc";
232 | 
233 |   # anything else has to name a known mode and direction
234 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
235 |     return -1;
236 |   }
237 | 
238 |   # move every value out of "array" into a scratch array under sequential
239 |   # indices, emptying "array" as we go
240 |   n = 0;
241 |   for (key in array) {
242 |     n++;
243 |     packed[n] = array[key];
244 |     delete array[key];
245 |   }
246 | 
247 |   # refill "array" from the scratch copy, releasing each scratch slot
248 |   key = 0;
249 |   while (++key <= n) {
250 |     array[key] = packed[key];
251 |     delete packed[key];
252 |   }
253 | 
254 |   # sort the repacked array and hand back the length it reports
255 |   return __mergesort(array, n, how);
256 | }
257 | 
258 | ## usage: msorti(s, d [, how])
259 | ## works like msort(), except that the indices of "s" are sorted instead of
260 | ## its values. when done, "d" is indexed with sequential integers starting
261 | ## with 1, and its values are the original indices of "s". everything else
262 | ## is described in msort() above.
263 | function msorti(array, out, how,    n, key) {
264 |   # an empty or missing "how" selects the default mode
265 |   if (!length(how)) {
266 |     how = "std asc";
267 | 
268 |   # anything else has to name a known mode and direction
269 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
270 |     return -1;
271 |   }
272 | 
273 |   # store every index of the source in "out" under the next free
274 |   # sequential index
275 |   n = 0;
276 |   for (key in array) {
277 |     n++;
278 |     out[n] = key;
279 |   }
280 | 
281 |   # hand the packed indices to the merge sort and pass its return value
282 |   # (the element count) straight back to the caller
283 |   return __mergesort(out, n, how);
284 | }
285 | 
286 | ## usage: imsorti(s [, how])
287 | ## in-place variant of msorti(): the indices of "s" are sorted within "s"
288 | ## itself. the original indices are destroyed and replaced with sequential
289 | ## integers, and the original values are discarded. everything else is
290 | ## described in msort() and msorti() above.
291 | function imsorti(array, how,    packed, n, key) {
292 |   # an empty or missing "how" selects the default mode
293 |   if (!length(how)) {
294 |     how = "std asc";
295 | 
296 |   # anything else has to name a known mode and direction
297 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
298 |     return -1;
299 |   }
300 | 
301 |   # collect every index of "array" in a scratch array under sequential
302 |   # indices, emptying "array" as we go
303 |   n = 0;
304 |   for (key in array) {
305 |     n++;
306 |     packed[n] = key;
307 |     delete array[key];
308 |   }
309 | 
310 |   # refill "array" from the scratch copy, releasing each scratch slot
311 |   key = 0;
312 |   while (++key <= n) {
313 |     array[key] = packed[key];
314 |     delete packed[key];
315 |   }
316 | 
317 |   # sort the repacked array and hand back the length it reports
318 |   return __mergesort(array, n, how);
319 | }
320 | 
321 | ## usage: msortv(s, d [, how])
322 | ## sorts the indices of the array "s" by their associated values, creating a
323 | ## new array "d" indexed with sequential integers starting with 1, whose
324 | ## values are the indices of "s". returns the length, or -1 if "how" is not
325 | ## valid. leaves the source array "s" unchanged. the optional string "how"
326 | ## controls the direction and the comparison mode. uses the merge sort
327 | ## algorithm, with an insertion sort when the list size gets small enough.
328 | ## this is not a stable sort. requires the __compare() and __mergesortv()
329 | ## functions. valid values for "how" are explained in the msort() function.
330 | function msortv(array, out, how,    vals, n, key) {
331 |   # an empty or missing "how" selects the default mode
332 |   if (!length(how)) {
333 |     how = "std asc";
334 | 
335 |   # anything else has to name a known mode and direction
336 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
337 |     return -1;
338 |   }
339 | 
340 |   # build two parallel arrays under the same sequential indices:
341 |   # "vals" holds each value of the source, and "out" holds the index
342 |   # that value was stored under
343 |   n = 0;
344 |   for (key in array) {
345 |     n++;
346 |     vals[n] = array[key];
347 |     out[n] = key;
348 |   }
349 | 
350 |   # sort "out" on the contents of "vals" and pass the merge sort's return
351 |   # value (the element count) straight back to the caller
352 |   return __mergesortv(out, vals, n, how);
353 | }
354 | 
355 | 
356 | 
357 | # You can do whatever you want with this stuff, but a thanks is always
358 | # appreciated
359 | 


--------------------------------------------------------------------------------
/options.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | ## usage: getopts(optstring [, longopt_array ])
  4 | ## Parses options, and deletes them from ARGV. "optstring" is of the form
  5 | ## "ab:c". Each letter is a possible option. If the letter is followed by a
  6 | ## colon (:), then the option requires an argument. If an argument is not
  7 | ## provided, or an invalid option is given, getopts will print the appropriate
  8 | ## error message and return "?". Returns each option as it's read, and -1 when
  9 | ## no options are left. "optind" will be set to the index of the next
 10 | ## non-option argument when finished. "optarg" will be set to the option's
 11 | ## argument, when provided. If not provided, "optarg" will be empty. "optname"
 12 | ## will be set to the current option, as provided. Getopts will delete each
 13 | ## option and argument that it successfully reads, so awk will be able to treat
 14 | ## whatever's left as filenames/assignments, as usual. A lone "-" is treated
 15 | ## as a non-option argument. If provided, "longopt_array" is an associative
 16 | ## array mapping long options to short ones (no hyphens on either).
 17 | ## Sample usage can be found in the examples dir, with gawk extensions, or in
 18 | ## the ogrep script for a POSIX example: https://github.com/e36freak/ogrep
 19 | function getopts(optstring, longarr,    opt, trimmed, hasarg, repeat, i) {
 20 |   hasarg = repeat = 0;
 21 |   optarg = "";
 22 |   # increment optind
 23 |   optind++;
 24 | 
 25 |   # return -1 if no args are left or the arg is not an option ("-" alone isn't)
 26 |   if (optind >= ARGC || ARGV[optind] !~ /^-./) {
 27 |     return -1;
 28 |   }
 29 | 
 30 |   # if option is "--" (end of options), delete arg and return -1
 31 |   if (ARGV[optind] == "--") {
 32 |     for (i=1; i<=optind; i++) {
 33 |       delete ARGV[i];
 34 |     }
 35 |     return -1;
 36 |   }
 37 | 
 38 |   # if the option is a long argument...
 39 |   if (ARGV[optind] ~ /^--/) {
 40 |     # trim hyphens
 41 |     trimmed = substr(ARGV[optind], 3);
 42 |     # if of the format --foo=bar, split the two. assign "bar" to optarg and
 43 |     # set hasarg to 1
 44 |     if (trimmed ~ /=/) {
 45 |       optarg = trimmed;
 46 |       sub(/=.*/, "", trimmed); sub(/^[^=]*=/, "", optarg);
 47 |       hasarg = 1;
 48 |     }
 49 |     
 50 |     # invalid long opt
 51 |     if (!(trimmed in longarr)) {
 52 |       printf("unrecognized option -- '%s'\n", ARGV[optind]) > "/dev/stderr";
 53 |       return "?";
 54 |     }
 55 | 
 56 |     opt = longarr[trimmed];
 57 |     # set optname by prepending dashes to the trimmed argument
 58 |     optname = "--" trimmed;
 59 | 
 60 |   # otherwise, it is a short option
 61 |   } else {
 62 |     # remove the hyphen, and get just the option letter
 63 |     opt = substr(ARGV[optind], 2, 1);
 64 |     # set trimmed to whatevers left
 65 |     trimmed = substr(ARGV[optind], 3);
 66 | 
 67 |     # invalid option (":" is optstring syntax, never an option letter)
 68 |     if (opt == ":" || !index(optstring, opt)) {
 69 |       printf("invalid option -- '%s'\n", opt) > "/dev/stderr";
 70 |       return "?";
 71 |     }
 72 | 
 73 |     # if there is more to the argument than just -o
 74 |     if (length(trimmed)) {
 75 |       # if option requires an argument, set the rest to optarg and hasarg to 1
 76 |       if (index(optstring, opt ":")) {
 77 |         optarg = trimmed;
 78 |         hasarg = 1;
 79 | 
 80 |       # otherwise, prepend a hyphen to the rest and set repeat to 1, so the
 81 |       # same arg is processed again without the first option
 82 |       } else {
 83 |         ARGV[optind] = "-" trimmed;
 84 |         repeat = 1;
 85 |       }
 86 |     }
 87 | 
 88 |     # set optname by prepending a hyphen to opt
 89 |     optname = "-" opt;
 90 |   }
 91 | 
 92 |   # if the option requires an arg and hasarg is 0
 93 |   if (index(optstring, opt ":") && !hasarg) {
 94 |     # increment optind, check if no arguments are left
 95 |     if (++optind >= ARGC) {
 96 |       printf("option requires an argument -- '%s'\n", optname) > "/dev/stderr";
 97 |       return "?";
 98 |     }
 99 | 
100 |     # the following argument becomes the option's argument
101 |     optarg = ARGV[optind];
102 | 
103 |   # if repeat is set, decrement optind so we process the same arg again
104 |   # mutually exclusive to needing an argument, otherwise hasarg would be set
105 |   } else if (repeat) {
106 |     optind--;
107 |   }
108 | 
109 |   # delete all arguments up to this point, just to make sure
110 |   for (i=1; i<=optind; i++) {
111 |     delete ARGV[i];
112 |   }
113 | 
114 |   # return the option letter
115 |   return opt;
116 | }
117 | 
118 | 
119 | 
120 | # You can do whatever you want with this stuff, but a thanks is always
121 | # appreciated
122 | 


--------------------------------------------------------------------------------
/psort.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # comparison function for the *psort* functions
  4 | # usage: __pcompare(a, b, patterns, plen, how)
  5 | # compares "a" and "b" based on "patterns" and "how", returning 0 for false and
  6 | # 1 for true. "patterns" is an indexed array of regexes, from 1 through "plen".
  7 | # each regex takes priority over subsequent regexes, followed by non-matching
  8 | # values. required for all of the psort() functions below
  9 | function __pcompare(a, b, patterns, plen, how,    p) {
 10 |   # loop over each regex in order, and check if either value matches
 11 |   for (p=1; p<=plen; p++) {
 12 |     # if the first matches the regex stored at index "p"...
 13 |     if (a ~ patterns[p]) {
 14 |       # check if the second also matches. if so, do a normal comparison
 15 |       if (b ~ patterns[p]) {
 16 |         # standard comparisons
 17 |         if (how == "std asc") {
 18 |           return a < b;
 19 |         } else if (how == "std desc") {
 20 |           return a > b;
 21 | 
 22 |         # force string comps
 23 |         } else if (how == "str asc") {
 24 |           return "a" a < "a" b;
 25 |         } else if (how == "str desc") {
 26 |           return "a" a > "a" b;
 27 | 
 28 |         # force numeric
 29 |         } else if (how == "num asc") {
 30 |           return +a < +b;
 31 |         } else if (how == "num desc") {
 32 |           return +a > +b;
 33 |         }
 34 | 
 35 |       # if the second doesn't match, the first sorts higher
 36 |       } else {
 37 |         return 1;
 38 |       }
 39 | 
 40 |     # if the second matches but the first didn't, the second sorts higher
 41 |     } else if (b ~ patterns[p]) {
 42 |       return 0;
 43 |     }
 44 |   }
 45 | 
 46 |   # no patterns matched, do a normal comparison
 47 |   return __compare(a, b, how);
 48 | }
 49 | 
 50 | 
 51 | # recursive worker behind the *psort* functions
 52 | # orders the values of "array" in-place between the indices "left" and
 53 | # "right", comparing with __pcompare() using "patterns", "plen" and "how"
 54 | # (see the psort() description). needed by every psort() function below
 55 | function __pquicksort(array, left, right, patterns, plen, how,
 56 |                       scan, last, hold) {
 57 |   # nothing to do for a range of one element or less
 58 |   if ((left - right) >= 0) {
 59 |     return;
 60 |   }
 61 | 
 62 |   # pick a random index inside the range to act as the pivot
 63 |   scan = left + int(rand() * (right - left + 1));
 64 | 
 65 |   # park the pivot value in the leftmost slot
 66 |   hold = array[left];
 67 |   array[left] = array[scan];
 68 |   array[scan] = hold;
 69 |   
 70 |   # "last" marks the end of the run of elements that sort before the pivot
 71 |   last = left;
 72 |   scan = left;
 73 |   while (++scan <= right) {
 74 |     # an element that sorts before the pivot joins the run
 75 |     if (__pcompare(array[scan], array[left], patterns, plen, how)) {
 76 |       last++;
 77 |       # trade places with the first element past the old end of the run
 78 |       hold = array[last];
 79 |       array[last] = array[scan];
 80 |       array[scan] = hold;
 81 |     }
 82 |   }
 83 | 
 84 |   # trade the pivot in the leftmost slot with the last element of the
 85 |   # run, which puts the pivot between the two partitions
 86 |   hold = array[left];
 87 |   array[left] = array[last];
 88 |   array[last] = hold;
 89 |   
 90 |   # sort the elements on each side of the pivot
 91 |   __pquicksort(array, left, last - 1, patterns, plen, how);
 92 |   __pquicksort(array, last + 1, right, patterns, plen, how);
 93 | }
 94 | 
 95 | 
 96 | ## usage: psort(s, d, patts, max [, how])
 97 | ## sorts the values of the array "s", based on the rules below. creates a new
 98 | ## sorted array "d" indexed with sequential integers starting with 1. "patts"
 99 | ## is a compact (non-sparse) 1-indexed array containing regular expressions.
100 | ## "max" is the length of the "patts" array. returns the length of the "d"
101 | ## array, or -1 if "how" is invalid. valid values for "how" are explained
102 | ## below. uses the quicksort algorithm with a random pivot, to avoid worst-case
103 | ## behavior on already sorted arrays. requires __pcompare() and __pquicksort().
104 | ##
105 | ##  Sorting rules:
106 | ##  - When sorting, values matching an expression in the "patts" array will
107 | ##    take priority over any other values
108 | ##  - Each expression in the "patts" array will have priority in ascending
109 | ##    order by index. "patts[1]" will have priority over "patts[2]" and
110 | ##    "patts[3]", etc
111 | ##  - Values both matching the same regex will be compared as usual
112 | ##  - All non-matching values will be compared as usual
113 | ##
114 | ## valid values for "how" are:
115 | ##   "std asc"
116 | ##     use awk's standard rules for comparison, ascending. this is the default
117 | ##   "std desc"
118 | ##     use awk's standard rules for comparison, descending.
119 | ##   "str asc"
120 | ##     force comparison as strings, ascending.
121 | ##   "str desc"
122 | ##     force comparison as strings, descending.
123 | ##   "num asc"
124 | ##     force a numeric comparison, ascending.
125 | ##   "num desc"
126 | ##     force a numeric comparison, descending.
127 | function psort(array, out, patterns, plen, how,    n, key) {
128 |   # an empty or missing "how" selects the default mode
129 |   if (!length(how)) {
130 |     how = "std asc";
131 | 
132 |   # anything else has to name a known mode and direction
133 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
134 |     return -1;
135 |   }
136 |   
137 |   # copy every value of the source into "out" under the next free
138 |   # sequential index
139 |   n = 0;
140 |   for (key in array) {
141 |     n++;
142 |     out[n] = array[key];
143 |   }
144 | 
145 |   # reseed the random number generator before the quicksort draws its
146 |   # pivots from rand()
147 |   srand();
148 | 
149 |   # sort the packed copy over its whole range
150 |   __pquicksort(out, 1, n, patterns, plen, how);
151 | 
152 |   # report how many elements were sorted
153 |   return n;
154 | }
155 | 
156 | ## usage: ipsort(s, patts, max [, how])
157 | ## in-place variant of psort(): the values of "s" are sorted within "s"
158 | ## itself. the original indices are destroyed and replaced with sequential
159 | ## integers starting with 1. everything else is described in psort() above.
160 | function ipsort(array, patterns, plen, how,    packed, n, key) {
161 |   # an empty or missing "how" selects the default mode
162 |   if (!length(how)) {
163 |     how = "std asc";
164 | 
165 |   # anything else has to name a known mode and direction
166 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
167 |     return -1;
168 |   }
169 |   
170 |   # move every value out of "array" into a scratch array under sequential
171 |   # indices, emptying "array" as we go
172 |   n = 0;
173 |   for (key in array) {
174 |     n++;
175 |     packed[n] = array[key];
176 |     delete array[key];
177 |   }
178 | 
179 |   # refill "array" from the scratch copy, releasing each scratch slot
180 |   key = 0;
181 |   while (++key <= n) {
182 |     array[key] = packed[key];
183 |     delete packed[key];
184 |   }
185 | 
186 |   # reseed the random number generator before the pivots are drawn
187 |   srand();
188 | 
189 |   # sort the repacked array over its whole range
190 |   __pquicksort(array, 1, n, patterns, plen, how);
191 | 
192 |   # report how many elements were sorted
193 |   return n;
194 | }
195 | 
196 | ## usage: psorti(s, d, patts, max [, how])
197 | ## works like psort(), except that the indices of "s" are sorted instead of
198 | ## its values. when done, "d" is indexed with sequential integers starting
199 | ## with 1, and its values are the original indices of "s". everything else
200 | ## is described in psort() above.
201 | function psorti(array, out, patterns, plen, how,    n, key) {
202 |   # an empty or missing "how" selects the default mode
203 |   if (!length(how)) {
204 |     how = "std asc";
205 | 
206 |   # anything else has to name a known mode and direction
207 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
208 |     return -1;
209 |   }
210 | 
211 |   # store every index of the source in "out" under the next free
212 |   # sequential index
213 |   n = 0;
214 |   for (key in array) {
215 |     n++;
216 |     out[n] = key;
217 |   }
218 | 
219 |   # reseed the random number generator before the quicksort draws its
220 |   # pivots from rand()
221 |   srand();
222 | 
223 |   # sort the packed indices over their whole range
224 |   __pquicksort(out, 1, n, patterns, plen, how);
225 | 
226 |   # report how many elements were sorted
227 |   return n;
228 | }
229 | 
230 | ## usage: ipsorti(s, patts, max [, how])
231 | ## in-place variant of psorti(): the indices of "s" are sorted within "s"
232 | ## itself. the original indices are destroyed and replaced with sequential
233 | ## integers, and the original values are discarded. everything else is
234 | ## described in psort() and psorti() above.
235 | function ipsorti(array, patterns, plen, how,    packed, n, key) {
236 |   # an empty or missing "how" selects the default mode
237 |   if (!length(how)) {
238 |     how = "std asc";
239 | 
240 |   # anything else has to name a known mode and direction
241 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
242 |     return -1;
243 |   }
244 | 
245 |   # collect every index of "array" in a scratch array under sequential
246 |   # indices, emptying "array" as we go
247 |   n = 0;
248 |   for (key in array) {
249 |     n++;
250 |     packed[n] = key;
251 |     delete array[key];
252 |   }
253 | 
254 |   # refill "array" from the scratch copy, releasing each scratch slot
255 |   key = 0;
256 |   while (++key <= n) {
257 |     array[key] = packed[key];
258 |     delete packed[key];
259 |   }
260 | 
261 |   # reseed the random number generator before the pivots are drawn
262 |   srand();
263 | 
264 |   # sort the repacked array over its whole range
265 |   __pquicksort(array, 1, n, patterns, plen, how);
266 | 
267 |   # report how many elements were sorted
268 |   return n;
269 | }
270 | 
271 | 
272 | # You can do whatever you want with this stuff, but a thanks is always
273 | # appreciated
274 | 


--------------------------------------------------------------------------------
/qsort.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # comparison helper
  4 | # usage: __compare(a, b, how)
  5 | # returns 1 when "a" sorts before "b" under the mode "how", and 0 when it
  6 | # does not. needed by every qsort() function below
  7 | function __compare(a, b, how) {
  8 |   # "std": leave both operands alone so awk's standard rules decide
  9 |   if (how == "std asc")
 10 |     return a < b;
 11 |   if (how == "std desc")
 12 |     return a > b;
 13 | 
 14 |   # "str": a shared string prefix forces a string comparison
 15 |   if (how == "str asc")
 16 |     return "a" a < "a" b;
 17 |   if (how == "str desc")
 18 |     return "a" a > "a" b;
 19 | 
 20 |   # "num": unary plus forces a numeric comparison
 21 |   if (how == "num asc")
 22 |     return +a < +b;
 23 |   if (how == "num desc")
 24 |     return +a > +b;
 25 |   # any other "how" falls through without an explicit return value
 26 | }
 27 | 
 28 | # recursive worker behind the qsort functions
 29 | # orders the values of "array" in-place between the indices "left" and
 30 | # "right", comparing with __compare() in the mode "how" (see the qsort()
 31 | # description). needed by qsort(), iqsort(), qsorti() and iqsorti() below
 32 | function __quicksort(array, left, right, how,    scan, last, hold) {
 33 |   # nothing to do for a range of one element or less
 34 |   if ((left - right) >= 0) {
 35 |     return;
 36 |   }
 37 | 
 38 |   # pick a random index inside the range to act as the pivot
 39 |   scan = left + int(rand() * (right - left + 1));
 40 | 
 41 |   # park the pivot value in the leftmost slot
 42 |   hold = array[left];
 43 |   array[left] = array[scan];
 44 |   array[scan] = hold;
 45 |   
 46 |   # "last" marks the end of the run of elements that sort before the pivot
 47 |   last = left;
 48 |   scan = left;
 49 |   while (++scan <= right) {
 50 |     # an element that sorts before the pivot joins the run
 51 |     if (__compare(array[scan], array[left], how)) {
 52 |       last++;
 53 |       # trade places with the first element past the old end of the run
 54 |       hold = array[last];
 55 |       array[last] = array[scan];
 56 |       array[scan] = hold;
 57 |     }
 58 |   }
 59 | 
 60 |   # trade the pivot in the leftmost slot with the last element of the
 61 |   # run, which puts the pivot between the two partitions
 62 |   hold = array[left];
 63 |   array[left] = array[last];
 64 |   array[last] = hold;
 65 |   
 66 |   # sort the elements on each side of the pivot
 67 |   __quicksort(array, left, last - 1, how);
 68 |   __quicksort(array, last + 1, right, how);
 69 | }
 70 | 
 71 | # recursive worker behind the qsortv() function
 72 | # orders the entries of "array" in-place between the indices "left" and
 73 | # "right", comparing the parallel entries of "values" with __compare() in
 74 | # the mode "how" and moving both arrays in step (see the qsortv()
 75 | # description). needed by the qsortv() function below
 76 | function __vquicksort(array, values, left, right, how,    scan, last, hold) {
 77 |   # nothing to do for a range of one element or less
 78 |   if ((left - right) >= 0) {
 79 |     return;
 80 |   }
 81 | 
 82 |   # pick a random index inside the range to act as the pivot
 83 |   scan = left + int(rand() * (right - left + 1));
 84 | 
 85 |   # park the pivot in the leftmost slot of both arrays
 86 |   hold = values[left];
 87 |   values[left] = values[scan];
 88 |   values[scan] = hold;
 89 |   hold = array[left];
 90 |   array[left] = array[scan];
 91 |   array[scan] = hold;
 92 |   
 93 |   # "last" marks the end of the run of elements that sort before the pivot
 94 |   last = left;
 95 |   scan = left;
 96 |   while (++scan <= right) {
 97 |     # an element whose value sorts before the pivot's joins the run
 98 |     if (__compare(values[scan], values[left], how)) {
 99 |       last++;
100 |       # trade places with the first element past the old end of the run
101 |       hold = values[last];
102 |       values[last] = values[scan];
103 |       values[scan] = hold;
104 |       hold = array[last];
105 |       array[last] = array[scan];
106 |       array[scan] = hold;
107 |     }
108 |   }
109 | 
110 |   # trade the pivot in the leftmost slot with the last element of the
111 |   # run, in both arrays, which puts the pivot between the two partitions
112 |   hold = values[left];
113 |   values[left] = values[last];
114 |   values[last] = hold;
115 |   hold = array[left];
116 |   array[left] = array[last];
117 |   array[last] = hold;
118 |   
119 |   # sort the elements on each side of the pivot
120 |   __vquicksort(array, values, left, last - 1, how);
121 |   __vquicksort(array, values, last + 1, right, how);
122 | }
123 | 
124 | 
125 | 
126 | ## usage: qsort(s, d [, how])
127 | ## copies the values of the array "s" into the array "d", indexed with
128 | ## sequential integers starting with 1, and sorts "d". returns the length, or
129 | ## -1 if "how" is not one of the values below. the source array "s" is only
130 | ## read, never modified. the optional string "how" controls the direction and
131 | ## the comparison mode. uses the quick sort algorithm, with a random pivot to
132 | ## avoid worst-case behavior on already sorted arrays. this is not a stable
133 | ## sort. requires the __compare() and __quicksort() functions.
134 | ## valid values for "how" are:
135 | ##   "std asc"
136 | ##     use awk's standard rules for comparison, ascending. this is the default
137 | ##   "std desc"
138 | ##     use awk's standard rules for comparison, descending.
139 | ##   "str asc"
140 | ##     force comparison as strings, ascending.
141 | ##   "str desc"
142 | ##     force comparison as strings, descending.
143 | ##   "num asc"
144 | ##     force a numeric comparison, ascending.
145 | ##   "num desc"
146 | ##     force a numeric comparison, descending.
147 | function qsort(array, out, how,    n, key) {
148 |   # an empty or missing "how" selects the default mode
149 |   if (!length(how)) {
150 |     how = "std asc";
151 | 
152 |   # anything else has to name a known mode and direction
153 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
154 |     return -1;
155 |   }
156 |   
157 |   # copy every value of the source into "out" under the next free
158 |   # sequential index
159 |   n = 0;
160 |   for (key in array) {
161 |     n++;
162 |     out[n] = array[key];
163 |   }
164 | 
165 |   # reseed the random number generator before the quicksort draws its
166 |   # pivots from rand()
167 |   srand();
168 | 
169 |   # sort the packed copy over its whole range
170 |   __quicksort(out, 1, n, how);
171 | 
172 |   # report how many elements were sorted
173 |   return n;
174 | }
175 | 
176 | ## usage: iqsort(s [, how])
177 | ## in-place variant of qsort(): the values of "s" are sorted within "s"
178 | ## itself. the original indices are destroyed and replaced with sequential
179 | ## integers starting with 1. everything else is described in qsort() above.
180 | function iqsort(array, how,    packed, n, key) {
181 |   # an empty or missing "how" selects the default mode
182 |   if (!length(how)) {
183 |     how = "std asc";
184 | 
185 |   # anything else has to name a known mode and direction
186 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
187 |     return -1;
188 |   }
189 |   
190 |   # move every value out of "array" into a scratch array under sequential
191 |   # indices, emptying "array" as we go
192 |   n = 0;
193 |   for (key in array) {
194 |     n++;
195 |     packed[n] = array[key];
196 |     delete array[key];
197 |   }
198 | 
199 |   # refill "array" from the scratch copy, releasing each scratch slot
200 |   key = 0;
201 |   while (++key <= n) {
202 |     array[key] = packed[key];
203 |     delete packed[key];
204 |   }
205 | 
206 |   # reseed the random number generator before the pivots are drawn
207 |   srand();
208 | 
209 |   # sort the repacked array over its whole range
210 |   __quicksort(array, 1, n, how);
211 | 
212 |   # report how many elements were sorted
213 |   return n;
214 | }
215 | 
216 | ## usage: qsorti(s, d [, how])
217 | ## works like qsort(), except that the indices of "s" are sorted instead of
218 | ## its values. when done, "d" is indexed with sequential integers starting
219 | ## with 1, and its values are the original indices of "s". everything else
220 | ## is described in qsort() above.
221 | function qsorti(array, out, how,    n, key) {
222 |   # an empty or missing "how" selects the default mode
223 |   if (!length(how)) {
224 |     how = "std asc";
225 | 
226 |   # anything else has to name a known mode and direction
227 |   } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
228 |     return -1;
229 |   }
230 | 
231 |   # store every index of the source in "out" under the next free
232 |   # sequential index
233 |   n = 0;
234 |   for (key in array) {
235 |     n++;
236 |     out[n] = key;
237 |   }
238 | 
239 |   # reseed the random number generator before the quicksort draws its
240 |   # pivots from rand()
241 |   srand();
242 | 
243 |   # sort the packed indices over their whole range
244 |   __quicksort(out, 1, n, how);
245 | 
246 |   # report how many elements were sorted
247 |   return n;
248 | }
249 | 
## usage: iqsorti(s [, how])
## the behavior is the same as that of qsorti(), except that the array "s" is
## sorted in-place: its original values are discarded, and it ends up indexed
## with sequential integers whose values are the original indices in sorted
## order. everything else is described in qsort() and qsorti() above.
function iqsorti(array, how,    tmp, count, i) {
  # validate "how", or fall back to the default ordering when it is empty
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # stash every index in a scratch array keyed 1..count, emptying "array"
  count = 0;
  for (i in array) {
    count++;
    tmp[count] = i;
    delete array[i];
  }

  # refill "array" with the stashed indices under sequential keys
  for (i = count; i >= 1; i--) {
    array[i] = tmp[i];
    delete tmp[i];
  }

  # seed the random number generator, then sort in place
  srand();
  __quicksort(array, 1, count, how);

  # the number of elements is the return value
  return count;
}
290 | 
## usage: qsortv(s, d [, how])
## sorts the indices of the array "s" according to their values. the result
## goes into "d", which is indexed with sequential integers starting with 1
## and whose values are the indices of "s". returns the length, or -1 if
## "how" is invalid. the source array "s" is left unchanged. the optional
## string "how" controls the direction and the comparison mode. uses the
## quicksort algorithm, with a random pivot to avoid worst-case behavior on
## already sorted arrays. this is not a stable sort. requires the __compare()
## and __vquicksort() functions.
## valid values for "how" are explained in the qsort() function above.
function qsortv(array, out, how,    values, count, i) {
  # validate "how", or fall back to the default ordering when it is empty
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # build two parallel arrays keyed 1..count: "out" holds each original
  # index and "values" holds the value stored under that index
  count = 0;
  for (i in array) {
    out[++count] = i;
    values[count] = array[i];
  }

  # seed the random number generator, then sort "out" by "values"
  srand();
  __vquicksort(out, values, 1, count, how);

  # the number of elements is the return value
  return count;
}
330 | 
331 | 
332 | 
333 | # You can do whatever you want with this stuff, but a thanks is always
334 | # appreciated
335 | 


--------------------------------------------------------------------------------
/shuf.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # actual shuffle function
  4 | # shuffles the values in "array" in-place, from indices "left" to "right".
  5 | # required for all of the shuf() functions below
  6 | function __shuffle(array, left, right,    r, i, tmp) {
  7 |   # loop backwards over the elements
  8 |   for (i=right; i>left; i--) {
  9 |     # generate a random number between the start and current element
 10 |     r = int(rand() * (i - left + 1)) + left;
 11 | 
 12 |     # swap current element and randomly generated one
 13 |     tmp = array[i];
 14 |     array[i] = array[r];
 15 |     array[r] = tmp;
 16 |   }
 17 | }
 18 | 
 19 | 
 20 | 
 21 | ## usage: shuf(s, d)
 22 | ## shuffles the array "s", creating a new shuffled array "d" indexed with
 23 | ## sequential integers starting with one. returns the length, or -1 if an error
 24 | ## occurs. leaves the indices of the source array "s" unchanged. uses the knuth-
 25 | ## fisher-yates algorithm. requires the __shuffle() function.
 26 | function shuf(array, out,    count, i) {
 27 |   # loop over each index, and generate a new array with the same values and
 28 |   # sequential indices
 29 |   count = 0;
 30 |   for (i in array) {
 31 |     out[++count] = array[i];
 32 |   }
 33 | 
 34 |   # seed the random number generator
 35 |   srand();
 36 | 
 37 |   # actually shuffle
 38 |   __shuffle(out, 1, count);
 39 | 
 40 |   # return the length
 41 |   return count;
 42 | }
 43 | 
 44 | ## usage: ishuf(s)
 45 | ## the behavior is the same as that of shuf(), except the array "s" is sorted
 46 | ## in-place. the original indices are destroyed and replaced with sequential
 47 | ## integers. everything else is described in shuf() above.
 48 | function ishuf(array,    tmp, count, i) {
 49 |   # loop over each index, and generate a new array with the same values and
 50 |   # sequential indices
 51 |   count = 0;
 52 |   for (i in array) {
 53 |     tmp[++count] = array[i];
 54 |     delete array[i];
 55 |   }
 56 | 
 57 |   # copy tmp back over array
 58 |   for (i=1; i<=count; i++) {
 59 |     array[i] = tmp[i];
 60 |     delete tmp[i];
 61 |   }
 62 | 
 63 |   # seed the random number generator
 64 |   srand();
 65 | 
 66 |   # actually shuffle
 67 |   __shuffle(array, 1, count);
 68 | 
 69 |   # return the length
 70 |   return count;
 71 | }
 72 | 
 73 | ## usage: shufi(s, d)
 74 | ## the bevavior is the same as that of shuf(), except that the array indices
 75 | ## are shuffled, not the array values. when done, the new array is indexed
 76 | ## numerically, and the values are those of the original indices. everything
 77 | ## else is described in shuf() above.
 78 | function shufi(array, out,    count, i) {
 79 |   # loop over each index, and generate a new array with the original indices
 80 |   # mapped to new numeric ones
 81 |   count = 0;
 82 |   for (i in array) {
 83 |     out[++count] = i;
 84 |   }
 85 | 
 86 |   # seed the random number generator
 87 |   srand();
 88 | 
 89 |   # actually shuffle
 90 |   __shuffle(out, 1, count);
 91 | 
 92 |   # return the length
 93 |   return count;
 94 | }
 95 | 
 96 | ## usage: ishufi(s)
 97 | ## the behavior is tha same as that of shufi(), except that the array "s" is
 98 | ## sorted in-place. the original indices are destroyed and replaced with
 99 | ## sequential integers. everything else is describmed in shuf() and shufi()
100 | ## above.
101 | function ishufi(array,    tmp, count, i) {
102 |   # loop over each index, and generate a new array with the original indices
103 |   # mapped to new numeric ones
104 |   count = 0;
105 |   for (i in array) {
106 |     tmp[++count] = i;
107 |     delete array[i];
108 |   }
109 | 
110 |   # copy tmp back over the original array
111 |   for (i=1; i<=count; i++) {
112 |     array[i] = tmp[i];
113 |     delete tmp[i];
114 |   }
115 | 
116 |   # seed the random number generator
117 |   srand();
118 | 
119 |   # actually shuffle
120 |   __shuffle(array, 1, count);
121 | 
122 |   # return the length
123 |   return count;
124 | }
125 | 
126 | 
127 | 
128 | # You can do whatever you want with this stuff, but a thanks is always
129 | # appreciated
130 | 


--------------------------------------------------------------------------------
/strings.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | # comparison function
  4 | # compares "A" and "b" based on "how", returning 0 for false and 1 for true
  5 | # required for all max() and min() functions below
  6 | function __mcompare(a, b, how) {
  7 |  # standard comparison
  8 |   if (how == "std") {
  9 |     return a > b;
 10 | 
 11 |   # force string comp
 12 |   } else if (how == "str") {
 13 |     return "a" a > "a" b;
 14 | 
 15 |   # force numeric
 16 |   } else if (how == "num") {
 17 |     return +a > +b;
 18 |   }
 19 | }
 20 | 
 21 | 
 22 | 
 23 | ## usage: center(string [, width])
 24 | ## returns "string" centered based on "width". if "width" is not provided (or 
 25 | ## is 0), uses the width of the terminal, or 80 if standard output is not open
 26 | ## on a terminal.
 27 | ## note: does not check the length of the string. if it's wider than the
 28 | ## terminal, it will not center lines other than the first. for best results,
 29 | ## combine with fold() (see the cfold script in the examples directory for a
 30 | ## script that does exactly this)
 31 | function center(str, cols,    off, cmd) {
 32 |   if (!cols) {
 33 |     # checks if stdout is a tty
 34 |     if (system("test -t 1")) {
 35 |       cols = 80;
 36 |     } else {
 37 |       cmd = "tput cols";
 38 |       cmd | getline cols;
 39 |       close(cmd);
 40 |     }
 41 |   }
 42 | 
 43 |   off = int((cols/2) + (length(str)/2));
 44 | 
 45 |   return sprintf("%*s", off, str);
 46 | }
 47 | 
 48 | ## usage: delete_arr(array)
 49 | ## deletes every element in "array"
 50 | function delete_arr(arr) {
 51 |   split("", arr);
 52 | }
 53 | 
 54 | ## usage: fold(string, sep [, width])
 55 | ## returns "string", wrapped, with lines broken on "sep" to "width" columns.
 56 | ## "sep" is a list of characters to break at, similar to IFS in a POSIX shell.
 57 | ## if "sep" is empty, wraps at exactly "width" characters. if "width" is not
 58 | ## provided (or is 0), uses the width of the terminal, or 80 if standard output
 59 | ## is not open on a terminal.
 60 | ## note: currently, tabs are squeezed to a single space. this will be fixed
 61 | function fold(str, sep, cols,    out, cmd, i, len, chars, c, last, f, first) {
 62 |   if (!cols) {
 63 |     # checks if stdout is a tty
 64 |     if (system("test -t 1")) {
 65 |       cols = 80;
 66 |     } else {
 67 |       cmd = "tput cols";
 68 |       cmd | getline cols;
 69 |       close(cmd);
 70 |     }
 71 |   }
 72 | 
 73 |   # squeeze tabs and newlines to spaces
 74 |   gsub(/[\t\n]/, " ", str);
 75 | 
 76 |   # if "sep" is empty, just fold on cols with substr
 77 |   if (!length(sep)) {
 78 |     len = length(str);
 79 | 
 80 |     out = substr(str, 1, cols);
 81 |     for (i=cols+1; i<=len; i+=cols) {
 82 |       out = out "\n" substr(str, i, cols);
 83 |     }
 84 | 
 85 |     return out;
 86 | 
 87 |   # otherwise, we have to loop over every character (can't split() on sep, it
 88 |   # would destroy the existing separators)
 89 |   } else {
 90 |     # split string into char array
 91 |     len = split(str, chars, "");
 92 |     # set boolean, used to assign the first line differently
 93 |     first = 1;
 94 | 
 95 |     for (i=1; i<=len; i+=last) {
 96 |       f = 0;
 97 |       for (c=i+cols-1; c>=i; c--) {
 98 |         if (index(sep, chars[c])) {
 99 |           last = c - i + 1;
100 |           f = 1;
101 |           break;
102 |         }
103 |       }
104 | 
105 |       if (!f) {
106 |         last = cols;
107 |       }
108 | 
109 |       if (first) {
110 |         out = substr(str, i, last);
111 |         first = 0;
112 |       } else {
113 |         out = out "\n" substr(str, i, last);
114 |       }
115 |     }
116 |   }
117 | 
118 |   # return the output
119 |   return out;
120 | }
121 | 
## usage: ssub(ere, repl [, in])
## like sub(), but the result is returned and the original is not modified.
## if "in" is not provided (or is empty), $0 is used.
function ssub(ere, repl, str) {
  # no target given: work on a copy of the current record
  if (length(str) == 0) {
    str = $0;
  }

  # sub() changes "str", which is only this function's private copy
  sub(ere, repl, str);

  return str;
}
134 | 
## usage: sgsub(ere, repl [, in])
## like gsub(), but the result is returned and the original is not modified.
## if "in" is not provided (or is empty), $0 is used.
function sgsub(ere, repl, str) {
  # no target given: work on a copy of the current record
  if (length(str) == 0) {
    str = $0;
  }

  # gsub() changes "str", which is only this function's private copy
  gsub(ere, repl, str);

  return str;
}
147 | 
## usage: lsub(str, repl [, in])
## substitutes the string "repl" in place of the first instance of the
## literal string "str" in the string "in" and returns the result. neither
## "str" nor "repl" is interpreted as a pattern. does not modify the original
## string. if "in" is not provided (or is empty), uses $0.
function lsub(str, rep, val,    len, i) {
  # no target given: work on a copy of the current record
  if (!length(val)) {
    val = $0;
  }

  # the length of "str" tells how much of the target gets replaced
  len = length(str);

  # an empty search string matches at the very beginning: just prepend
  if (len == 0) {
    return rep val;
  }

  # nothing to do when "str" does not occur in the target
  i = index(val, str);
  if (i == 0) {
    return val;
  }

  # stitch together: text before the match, replacement, text after it
  return substr(val, 1, i - 1) rep substr(val, i + len);
}
173 | 
## usage: glsub(str, repl [, in])
## behaves like lsub, except it replaces all occurrences of "str". text that
## has been inserted as a replacement is never searched again.
function glsub(str, rep, val,    out, len, i, a, l) {
  # no target given: work on a copy of the current record
  if (!length(val)) {
    val = $0;
  }

  # start with an empty result
  out = "";

  # the length of "str" tells how much of the target each match consumes
  len = length(str);

  # an empty search string matches before, between and after every
  # character, so put "rep" in all of those places
  if (len == 0) {
    l = split(val, a, "");
    out = rep;
    for (i = 1; i <= l; i++) {
      out = out a[i] rep;
    }

    return out;
  }

  # as long as "str" still occurs in what is left of "val"...
  for (i = index(val, str); i; i = index(val, str)) {
    # ...move the text before the match, plus the replacement, to the result
    out = out substr(val, 1, i - 1) rep;

    # ...and drop everything up to and including the match from "val"
    val = substr(val, i + len);
  }

  # whatever remains of "val" has no more matches
  return out val;
}
206 | 
## usage: shell_esc(string)
## returns "string" wrapped in single quotes, with embedded single quotes
## escaped, so that it can be used as one word in a shell command
function shell_esc(str) {
  # each embedded quote becomes: close quote, escaped quote, reopen quote
  gsub(/'/, "'\\''", str);

  # wrap the whole thing in single quotes
  return sprintf("'%s'", str);
}
214 | 
## usage: str_to_arr(string, array)
## fills "array" with the characters of "string", one character per element,
## indexed from 1. returns the number of elements.
function str_to_arr(str, arr,    n) {
  # an empty separator makes split() produce one element per character
  n = split(str, arr, "");

  return n;
}
221 | 
## usage: extract_range(string, start, stop)
## extracts fields "start" through "stop" from "string", based on FS, with the
## original field separators intact. returns the extracted fields, or an
## empty string if "string" has fewer than "start" fields.
function extract_range(str, start, stop,    i, re, out) {
  # if FS is the default, trim leading and trailing spaces from "string" and
  # set "re" to the appropriate regex
  if (FS == " ") {
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", str);
    re = "[[:space:]]+";

  # awk uses any other single-character FS literally, even when it is a regex
  # metacharacter such as "|" or ".", so it must not reach match() as a
  # pattern. a bracket expression makes the character literal; backslash and
  # caret are special inside brackets and get a backslash escape instead
  } else if (length(FS) == 1) {
    if (FS == "\\" || FS == "^") {
      re = "\\" FS;
    } else {
      re = "[" FS "]";
    }

  # anything longer is a regular expression already
  } else {
    re = FS;
  }

  # remove fields 1 through start - 1 from the beginning
  for (i=1; i<start; i++) {
    if (match(str, re)) {
      str = substr(str, RSTART + RLENGTH);

    # there's no FS left, therefore the range is empty
    } else {
      return "";
    }
  }

  # add fields start through stop - 1 to the output var
  for (i=start; i<stop; i++) {
    if (match(str, re)) {
      # append the field, and the separator after it, to the output
      out = out substr(str, 1, RSTART + RLENGTH - 1);

      # remove the field from the line
      str = substr(str, RSTART + RLENGTH);

    # no FS left, just append the rest of the line and return
    } else {
      return out str;
    }
  }

  # append the last field, without the separator that follows it, and return
  if (match(str, re)) {
    return out substr(str, 1, RSTART - 1);
  } else {
    return out str;
  }
}
268 | 
## usage: fwidths(width_spec [, string])
## extracts substrings from "string" according to "width_spec" from left to
## right and assigns them to $1, $2, etc. also assigns the NF variable. if
## "string" is not supplied (or is empty), uses $0. "width_spec" is a
## whitespace separated list of numbers that specify field widths, just like
## GNU awk's FIELDWIDTHS variable. if there is data left over after the last
## width_spec, adds it to a final field. returns the value for NF.
function fwidths(wspec, str,    fw, i, len) {
  if (!length(str)) {
    str = $0;
  }

  # turn wspec into the array fw. splitting on " " uses awk's default field
  # splitting, so repeated, leading or trailing blanks in the spec do not
  # turn into bogus zero-width fields
  len = split(wspec, fw, " ");

  # loop over each wspec value, while the string is not exhausted
  for (i=1; i <= len && length(str); i++) {
    # assign the field
    $i = substr(str, 1, fw[i]);

    # chop the value off of the original string
    str = substr(str, fw[i] + 1);
  }

  # if there's anything left, add another field
  if (length(str)) {
    $i = str;

  # otherwise the loop counter is one past the last field assigned
  } else {
    i--;
  }

  # set and return NF
  return NF = i;
}
303 | 
## usage: fwidths_arr(width_spec, array [, string])
## extracts substrings from "string" according to "width_spec" from left to
## right and assigns them to "array", indexed with sequential integers
## starting with 1. all elements of "array" are deleted first. if "string" is
## not supplied (or is empty), uses $0. "width_spec" is a whitespace separated
## list of numbers that specify field widths. if there is data left over
## after the last width, adds it as a final element. returns the length.
function fwidths_arr(wspec, arr, str,    fw, i, len) {
  if (!length(str)) {
    str = $0;
  }

  # start from an empty array, so that elements from an earlier, longer
  # result cannot linger beyond the returned length
  split("", arr);

  # turn wspec into the array fw. splitting on " " uses awk's default field
  # splitting, so repeated, leading or trailing blanks in the spec do not
  # turn into bogus zero-width fields
  len = split(wspec, fw, " ");

  # loop over each wspec value, while the string is not exhausted
  for (i=1; i <= len && length(str); i++) {
    # assign the array element
    arr[i] = substr(str, 1, fw[i]);

    # chop the value off of the original string
    str = substr(str, fw[i] + 1);
  }

  # if there's anything left, add another element
  if (length(str)) {
    arr[i] = str;

  # otherwise the loop counter is one past the last element assigned
  } else {
    i--;
  }

  # return the array length
  return i;
}
335 | 
## usage: lsplit(str, arr, sep)
## splits the string "str" into array elements "arr[1]", "arr[2]", .., "arr[n]",
## and returns "n". all elements of "arr" are deleted before the split is
## performed. the separation is done on the literal string "sep", which is
## never interpreted as a pattern. an empty "sep" splits into characters.
function lsplit(str, arr, sep,    len, slen, i) {
  # start from an empty array
  split("", arr);

  # with an empty separator, hand the job to a plain split()
  slen = length(sep);
  if (slen == 0) {
    return split(str, arr, "");
  }

  # peel one field off the front of "str" for every separator found
  len = 0;
  for (i = index(str, sep); i > 0; i = index(str, sep)) {
    # the field is everything before the separator
    len++;
    arr[len] = substr(str, 1, i - 1);

    # continue with the text after the separator
    str = substr(str, i + slen);
  }

  # what is left is the last field
  len++;
  arr[len] = str;

  return len;
}
364 | 
## usage: ssplit(str, arr, seps [, ere])
## similar to GNU awk 4's "seps" functionality for split(). splits the string
## "str" into the array "arr" and the separators array "seps" on the regular
## expression "ere", and returns the number of fields. both arrays are emptied
## first. the value of "seps[i]" is the separator that appeared in front of
## "arr[i+1]". if "ere" is omitted or empty, FS is used instead (a
## single-character FS is taken literally, as awk itself does). if "ere" is a
## single space, leading whitespace in "str" will go into the extra array
## element "seps[0]" and trailing whitespace will go into the extra array
## element "seps[len]", where "len" is the return value.
## note: /regex/ style quoting cannot be used for "ere".
## note: splitting stops at the first place where "ere" matches the empty
## string; the rest of "str" becomes the last field.
function ssplit(str, arr, seps, ere,    len, totrim) {
  # start from empty result arrays, so nothing from an earlier call lingers
  split("", arr);
  split("", seps);
  len = 0;

  # if "ere" is unset or empty, use FS
  if (!length(ere)) {
    ere = FS;

    # awk uses a single-character FS (other than space) literally, even when
    # it is a regex metacharacter such as "|" or "."; make it literal here
    # too. backslash and caret are special inside a bracket expression, so
    # they get a backslash escape instead
    if (length(ere) == 1 && ere != " ") {
      if (ere == "\\" || ere == "^") {
        ere = "\\" ere;
      } else {
        ere = "[" ere "]";
      }
    }
  }

  # if "ere" is a single space...
  if (ere == " ") {
    # set it to match all spaces
    ere = "[[:space:]]+";

    # trim leading whitespace and assign it to seps[0]
    if (match(str, /[^[:space:]]/)) {
      seps[0] = substr(str, 1, RSTART - 1);
      str = substr(str, RSTART);

    # no non-space characters in the line, just return
    } else {
      return 0;
    }

    # don't put an empty element after the last separator
    totrim = 1;
  }

  # loop while "ere" is matched. an empty match consumes nothing, so without
  # the RLENGTH check the loop would never end
  while (match(str, ere) && RLENGTH > 0) {
    # append field and sep to arrays
    len++;
    arr[len] = substr(str, 1, RSTART - 1);
    seps[len] = substr(str, RSTART, RLENGTH);

    # remove matched portion from the string
    str = substr(str, RSTART + RLENGTH);
  }

  # append last field to "arr" if needed
  if (length(str) || !totrim) {
    arr[++len] = str;
  }

  # return the length
  return len;
}
420 | 
## usage: ends_with(string, substring)
## returns 1 if "string" ends with "substring", otherwise 0
function ends_with(string, s) {
  # compare "s" against the tail of "string" that has the same length
  if (substr(string, length(string) - length(s) + 1) == s) {
    return 1;
  }

  return 0;
}
426 | 
## usage: trim(string)
## returns "string" with leading and trailing blanks (spaces and tabs) removed
function trim(str) {
  # strip the blanks at the front...
  sub(/^[[:blank:]]+/, "", str);

  # ...and then the ones at the back
  sub(/[[:blank:]]+$/, "", str);

  return str;
}
434 | 
## usage: rev(string)
## returns "string" with its characters in reverse order
function rev(str,    a, len, i, o) {
  # one array element per character
  len = split(str, a, "");

  # walk forwards, putting each character in front of what has been
  # collected so far, so the first character ends up last
  for (i = 1; i <= len; i++) {
    o = a[i] o;
  }

  return o;
}
448 | 
## usage: max(array [, how ])
## returns the maximum value in "array", 0 if the array is empty, or -1 if
## "how" is invalid. the optional string "how" controls the comparison mode.
## requires the __mcompare() function.
## valid values for "how" are:
##   "std"
##     use awk's standard rules for comparison. this is the default
##   "str"
##     force comparison as strings
##   "num"
##     force a numeric comparison
function max(array, how,    m, i, f) {
  # validate "how", or fall back to the default mode when it is empty
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # "m" is the best value so far; "f" is set until a first value is seen
  m = 0;
  f = 1;

  for (i in array) {
    # take the very first value unconditionally, later ones only when they
    # compare greater than the current best
    if (f || __mcompare(array[i], m, how)) {
      m = array[i];
      f = 0;
    }
  }

  return m;
}
493 | 
## usage: maxi(array [, how ])
## the behavior is the same as that of max(), except that the array indices
## are compared and returned, not the array values. everything else is
## explained in max() above.
function maxi(array, how,    m, i, f) {
  # validate "how", or fall back to the default mode when it is empty
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # "m" is the best index so far; "f" is set until a first index is seen
  m = 0;
  f = 1;

  for (i in array) {
    # take the very first index unconditionally, later ones only when they
    # compare greater than the current best
    if (f || __mcompare(i, m, how)) {
      m = i;
      f = 0;
    }
  }

  return m;
}
530 | 
## usage: min(array [, how ])
## the behavior is the same as that of max(), except that the minimum value
## is returned instead of the maximum. everything else is explained in max()
## above.
function min(array, how,    m, i, f) {
  # validate "how", or fall back to the default mode when it is empty
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # "m" is the best value so far; "f" is set until a first value is seen
  m = 0;
  f = 1;

  for (i in array) {
    # take the very first value unconditionally, later ones only when the
    # current best compares greater than them
    if (f || __mcompare(m, array[i], how)) {
      m = array[i];
      f = 0;
    }
  }

  return m;
}
567 | 
## usage: mini(array [, how ])
## the behavior is the same as that of min(), except that the array indices
## are compared and returned instead of the array values. everything else is
## explained in min() and max() above.
function mini(array, how,    m, i, f) {
  # validate "how", or fall back to the default mode when it is empty
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # "m" is the best index so far; "f" is set until a first index is seen
  m = 0;
  f = 1;

  for (i in array) {
    # take the very first index unconditionally, later ones only when the
    # current best compares greater than them
    if (f || __mcompare(m, i, how)) {
      m = i;
      f = 0;
    }
  }

  return m;
}
605 | 
606 | 
607 | 
608 | # You can do whatever you want with this stuff, but a thanks is always
609 | # appreciated
610 | 


--------------------------------------------------------------------------------
/sys.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | ## usage: isatty(fd)
  4 | ## Checks if "fd" is open on a tty. Returns 1 if so, 0 if not, and -1 if an
  5 | ## error occurs
  6 | function isatty(fd) {
  7 |   # make sure fd is an int
  8 |   if (fd !~ /^[0-9]+$/) {
  9 |     return -1;
 10 |   }
 11 | 
 12 |   # actually test
 13 |   return !system("test -t " fd);
 14 | }
 15 | 
 16 | ## usage: mktemp(template [, type])
 17 | ## creates a temporary file or directory, safely, and returns its name.
 18 | ## if template is not a pathname, the file will be created in ENVIRON["TMPDIR"]
 19 | ## if set, otherwise /tmp. the last six characters of template must be "XXXXXX",
 20 | ## and these are replaced with a string that makes the filename unique. type, if
 21 | ## supplied, is either "f", "d", or "u": for file, directory, or dry run (just
 22 | ## returns the name, doesn't create a file), respectively. If template is not
 23 | ## provided, uses "tmp.XXXXXX". Files are created u+rw, and directories u+rwx,
 24 | ## minus umask restrictions. returns -1 if an error occurs.
 25 | function mktemp(template, type,
 26 |                 c, chars, len, dir, dir_esc, rstring, i, out, out_esc, umask,
 27 |                 cmd) {
 28 |   # portable filename characters
 29 |   c = "012345689ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
 30 |   len = split(c, chars, "");
 31 | 
 32 |   # make sure template is valid
 33 |   if (length(template)) {
 34 |     if (template !~ /XXXXXX$/) {
 35 |       return -1;
 36 |     }
 37 | 
 38 |   # template was not supplied, use the default
 39 |   } else {
 40 |     template = "tmp.XXXXXX";
 41 |   }
 42 | 
 43 |   # make sure type is valid
 44 |   if (length(type)) {
 45 |     if (type !~ /^[fdu]$/) {
 46 |       return -1;
 47 |     }
 48 | 
 49 |   # type was not supplied, use the default
 50 |   } else {
 51 |     type = "f";
 52 |   }
 53 | 
 54 |   # if template is a path...
 55 |   if (template ~ /\//) {
 56 |     dir = template;
 57 |     sub(/\/[^/]*$/, "", dir);
 58 |     sub(/.*\//, "", template);
 59 | 
 60 |   # template is not a path, determine base dir
 61 |   } else {
 62 |     if (length(ENVIRON["TMPDIR"])) {
 63 |       dir = ENVIRON["TMPDIR"];
 64 |     } else {
 65 |       dir = "/tmp";
 66 |     }
 67 |   }
 68 | 
 69 |   # escape dir for shell commands
 70 |   esc_dir = dir;
 71 |   sub(/'/, "'\\''", esc_dir);
 72 |   esc_dir = "'" esc_dir "'";
 73 | 
 74 |   # if this is not a dry run, make sure the dir exists
 75 |   if (type != "u" && system("test -d " esc_dir)) {
 76 |     return -1;
 77 |   }
 78 | 
 79 |   # get the base of the template, sans Xs
 80 |   template = substr(template, 0, length(template) - 6);
 81 |   
 82 |   # generate the filename
 83 |   do {
 84 |     rstring = "";
 85 |     for (i=0; i<6; i++) {
 86 |       c = chars[int(rand() * len) + 1];
 87 |       rstring = rstring c;
 88 |     }
 89 |     
 90 |     out_esc = out = dir "/" template rstring;
 91 |     sub(/'/, "'\\''", out_esc);
 92 |     out_esc = "'" out_esc "'";
 93 |   } while (!system("test -e " out_esc));
 94 | 
 95 |   # if needed, create the filename
 96 |   if (type == "f") {
 97 |     system("touch " out_esc);
 98 |     cmd = "umask";
 99 |     cmd | getline umask;
100 |     close(cmd);
101 |     umask = substr(umask, 2, 1);
102 |     system("chmod 0" 6 - umask "00 " out_esc);
103 |   } else if (type == "d") {
104 |     system("mkdir " out_esc);
105 |     cmd = "umask";
106 |     cmd | getline umask;
107 |     close(cmd);
108 |     umask = substr(umask, 2, 1);
109 |     system("chmod 0" 7 - umask "00 " out_esc);
110 |   }
111 | 
112 |   # return the filename
113 |   return out;
114 | }
115 | 
116 | 
117 | 
118 | # You can do whatever you want with this stuff, but a thanks is always
119 | # appreciated
120 | 


--------------------------------------------------------------------------------
/times.awk:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/awk -f
  2 | 
  3 | ## usage: month_to_num(month)
  4 | ## converts human readable month to the decimal representation
  5 | ## returns the number, -1 if the month doesn't exist
  6 | function month_to_num(mon,    months, m) {
  7 |   # populate months[] array
  8 |   months["january"] =  1; months["february"] =  2; months["march"]     =  3;
  9 |   months["april"]   =  4; months["may"]      =  5; months["june"]      =  6;
 10 |   months["july"]    =  7; months["august"]   =  8; months["september"] =  9;
 11 |   months["october"] = 10; months["november"] = 11; months["december"]  = 12;
 12 | 
 13 |   # also populate abbreviations
 14 |   for (m in months) {
 15 |     months[substr(m, 1, 3)] = months[m];
 16 |   }
 17 | 
 18 |   # convert month to lowercase
 19 |   mon = tolower(mon);
 20 | 
 21 |   # check if month exists
 22 |   if (mon in months) {
 23 |     return months[mon];
 24 |   } else {
 25 |     return -1;
 26 |   }
 27 | }
 28 | 
 29 | ## usage: day_to_num(day)
 30 | ## converts human readable day to the decimal representation
 31 | ## returns the number, -1 if the day doesn't exist
 32 | ## like date +%w, sunday is 0
 33 | function day_to_num(day,    days, d) {
 34 |   # populate days[] array
 35 |     days["sunday"]    = 0; days["monday"]   = 1; days["tuesday"] = 2;
 36 |     days["wednesday"] = 3; days["thursday"] = 4; days["friday"]  = 5;
 37 |     days["saturday"]  = 6;
 38 | 
 39 |   # also populate abbreviations
 40 |     days["sun"]   = 0; days["mon"] = 1; days["tues"] = 2; days["wed"] = 3;
 41 |     days["thurs"] = 4; days["fri"] = 5; days["sat"]  = 6;
 42 | 
 43 |   # convert day to lowercase
 44 |     day = tolower(day);
 45 | 
 46 |   # check if day exists
 47 |   if (day in days) {
 48 |     return days[day];
 49 |   } else {
 50 |     return -1;
 51 |   }
 52 | }
 53 | 
 54 | ## usage: hr_to_sec(timestamp)
 55 | ## converts HH:MM:SS or MM:SS to seconds
 56 | ## returns -1 if invalid format
 57 | function hr_to_sec(time,    t, l, i, j) {
 58 |   # check for valid format
 59 |   if (time !~ /^[0-9]+(:[0-9][0-9])?:[0-9][0-9]$/) {
 60 |     return -1;
 61 |   }
 62 | 
 63 |   # convert
 64 |   l = split(time, t, /:/);
 65 |   
 66 |   j = time = 0;
 67 |   for (i=l; i>0; i--) {
 68 |     time += t[i] * (60 ^ j++);
 69 |   }
 70 | 
 71 |   return time;
 72 | }
 73 | 
 74 | ## usage: sec_to_hr(seconds)
 75 | ## converts seconds to HH:MM:SS
 76 | function sec_to_hr(sec,    m, s) {
 77 |   s = sec % 60;
 78 |   sec = int(sec / 60);
 79 |   m = sec % 60;
 80 |   sec = int(sec / 60);
 81 | 
 82 |   return sprintf("%02d:%02d:%02d", sec, m, s);
 83 | }
 84 | 
 85 | ## usage: ms_to_hr(milliseconds)
 86 | ## converts milliseconds to a "time(1)"-similar human readable format, such
 87 | ## as 1m4.356s
 88 | function ms_to_hr(ms,    m, s, ns) {
 89 |   ms = ms / 1000;
 90 |   s = int(ms);
 91 |   m = int(s / 60);
 92 |   ns = s % 60;
 93 | 
 94 |   return sprintf("%dm%0.3fs", m, ns + (ms - s));
 95 | }
 96 | 
 97 | ## usage: add_day_suff(day_of_month)
 98 | ## prepends the appropriate suffix to "day_of_month". for example,
 99 | ## add_day_suff(1) will return "1st", and add_day_suff(22) will return "22nd"
100 | ## returns -1 if "day_of_month" is not a positive integer
101 | function add_day_suff(day) {
102 |   # make sure day is a positive int
103 |   if (day !~ /^[0-9]+$/ || day <= 0) {
104 |     return -1;
105 |   }
106 | 
107 |   # append prefix
108 |   if ((day > 3 && day < 21) || day ~ /[04-9]$/) {
109 |     return day "th";
110 |   } else if (day ~ /1$/) {
111 |     return day "st";
112 |   } else if (day ~ /2$/) {
113 |     return day "nd";
114 |   } else {
115 |     return day "rd";
116 |   }
117 | }
118 | 
119 | 
120 | 
121 | 
122 | # You can do whatever you want with this stuff, but a thanks is always
123 | # appreciated
124 | 


--------------------------------------------------------------------------------