├── README ├── colors.awk ├── csv.awk ├── examples ├── cfold ├── colors ├── csv ├── math ├── options ├── sort ├── strings ├── sys └── times ├── math.awk ├── msort.awk ├── options.awk ├── psort.awk ├── qsort.awk ├── shuf.awk ├── strings.awk ├── sys.awk └── times.awk /README: -------------------------------------------------------------------------------- 1 | AWK library function descriptions 2 | 3 | Every function below is fully POSIX compliant, and has been tested on gawk 3 4 | and 4, as well as nawk 20110810 and mawk 1.3.4. Interval notation has not been 5 | used in this library, even though POSIX states that it should be supported, 6 | as most of the current implementations still do not support it. 7 | 8 | Note: mawk 1.3.3 is even less POSIX compliant than 1.3.4, and doesn't handle 9 | POSIX character classes in regexes (like [:space:] or [:alpha:]), among other 10 | things. It is currently the standard on ubuntu, and is most likely standard on 11 | other debian-based linux distributions, as well. The functions below are not 12 | guaranteed to work on versions of mawk prior to 1.3.4, although they should not 13 | be too difficult to alter in order to do so. 14 | 15 | If you are using gawk, I recommend adding the location of this repo to the 16 | AWKPATH environment variable. This will allow you to only supply the file name 17 | to -f and @include, instead of having to supply the actual path to the library. 18 | 19 | 20 | The 'examples' directory includes a sample script for each library, with sample 21 | usage of each function. While most of the examples are solely there to give 22 | examples, the "cfold" script is fully functioning and is (in my opinion) rather 23 | useful. It shows just how powerful these libraries can be... most of the script 24 | is just there to parse options. These examples are written with gawk extensions. 25 | Making them POSIX is left as an exercise to the user, if desired. 
26 | 27 | 28 | Most of the functions in this library work by themselves, with the exception of 29 | the functions in sort.awk and the max() and min() functions in strings.awk. 30 | This means that they can easily be copy/pasted into a script, and will function 31 | fine on their own. In the case of sort.awk, the functions that the others depend 32 | on begin with '__', and which functions they go with (as well as which functions 33 | require what) are explained in the comments. 34 | 35 | 36 | 37 | 38 | Libraries, and the available functions within: 39 | 40 | math.awk 41 | 42 | abs(number) 43 | returns the absolute value of "number" 44 | 45 | ceil(number) 46 | returns "number" rounded UP to the nearest int 47 | 48 | ceiling(multiple, number) 49 | returns "number" rounded UP to the nearest multiple of "multiple". integers 50 | only 51 | 52 | floor(multiple, number) 53 | returns "number" rounded DOWN to the nearest multiple of "multiple". 54 | integers only 55 | 56 | round(multiple, number) 57 | returns "number" rounded to the nearest multiple of "multiple". integers 58 | only 59 | 60 | rint(number) 61 | returns "number" rounded to the nearest integer 62 | 63 | change_base(number, start_base, end_base) 64 | converts "number" from "start_base" to "end_base" 65 | bases must be between 2 and 64. the digits greater than 9 are represented 66 | by the lowercase letters, the uppercase letters, @, and _, in that order. 67 | if ibase is less than or equal to 36, lowercase and uppercase letters may 68 | be used interchangeably to represent numbers between 10 and 35. integers 69 | only. returns 0 if any argument is invalid 70 | 71 | format_num(number) 72 | adds commas to "number" to make it more readable. for example, 73 | format_num(1000) will return "1,000", and format_num(123456.7890) will 74 | return "123,456.7890". also trims leading zeroes 75 | returns 0 if "number" is not a valid number 76 | 77 | str_to_num(string) 78 | examines "string", and returns its numeric value. 
if "string" begins with a 79 | leading 0, assumes that "string" is an octal number. if "string" begins with 80 | a leading "0x" or "0X", assumes that "string" is a hexadecimal number. 81 | otherwise, decimal is assumed. 82 | 83 | isint(string) 84 | returns 1 if "string" is a valid integer, otherwise 0 85 | 86 | isnum(string) 87 | returns 1 if "string" is a valid number, otherwise 0 88 | 89 | isprime(number) 90 | returns 1 if "number" is a prime number, otherwise 0. "number" must be a 91 | positive integer greater than one 92 | 93 | gcd(a, b) 94 | returns the greatest common divisor (greatest common factor) of a and b. 95 | both a and b must be positive integers. uses the recursive Euclidean algorithm. 96 | 97 | lcm(a, b) 98 | returns the least common multiple of a and b. both a and b must be positive 99 | integers. 100 | 101 | calc_e() 102 | approximates e by calculating the summation from k=0 to k=50 of 1/k! 103 | returns the result to 10 decimal places 104 | 105 | calc_pi() 106 | returns pi, with an accuracy of 10 decimal places 107 | 108 | calc_tau() 109 | returns tau, with an accuracy of 10 decimal places 110 | http://tauday.com/tau-manifesto 111 | 112 | deg_to_rad(degrees) 113 | converts degrees to radians 114 | 115 | rad_to_deg(radians) 116 | converts radians to degrees 117 | 118 | tan(expr) 119 | returns the tangent of expr, which is in radians 120 | 121 | csc(expr) 122 | returns the cosecant of expr, which is in radians 123 | 124 | sec(expr) 125 | returns the secant of expr, which is in radians 126 | 127 | cot(expr) 128 | returns the cotangent of expr, which is in radians 129 | 130 | 131 | 132 | sys.awk 133 | 134 | isatty(fd) 135 | Checks if "fd" is open on a tty. Returns 1 if so, 0 if not, and -1 if an 136 | error occurs 137 | 138 | mktemp(template [, type]) 139 | creates a temporary file or directory, safely, and returns its name. 140 | if template is not a pathname, the file will be created in ENVIRON["TMPDIR"] 141 | if set, otherwise /tmp.
the last six characters of template must be "XXXXXX", 142 | and these are replaced with a string that makes the filename unique. type, if 143 | supplied, is either "f", "d", or "u": for file, directory, or dry run (just 144 | returns the name, doesn't create a file), respectively. If template is not 145 | provided, uses "tmp.XXXXXX". Files are created u+rw, and directories u+rwx, 146 | minus umask restrictions. returns -1 if an error occurs. 147 | 148 | 149 | 150 | strings.awk 151 | 152 | center(string [, width]) 153 | returns "string" centered based on "width". if "width" is not provided (or 154 | is 0), uses the width of the terminal, or 80 if standard output is not open 155 | on a terminal. 156 | note: does not check the length of the string. if it's wider than the 157 | terminal, it will not center lines other than the first. for best results, 158 | combine with fold() (see the "cfold" script in the "examples" directory for 159 | a script that does exactly this!) 160 | 161 | delete_arr(array) 162 | deletes every element in "array" 163 | 164 | fold(string, sep [, width]) 165 | returns "string", wrapped, with lines broken on "sep" to "width" columns. 166 | "sep" is a list of characters to break at, similar to IFS in a POSIX shell. 167 | if "sep" is empty, wraps at exactly "width" characters. if "width" is not 168 | provided (or is 0), uses the width of the terminal, or 80 if standard output 169 | is not open on a terminal. 170 | note: currently, tabs are squeezed to a single space. this will be fixed 171 | 172 | shell_esc(string) 173 | returns the string escaped so that it can be used in a shell command 174 | 175 | ssub(ere, repl [, in]) 176 | behaves like sub, except returns the result and doesn't modify "in". 177 | note: 'ere' must not use /.../ literal regex quoting 178 | 179 | sgsub(ere, repl [, in]) 180 | behaves like gsub, except returns the result and doesn't modify "in". 
181 | note: 'ere' must not use /.../ literal regex quoting 182 | 183 | lsub(str, repl [, in]) 184 | substitutes the string "repl" in place of the first instance of "str" in the 185 | string "in" and returns the result. does not modify the original string. if 186 | "in" is not provided, uses $0 187 | 188 | glsub(str, repl [, in]) 189 | behaves like lsub, except it replaces all occurrences of "str" 190 | note: does not work in mawk when 'str' is empty 191 | 192 | str_to_arr(string, array) 193 | converts string to an array, one char per element, 1-indexed 194 | returns the array length 195 | 196 | extract_range(string, start, stop) 197 | extracts fields "start" through "stop" from "string", based on FS, with the 198 | original field separators intact. returns the extracted fields. 199 | 200 | fwidths(width_spec [, string]) 201 | extracts substrings from "string" according to "width_spec" from left to 202 | right and assigns them to $1, $2, etc. also assigns the NF variable. if 203 | "string" is not supplied, uses $0. "width_spec" is a space separated list of 204 | numbers that specify field widths, just like GNU awk's FIELDWIDTHS variable. 205 | if there is data left over after the last width_spec, adds it to a final 206 | field. returns the value for NF. 207 | 208 | fwidths_arr(width_spec, array [, string]) 209 | the behavior is the same as that of fwidths(), except that the values are 210 | assigned to "array", indexed with sequential integers starting with 1. 211 | returns the length. everything else is described in fwidths() above. 212 | 213 | lsplit(str, arr, sep) 214 | splits the string "str" into array elements "arr[1]", "arr[2]", .., "arr[n]", 215 | and returns "n". all elements of "arr" are deleted before the split is 216 | performed. the separation is done on the literal string "sep". 217 | 218 | ssplit(str, arr, seps [, ere]) 219 | similar to GNU awk 4's "seps" functionality for split().
splits the string 220 | "str" into the array "arr" and the separators array "seps" on the regular 221 | expression "ere", and returns the number of fields. the value of "seps[i]" 222 | is the separator that appeared in front of "arr[i+1]". if "ere" is omitted or 223 | empty, FS is used instead. if "ere" is a single space, leading whitespace in 224 | "str" will go into the extra array element "seps[0]" and trailing whitespace 225 | will go into the extra array element "seps[len]", where "len" is the return 226 | value. 227 | note: /regex/ style quoting cannot be used for "ere". 228 | 229 | ends_with(string, substring) 230 | returns 1 if "string" ends with "substring", otherwise 0 231 | 232 | trim(string) 233 | returns "string" with leading and trailing whitespace trimmed 234 | 235 | rev(string) 236 | returns "string" backwards 237 | 238 | max(array [, how ]) 239 | returns the maximum value in "array", 0 if the array is empty, or -1 if an 240 | error occurs. the optional string "how" controls the comparison mode. 241 | requires the __mcompare() function. 242 | valid values for "how" are: 243 | "std" 244 | use awk's standard rules for comparison. this is the default 245 | "str" 246 | force comparison as strings 247 | "num" 248 | force a numeric comparison 249 | 250 | maxi(array [, how ]) 251 | the behavior is the same as that of max(), except that the array indices are 252 | used, not the array values. everything else is explained in max() above. 253 | 254 | min(array [, how ]) 255 | the behavior is the same as that of max(), except that the minimum value is 256 | returned instead of the maximum. everything else is explained in max() above. 257 | 258 | mini(array [, how ]) 259 | the behavior is the same as that of min(), except that the array indices are 260 | used instead of the array values. everything else is explained in min() and 261 | max() above.
262 | 263 | 264 | 265 | msort.awk 266 | 267 | msort(s, d [, how]) 268 | sorts the elements in the array "s" using awk's normal rules for comparing 269 | values, creating a new sorted array "d" indexed with sequential integers 270 | starting with 1. returns the length, or -1 if an error occurs. leaves the 271 | indices of the source array "s" unchanged. the optional string "how" controls 272 | the direction and the comparison mode. uses the merge sort algorithm, with an 273 | insertion sort when the list size gets small enough. this is not a stable 274 | sort. requires the __compare() and __mergesort() functions. 275 | valid values for "how" are: 276 | "std asc" 277 | use awk's standard rules for comparison, ascending. this is the default 278 | "std desc" 279 | use awk's standard rules for comparison, descending. 280 | "str asc" 281 | force comparison as strings, ascending. 282 | "str desc" 283 | force comparison as strings, descending. 284 | "num asc" 285 | force a numeric comparison, ascending. 286 | "num desc" 287 | force a numeric comparison, descending. 288 | 289 | imsort(s [, how]) 290 | the behavior is the same as that of msort(), except that the array "s" is 291 | sorted in-place. the original indices are destroyed and replaced with 292 | sequential integers. everything else is described in msort() above. 293 | 294 | msorti(s, d [, how]) 295 | the behavior is the same as that of msort(), except that the array indices 296 | are used for sorting, not the array values. when done, the new array is 297 | indexed numerically, and the values are those of the original indices. 298 | everything else is described in msort() above. 299 | 300 | imsorti(s [, how]) 301 | the behavior is the same as that of msorti(), except that the array "s" is 302 | sorted in-place. the original indices are destroyed and replaced with 303 | sequential integers. everything else is described in msort() and msorti() 304 | above.
305 | 306 | msortv(s, d [, how]) 307 | sorts the indices in the array "s" based on the values, creating a new 308 | sorted array "d" indexed with sequential integers starting with 1, whose 309 | values are the indices of "s". returns the length, or -1 if an error occurs. 310 | leaves the source array "s" unchanged. the optional string "how" controls 311 | the direction and the comparison mode. uses the merge sort algorithm, with 312 | an insertion sort when the list size gets small enough. this is not a stable 313 | sort. requires the __compare() and __mergesortv() functions. valid values for 314 | "how" are explained in the msort() function above. 315 | 316 | 317 | 318 | qsort.awk 319 | 320 | qsort(s, d [, how]) 321 | sorts the elements in the array "s" using awk's normal rules for comparing 322 | values, creating a new sorted array "d" indexed with sequential integers 323 | starting with 1. returns the length, or -1 if an error occurs. leaves the 324 | indices of the source array "s" unchanged. the optional string "how" controls 325 | the direction and the comparison mode. uses the quicksort algorithm, with a 326 | random pivot to avoid worst-case behavior on already sorted arrays. this is 327 | not a stable sort. requires the __compare() and __quicksort() functions. 328 | valid values for "how" are: 329 | "std asc" 330 | use awk's standard rules for comparison, ascending. this is the default 331 | "std desc" 332 | use awk's standard rules for comparison, descending. 333 | "str asc" 334 | force comparison as strings, ascending. 335 | "str desc" 336 | force comparison as strings, descending. 337 | "num asc" 338 | force a numeric comparison, ascending. 339 | "num desc" 340 | force a numeric comparison, descending. 341 | 342 | iqsort(s [, how]) 343 | the behavior is the same as that of qsort(), except that the array "s" is 344 | sorted in-place. the original indices are destroyed and replaced with 345 | sequential integers.
everything else is described in qsort() above. 346 | 347 | qsorti(s, d [, how]) 348 | the behavior is the same as that of qsort(), except that the array indices 349 | are used for sorting, not the array values. when done, the new array is 350 | indexed numerically, and the values are those of the original indices. 351 | everything else is described in qsort() above. 352 | 353 | iqsorti(s [, how]) 354 | the behavior is the same as that of qsorti(), except that the array "s" is 355 | sorted in-place. the original indices are destroyed and replaced with 356 | sequential integers. everything else is described in qsort() and qsorti() 357 | above. 358 | 359 | qsortv(s, d [, how]) 360 | sorts the indices in the array "s" based on the values, creating a new 361 | sorted array "d" indexed with sequential integers starting with 1, whose 362 | values are the indices of "s". returns the length, or -1 if an error occurs. 363 | leaves the source array "s" unchanged. the optional string "how" controls 364 | the direction and the comparison mode. uses the quicksort algorithm, with a 365 | random pivot to avoid worst-case behavior on already sorted arrays. this is 366 | not a stable sort. requires the __compare() and __vquicksort() functions. 367 | valid values for "how" are explained in the qsort() function above. 368 | 369 | 370 | 371 | psort.awk 372 | 373 | psort(s, d, patts, max [, how]) 374 | sorts the values of the array "s", based on the rules below. creates a new 375 | sorted array "d" indexed with sequential integers starting with 1. "patts" 376 | is a compact (non-sparse) 1-indexed array containing regular expressions. 377 | "max" is the length of the "patts" array. returns the length of the "d" 378 | array. valid values for "how" are explained below. uses the quicksort 379 | algorithm, with a random pivot to avoid worst-case behavior on already sorted 380 | arrays. requires the __pcompare() and __pquicksort() functions.
381 | Sorting rules: 382 | - When sorting, values matching an expression in the "patts" array will 383 | take priority over any other values 384 | - Each expression in the "patts" array will have priority in ascending 385 | order by index. "patts[1]" will have priority over "patts[2]" and 386 | "patts[3]", etc 387 | - Values both matching the same regex will be compared as usual 388 | - All non-matching values will be compared as usual 389 | valid values for "how" are: 390 | "std asc" 391 | use awk's standard rules for comparison, ascending. this is the default 392 | "std desc" 393 | use awk's standard rules for comparison, descending. 394 | "str asc" 395 | force comparison as strings, ascending. 396 | "str desc" 397 | force comparison as strings, descending. 398 | "num asc" 399 | force a numeric comparison, ascending. 400 | "num desc" 401 | force a numeric comparison, descending. 402 | 403 | ipsort(s, patts, max [, how]) 404 | the behavior is the same as that of psort(), except that the array "s" is 405 | sorted in-place. the original indices are destroyed and replaced with 406 | sequential integers. everything else is described in psort() above. 407 | 408 | psorti(s, d, patts, max [, how]) 409 | the behavior is the same as that of psort(), except that the array indices 410 | are used for sorting, not the array values. when done, the new array is 411 | indexed numerically, and the values are those of the original indices. 412 | everything else is described in psort() above. 413 | 414 | ipsorti(s, patts, max [, how]) 415 | the behavior is the same as that of psorti(), except that the array "s" is 416 | sorted in-place. the original indices are destroyed and replaced with 417 | sequential integers. everything else is described in psort() and psorti() 418 | above. 419 | 420 | 421 | 422 | shuf.awk 423 | 424 | shuf(s, d) 425 | shuffles the array "s", creating a new shuffled array "d" indexed with 426 | sequential integers starting with one.
returns the length, or -1 if an error 427 | occurs. leaves the indices of the source array "s" unchanged. uses the knuth- 428 | fisher-yates algorithm. requires the __shuffle() function. 429 | 430 | ishuf(s) 431 | the behavior is the same as that of shuf(), except the array "s" is shuffled 432 | in-place. the original indices are destroyed and replaced with sequential 433 | integers. everything else is described in shuf() above. 434 | 435 | shufi(s, d) 436 | the behavior is the same as that of shuf(), except that the array indices 437 | are shuffled, not the array values. when done, the new array is indexed 438 | numerically, and the values are those of the original indices. everything 439 | else is described in shuf() above. 440 | 441 | ishufi(s) 442 | the behavior is the same as that of shufi(), except that the array "s" is 443 | shuffled in-place. the original indices are destroyed and replaced with 444 | sequential integers. everything else is described in shuf() and shufi() 445 | above. 446 | 447 | 448 | 449 | csv.awk 450 | 451 | create_line(array, max [, sep [, qualifier [, quote_type] ] ]) 452 | Generates an output line in quoted CSV format, from the contents of "array". 453 | "array" is expected to be an indexed array (1-indexed). "max" is the highest 454 | index to be used. "sep", if provided, is the field separator. If it is more 455 | than one character, the first character in the string is used. By default, 456 | it is a comma. "qualifier", if provided, is the quote character. Like "sep", 457 | it is one character. The default value is `"'. "quote_type", if provided, is 458 | used to determine how the output fields are quoted. Valid values are given 459 | below. For example, the array: a[1]="foo"; a[2]="bar,quux"; a[3]="blah\"baz" 460 | when called with create_line(a, 3), will return: "foo","bar,quux","blah""baz" 461 | note: expects a non-sparse array.
empty or unset values will become 462 | empty fields 463 | Valid values for "quote_type": 464 | "t": Quote all strings, do not quote numbers. This is the default 465 | "a": Quote all fields 466 | "m": Only quote fields with commas or quote characters in them 467 | 468 | qsplit(string, array [, sep [, qualifier] ]) 469 | a version of split() designed for CSV-like data. splits "string" on "sep" 470 | (,) if not provided, into array[1], array[2], ... array[n]. returns "n", or 471 | "-1 * n" if the line is incomplete (it has an uneven number of quotes). both 472 | "sep" and "qualifier" will use the first character in the provided string. 473 | uses "qualifier" (" if not provided) and ignores "sep" within quoted fields. 474 | doubled qualifiers are considered escaped, and a single qualifier character 475 | is used in its place. for example, foo,"bar,baz""blah",quux will be split as 476 | such: array[1] = "foo"; array[2] = "bar,baz\"blah"; array[3] = "quux"; 477 | 478 | 479 | 480 | options.awk 481 | 482 | getopts(optstring [, longopt_array ]) 483 | parses options, and deletes them from ARGV. "optstring" is of the form 484 | "ab:c". each letter is a possible option. if the letter is followed by a 485 | colon (:), then the option requires an argument. if an argument is not 486 | provided, or an invalid option is given, getopts will print the appropriate 487 | error message and return "?". returns each option as it's read, and -1 when 488 | no options are left. "optind" will be set to the index of the next 489 | non-option argument when finished. "optarg" will be set to the option's 490 | argument, when provided. if not provided, "optarg" will be empty. "optname" 491 | will be set to the current option, as provided. getopts will delete each 492 | option and argument that it successfully reads, so awk will be able to treat 493 | whatever's left as filenames/assignments, as usual. 
if provided, 494 | "longopt_array" is the name of an associative array that maps long options 495 | to the appropriate short option. (do not include the hyphens on either). 496 | sample usage can be found in the examples dir, with gawk extensions, or in 497 | the ogrep script for a POSIX example: https://github.com/e36freak/ogrep 498 | 499 | 500 | 501 | times.awk 502 | 503 | month_to_num(month) 504 | converts human readable month to the decimal representation 505 | returns the number, -1 if the month doesn't exist 506 | 507 | day_to_num(day) 508 | converts human readable day to the decimal representation 509 | returns the number, -1 if the day doesn't exist 510 | like date +%w, sunday is 0 511 | 512 | hr_to_sec(timestamp) 513 | converts HH:MM:SS to seconds, returns -1 if invalid format 514 | 515 | sec_to_hr(seconds) 516 | converts seconds to HH:MM:SS 517 | 518 | ms_to_hr(milliseconds) 519 | converts milliseconds to a "time(1)"-similar human readable format, such 520 | as 1m4.356s 521 | 522 | add_day_suff(day_of_month) 523 | appends the appropriate suffix to "day_of_month". for example, 524 | add_day_suff(1) will return "1st", and add_day_suff(22) will return "22nd" 525 | returns -1 if "day_of_month" is not a positive integer 526 | 527 | 528 | 529 | colors.awk 530 | set_cols(array) 531 | sets the following values in "array" with tput. printing them will format 532 | any text afterwards.
colors and formats are: 533 | bold - bold text (can be combined with a color) 534 | black - black text 535 | red - red text 536 | green - green text 537 | yellow - yellow text 538 | blue - blue text 539 | magenta - magenta text 540 | cyan - cyan text 541 | white - white text 542 | reset - resets to default settings 543 | 544 | 545 | You can do whatever you want with this stuff, but a thanks is always appreciated 546 | -------------------------------------------------------------------------------- /colors.awk: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 |
## usage: set_cols(array)
## sets the following values in "array" with tput. printing them will format
## any text afterwards. colors and formats are:
##   bold    - bold text (can be combined with a color)
##   black   - black text
##   red     - red text
##   green   - green text
##   yellow  - yellow text
##   blue    - blue text
##   magenta - magenta text
##   cyan    - cyan text
##   white   - white text
##   reset   - resets to default settings
## every key listed above is always assigned. if tput produces no output for a
## capability (for example because the command fails), the value is the empty
## string, so printing it is harmless.
## everything after "array" in the parameter list is a local variable; callers
## pass only "array".
function set_cols(array,    spec, caps, n, i, kv, cmd, val) {
  # table of "key:tput arguments" pairs, separated by ";"
  spec = "bold:bold;black:setaf 0;red:setaf 1;green:setaf 2;yellow:setaf 3;";
  spec = spec "blue:setaf 4;magenta:setaf 5;cyan:setaf 6;white:setaf 7;reset:sgr0";
  n = split(spec, caps, ";");

  for (i=1; i<=n; i++) {
    # kv[1] is the array key, kv[2] the arguments handed to tput
    split(caps[i], kv, ":");
    cmd = "tput " kv[2];

    # start from "" so that a failed or empty read cannot leave a stale value
    val = "";
    cmd | getline val;
    # close the pipe so each command is run afresh and descriptors don't pile up
    close(cmd);

    array[kv[1]] = val;
  }
}
58 | 59 | 60 | 61 | # You can do whatever you want with this stuff, but a thanks is always 62 | # appreciated 63 | -------------------------------------------------------------------------------- /csv.awk: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | ## usage: create_line(array, max [, sep [, qualifier [, quote_type] ] ]) 4 | ## Generates an output line in quoted CSV format, from the contents of "array" 5 | ## "array" is expected to be an indexed array (1-indexed). "max" is the highest 6 | ## index to be used. "sep", if provided, is the field separator. If it is more 7 | ## than one character, the first character in the string is used. By default, 8 | ## it is a comma. "qualifier", if provided, is the quote character. Like "sep", 9 | ## it is one character. The default value is `"'. "quote_type", if provided, is 10 | ## used to determine how the output fields are quoted. Valid values are given 11 | ## below. For example, the array: a[1]="foo"; a[2]="bar,quux"; a[3]="blah\"baz" 12 | ## when called with create_line(a, 3), will return: "foo","bar,quux","blah""baz" 13 | ## note: expects a non-sparse array. empty or unset values will become 14 | ## empty fields 15 | ## Valid values for "quote_type": 16 | ## "t": Quote all strings, do not quote numbers.
This is the default 17 | ## "a": Quote all fields 18 | ## "m": Only quote fields with commas or quote characters in them 19 |
## note: "array" is only read. its elements are never modified, and no new
## elements are created in it for unset indices.
## note: for quote_type "t", a field counts as a number only if it consists
## solely of the characters 0-9 and "."; anything else (a sign, an exponent,
## letters) is quoted as a string.
## everything after "type" in the parameter list is a local variable.
function create_line(arr, len, sep, q, type,    i, out, c, new, field, toquote) {
  # separator and qualifier: first character of the argument, or the default
  sep = length(sep) ? substr(sep, 1, 1) : ",";
  q = length(q) ? substr(q, 1, 1) : "\"";

  # an empty or unrecognized "type" falls back to "t"
  if (type !~ /^[tam]$/) {
    type = "t";
  }

  out = "";

  for (i=1; i<=len; i++) {
    # work on a copy: arrays are passed by reference, so editing arr[i] would
    # corrupt the caller's data. the "in" test avoids creating unset elements
    field = (i in arr) ? arr[i] : "";

    # decide whether this field has to be wrapped in qualifiers
    if (type == "a") {
      toquote = 1;
    } else if (type == "t") {
      toquote = (field ~ /[^0-9.]/ || index(field, sep) || index(field, q)) ? 1 : 0;
    } else {
      toquote = (index(field, sep) || index(field, q)) ? 1 : 0;
    }

    if (toquote) {
      # escape embedded qualifiers by doubling them, consuming "field" from
      # the left and collecting the escaped prefix in "new"
      new = "";
      while ((c = index(field, q)) > 0) {
        new = new substr(field, 1, c - 1) q q;
        field = substr(field, c + 1);
      }
      field = q new field q;
    }

    # join with the separator (no separator in front of the first field)
    out = (i > 1) ? out sep field : field;
  }

  return out;
}
81 | 82 | ## usage: qsplit(string, array [, sep [, qualifier] ]) 83 | ## a version of split() designed for CSV-like data. splits "string" on "sep" 84 | ## (,) if not provided, into array[1], array[2], ... array[n]. returns "n", or 85 | ## "-1 * n" if the line is incomplete (it has an uneven number of quotes). both 86 | ## "sep" and "qualifier" will use the first character in the provided string.
87 | ## uses "qualifier" (" if not provided) and ignores "sep" within quoted fields. 88 | ## doubled qualifiers are considered escaped, and a single qualifier character 89 | ## is used in its place. for example, foo,"bar,baz""blah",quux will be split as 90 | ## such: array[1] = "foo"; array[2] = "bar,baz\"blah"; array[3] = "quux"; 91 |
## note: every field is assigned, so empty fields exist in "array" as empty
## strings and the result is never sparse. an empty "string" yields one empty
## field (return value 1).
## everything after "q" in the parameter list is a local variable.
function qsplit(str, arr, sep, q,    len, cur, isin, c, ch) {
  # clear the output array. split("", arr) is the portable way to do this
  split("", arr);

  # separator and qualifier: first character of the argument, or the default
  sep = length(sep) ? substr(sep, 1, 1) : ",";
  q = length(q) ? substr(q, 1, 1) : "\"";

  # walk the string with substr() rather than split(str, a, ""): splitting on
  # an empty separator is unspecified by POSIX
  len = length(str);

  # "cur" is the element of "arr" currently being filled
  cur = 1;
  arr[cur] = "";
  # boolean, whether or not the scan is inside a quoted section
  isin = 0;

  for (c=1; c<=len; c++) {
    ch = substr(str, c, 1);

    if (ch == q) {
      # a doubled qualifier is an escaped literal quote when it occurs inside
      # a quoted section. outside of one it is only taken as a literal when
      # it is not at the start of a field (start of string or right after a
      # separator), where "" is an empty quoted field such as "foo","","bar"
      if (substr(str, c + 1, 1) == q &&
          (isin || (c > 1 && substr(str, c - 1, 1) != sep))) {
        arr[cur] = arr[cur] q;
        # skip the second quote of the pair
        c++;

      # otherwise it opens or closes a quoted section
      } else {
        isin = !isin;
      }

    # an unquoted separator starts the next field
    } else if (ch == sep && !isin) {
      arr[++cur] = "";

    # anything else belongs to the current field
    } else {
      arr[cur] = arr[cur] ch;
    }
  }

  # number of fields, negated if a quoted section was left open
  return cur * (isin ? -1 : 1);
}
147 | 148 | 149 | 150 | # You can do whatever you want with this stuff, but a thanks is always 151 | # appreciated 152 | -------------------------------------------------------------------------------- /examples/cfold: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see 4 | # the man page (since the path is relative, this assumes the lib dir is in 5 | # AWKPATH or the same dir) 6 | @include "strings.awk"; 7 | @include "options.awk"; 8 | 9 | 10 | # prints usage 11 | function usage() { 12 | printf("%s\n\n%s\n\n%s\n%s\n%s\n%s\n\n", 13 | "cfold -- [OPTIONS] [FILE...]", 14 | "the '--' is required, so AWK itself doesn't read the options", 15 | "Wraps input lines in each FILE (standard input if not provided), writing to", 16 | "standard output. The default width is that of the terminal, or 80 columns if", 17 | "standard output is not a terminal. If FILE is '-', also reads the standard", 18 | "input") > "/dev/stderr"; 19 | printf("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n", 20 | " Options:", 21 | " -c, --center center each line on the terminal. assumes a width of 80", 22 | " columns if standard output is not a terminal", 23 | " -b, --break[=LIST] break lines at spaces. LIST, if provided, is a list of", 24 | " characters to break lines at instead of spaces. For", 25 | " example, --break=abc will break lines on \"a\" or \"b\"", 26 | " Note: an empty string for LIST will revert to the", 27 | " default behavior, it is not the same as omitting -b", 28 | " -i, --ignore-breaks convert existing single line breaks to spaces.
multiple", 29 | " line breaks like those at the end of paragraphs will be", 30 | " truncated to a single empty line", 31 | " -w, --width WIDTH use WIDTH columns instead of the terminal's width", 32 | " -t, --trim trim leading and trailing whitespace from each line", 33 | " prior to folding", 34 | " -h, --help display this help and exit") > "/dev/stderr"; 35 | } 36 | 37 | BEGIN { 38 | # initialize variables to defaults 39 | toexit = err = 0; 40 | tocenter = toignore = totrim = 0; 41 | break_chars = ""; 42 | 43 | # get default width 44 | if (system("test -t 1")) { 45 | # stdout is not open on a tty 46 | width = 80 47 | } else { 48 | cmd = "tput cols"; 49 | cmd | getline width; 50 | close(cmd); 51 | } 52 | 53 | # map long options to short options 54 | longopts["center"] = "c"; 55 | longopts["break"] = "b"; 56 | longopts["ignore-breaks"] = "i"; 57 | longopts["width"] = "w"; 58 | longopts["trim"] = "t"; 59 | longopts["help"] = "h"; 60 | 61 | # parse the options 62 | while ((opt = getopts("cbiw:th", longopts)) != -1) { 63 | switch(opt) { 64 | # -c, --center 65 | case "c": 66 | tocenter = 1; break; 67 | 68 | # -b, --break 69 | case "b": 70 | if (length(optarg)) { 71 | break_chars = optarg; 72 | } else { 73 | break_chars = " \t\n"; 74 | } 75 | break; 76 | 77 | # -i, --ignore-breaks 78 | case "i": 79 | toignore = 1; break; 80 | 81 | # w, --width 82 | case "w": 83 | # make sure optarg is an integer 84 | if (optarg !~ /^[0-9]+$/) { 85 | printf("'%s' is not a valid argument for '%s', must be a number", 86 | optarg, optname) > "/dev/stderr"; 87 | err = toexit = 1; 88 | exit; 89 | } 90 | width = optarg; 91 | break; 92 | 93 | # -t, --trim 94 | case "t": 95 | totrim = 1; break; 96 | 97 | # -h, --help 98 | case "h": 99 | usage(); toexit = 1; exit; 100 | 101 | # error 102 | case "?": 103 | default: 104 | err = toexit = 1; 105 | exit; 106 | } 107 | } 108 | 109 | # if --ignore-breaks was used, set RS to null so that paragraphs are 110 | # treated as one line 111 | if (toignore) { 112 
| RS = ""; 113 | } 114 | } 115 | 116 | ######## 117 | 118 | # if --ignore-breaks was used, print extra newline between records 119 | toignore && NR > 1 { 120 | print ""; 121 | } 122 | 123 | # fold each record (line, or paragraph) 124 | { 125 | 126 | # if --trim was used, reassign $0 with leading/trailing whitespace removed 127 | if (totrim) { 128 | $0 = trim($0); 129 | } 130 | 131 | out = fold($0, break_chars, width); 132 | 133 | # if text is to be centered, split out into an array of lines and center each 134 | if (tocenter) { 135 | len = split(out, lines, /\n/); 136 | 137 | for (i=1; i<=len; i++) { 138 | print center(lines[i]); 139 | } 140 | } else { 141 | print out; 142 | } 143 | } 144 | 145 | END { 146 | exit err; 147 | } 148 | 149 | 150 | 151 | # You can do whatever you want with this stuff, but a thanks is always 152 | # appreciated 153 | -------------------------------------------------------------------------------- /examples/colors: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see 4 | # the man page (since the path is relative, this assumes the lib dir is in 5 | # AWKPATH or the same dir) 6 | @include "colors.awk"; 7 | 8 | # usage: set_cols(array) 9 | # sets the following values in "array" with tput. printing them will format 10 | # any text afterwards. 
colors and formats are: 11 | # bold - bold text (can be combined with a color) 12 | # black - black text 13 | # red - red text 14 | # green - green text 15 | # yellow - yellow text 16 | # blue - blue text 17 | # magenta - magenta text 18 | # cyan - cyan text 19 | # white - white text 20 | # reset - resets to default settings 21 | BEGIN { 22 | # set colors 23 | set_cols(colors); 24 | 25 | # print colored text 26 | print colors["red"] "red\t", colors["bold"] "bold red" colors["reset"]; 27 | print colors["black"] "black\t", colors["bold"] "bold black" colors["reset"]; 28 | print colors["green"] "green\t", colors["bold"] "bold green" colors["reset"]; 29 | print colors["yellow"] "yellow\t", colors["bold"] "bold yellow" colors["reset"]; 30 | print colors["blue"] "blue\t", colors["bold"] "bold blue" colors["reset"]; 31 | print colors["magenta"] "magenta\t", colors["bold"] "bold magenta" colors["reset"]; 32 | print colors["cyan"] "cyan\t", colors["bold"] "bold cyan" colors["reset"]; 33 | print colors["white"] "white\t", colors["bold"] "bold white" colors["reset"]; 34 | } 35 | -------------------------------------------------------------------------------- /examples/csv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see 4 | # the man page (since the path is relative, this assumes the lib dir is in 5 | # AWKPATH or the same dir) 6 | @include "csv.awk"; 7 | 8 | # usage: create_line(array, max [, sep [, qualifier [, quote_type] ] ]) 9 | # Generates an output line in quoted CSV format, from the contents of "array" 10 | # "array" is expected to be an indexed array (1-indexed). "max" is the highest 11 | # index to be used. "sep", if provided, is the field separator. If it is more 12 | # than one character, the first character in the string is used. By default, 13 | # it is a comma. "qualifier", if provided, is the quote character. 
Like "sep", 14 | # it is one character. The default value is `"'. "quote_type", if provided, is 15 | # used to determine how the output fields are quoted. Valid values are given 16 | # below. For example, the array: a[1]="foo"; a[2]="bar,quux"; a[3]="blah\"baz" 17 | # when called with create_line(a, 3), will return: "foo","bar,quux","blah""baz" 18 | # note: expects a non-sparse array. empty or unset values will become 19 | # empty fields 20 | # Valid values for "quote_type": 21 | # "t": Quote all strings, do not quote numbers. This is the default 22 | # "a": Quote all fields 23 | # "m": Only quote fields with commas or quote characters in them 24 | BEGIN { 25 | print "create_line:"; 26 | print ""; 27 | 28 | # populate array 29 | a[1] = "foo"; a[2] = "with,comma"; a[3] = ""; a[4] = "with\"quote"; 30 | 31 | # print array in CSV format 32 | print create_line(a, 4); 33 | 34 | print ""; 35 | print ""; 36 | } 37 | 38 | # usage: qsplit(string, array [, sep [, qualifier] ]) 39 | # a version of split() designed for CSV-like data. splits "string" on "sep" 40 | # (,) if not provided, into array[1], array[2], ... array[n]. returns "n", or 41 | # "-1 * n" if the line is incomplete (it has an uneven number of quotes). both 42 | # "sep" and "qualifier" will use the first character in the provided string. 43 | # uses "qualifier" (" if not provided) and ignores "sep" within quoted fields. 44 | # doubled qualifiers are considered escaped, and a single qualifier character 45 | # is used in its place. 
for example, foo,"bar,baz""blah",quux will be split as 46 | # such: array[1] = "foo"; array[2] = "bar,baz\"blah"; array[3] = "quux"; 47 | BEGIN { 48 | print "qsplit:"; 49 | print ""; 50 | 51 | # populate initial string 52 | str = "\"foo\",\"with,comma\",\"\",\"with\"\"quote\""; 53 | 54 | print "initial string: "; 55 | print str; 56 | print ""; 57 | 58 | # split string into array 59 | len = qsplit(str, b); 60 | # get the absolute value of the length (could also be done with abs() in 61 | # math.awk) 62 | len = len < 0 ? -len : len; 63 | 64 | print "one element per line:" 65 | 66 | # print one element per line 67 | for (i=1; i<=len; i++) { 68 | print b[i]; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /examples/math: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | # include library. gawk can use AWKPATH so the actual path isn't needed, see 4 | # the man page (since the path is relative, this assumes the lib dir is in 5 | # AWKPATH or the same dir) 6 | @include "math.awk"; 7 | 8 | # usage: abs(number) 9 | # returns the absolute value of "number" 10 | BEGIN { 11 | print abs(-2.3); 12 | print abs(0); 13 | print abs(2.3); 14 | } 15 | 16 | # usage: ceil(number) 17 | # returns "number" rounded UP to the nearest int 18 | BEGIN { 19 | print ceil(2.3); 20 | print ceil(-2.3); 21 | } 22 | 23 | # usage: ceiling(multiple, number) 24 | # returns "number" rounded UP to the nearest multiple of "multiple" 25 | BEGIN { 26 | # nearest multiple of 5 above 6 is 10 27 | print ceiling(5, 6); 28 | } 29 | 30 | # usage: floor(multiple, number) 31 | # returns "number" rounded DOWN to the nearest multiple of "multiple" 32 | BEGIN { 33 | # nearest multiple of 5 below 9 is 5 34 | print floor(5, 9); 35 | } 36 | 37 | # usage: round(multiple, number) 38 | # returns "number" rounded to the nearest multiple of "multiple" 39 | BEGIN { 40 | # nearest multiple of 5 to 8 is 10 41 | print 
round(5, 8); 42 | } 43 | 44 | # usage: rint(number) 45 | # returns "number" rounded to the nearest integer 46 | BEGIN { 47 | # round to the nearest int 48 | print rint(1.3), rint(1.5), rint(2.8) 49 | } 50 | 51 | # usage: change_base(number, start_base, end_base) 52 | # converts "number" from "start_base" to "end_base" 53 | # bases must be between 2 and 64. the digits greater than 9 are represented 54 | # by the lowercase letters, the uppercase letters, @, and _, in that order. 55 | # if ibase is less than or equal to 36, lowercase and uppercase letters may 56 | # be used interchangeably to represent numbers between 10 and 35. 57 | # returns 0 if any argument is invalid 58 | BEGIN { 59 | # convert '3' to binary: 60 | print change_base(3, 10, 2); 61 | 62 | # convert '111' in binary to decimal: 63 | print change_base(111, 2, 10); 64 | 65 | # convert 111 in binary to hex: 66 | print change_base(111, 2, 16); 67 | 68 | # convert 1f in hex to decimal: 69 | print change_base("1f", 16, 10); 70 | } 71 | 72 | # usage: format_num(number) 73 | # adds commas to "number" to make it more readable. for example, 74 | # format_num(1000) will return "1,000", and format_num(123456.7891) will 75 | # return "123,456.7891". also trims leading zeroes 76 | # returns 0 if "number" is not a valid number 77 | BEGIN { 78 | num = "1000"; 79 | printf("unformatted: %s\nformatted: %s\n\n", num, format_num(num)); 80 | 81 | num = "123456.7891"; 82 | printf("unformatted: %s\nformatted: %s\n\n", num, format_num(num)); 83 | } 84 | 85 | # usage: str_to_num(string) 86 | # examines "string", and returns its numeric value. if "string" begins with a 87 | # leading 0, assumes that "string" is an octal number. if "string" begins with 88 | # a leading "0x" or "0X", assumes that "string" is a hexadecimal number. 89 | # otherwise, decimal is assumed. 
BEGIN {
  # the same digits interpreted as decimal, octal and hexadecimal
  num = "12";
  printf("str_to_num(\"%s\") == %s\n", num, str_to_num(num));
  num = "012";
  printf("str_to_num(\"%s\") == %s\n", num, str_to_num(num));
  num = "0x12";
  printf("str_to_num(\"%s\") == %s\n", num, str_to_num(num));

  print "";
}

# usage: isint(string)
# returns 1 if "string" is a valid integer, otherwise 0
BEGIN {
  var = "3";
  if (isint(var)) {
    print var " is a valid integer";
  } else {
    print var " is not a valid integer";
  }


  var = "1.34";
  if (isint(var)) {
    print var " is a valid integer";
  } else {
    print var " is not a valid integer";
  }

  var = "foo";
  if (isint(var)) {
    print var " is a valid integer";
  } else {
    print var " is not a valid integer";
  }
}

# usage: isnum(string)
# returns 1 if "string" is a valid number, otherwise 0
BEGIN {
  var = "3";
  if (isnum(var)) {
    print var " is a valid number";
  } else {
    print var " is not a valid number";
  }


  var = "1.34";
  if (isnum(var)) {
    print var " is a valid number";
  } else {
    print var " is not a valid number";
  }

  var = "foo";
  if (isnum(var)) {
    print var " is a valid number";
  } else {
    print var " is not a valid number";
  }
}

# usage: isprime(number)
# returns 1 if "number" is a prime number, otherwise 0. "number" must be a
# positive integer
BEGIN {
  print "primes from 1 through 10:";
  for (i=1; i<=10; i++) {
    if (isprime(i)) {
      # print the number itself. "$i" would be field number i, which is
      # empty inside BEGIN because no record has been read yet
      printf("%s ", i);
    }
  }
  print "";
}

# usage: gcd(a, b)
# returns the greatest common denominator (greatest common factor) of a and b.
# both a and b must be positive integers. uses the recursive euclid algorithm.
BEGIN {
  # demonstrates gcd()
  print "the greatest common factor of 3 and 6 is", gcd(3, 6);
}

# usage: lcm(a, b)
# returns the least common multiple of a and b. both a and b must be positive
# integers.
BEGIN {
  print "the least common multiple of 4 and 16 is", lcm(4, 16);
}

# usage: calc_e()
# approximates e by calculating the summation from k=0 to k=50 of 1/k!
# returns 10 decimal places
BEGIN {
  # prints e
  print "e is approximately " calc_e();
}

# usage: calc_pi()
# returns pi, with an accuracy of 10 decimal places
BEGIN {
  # prints pi
  print "pi is approximately " calc_pi();
}

# usage: calc_tau()
# returns tau, with an accuracy of 10 decimal places
# http://tauday.com/tau-manifesto
BEGIN {
  # prints tau
  print "pi is wrong! tau is approximately " calc_tau();
}

# usage: deg_to_rad(degrees)
# converts degrees to radians
BEGIN {
  # convert 90 degrees to radians
  print "90 degrees is " deg_to_rad(90) " radians";
}

# usage: rad_to_deg(radians)
# converts radians to degrees
BEGIN {
  # convert pi radians to degrees
  print "pi radians is " rad_to_deg(calc_pi()) " degrees";
}

# usage: tan(expr)
# returns the tangent of expr, which is in radians
BEGIN {
  # print the tangent of pi radians
  print "the tan of pi radians is " tan(calc_pi());
}

# usage: csc(expr)
# returns the cosecant of expr, which is in radians
BEGIN {
  # print the cosecant of pi radians
  print "the csc of pi radians is " csc(calc_pi());
}

# usage: sec(expr)
# returns the secant of expr, which is in radians
BEGIN {
  # print the secant of pi radians
  print "the sec of pi radians is " sec(calc_pi());
}

# usage: cot(expr)
# returns the cotangent of expr, which is in radians
BEGIN {
  # print the cotangent of pi radians
  print "the cot of pi radians is " cot(calc_pi());
}

--------------------------------------------------------------------------------
/examples/options:
--------------------------------------------------------------------------------
#!/usr/bin/gawk -f

# example usage of getopts() from http://github.com/e36freak/awk-libs

# include the lib with getopts. the path to the lib is not needed here if the
# directory is in AWKPATH, on gawk (this examples assumes it is, or that the
# lib is in the current working dir). otherwise, use the path to the lib, or
# copy and paste the whole function here.
@include "options.awk";

# prints usage information
# to see this, make sure you use ./script -- -h. without the '--', awk will
# treat the -h as an argument to awk itself, not the script
function usage() {
  printf("%s\n\n%s\n\n%s\n%s\n\n",
"example usage of getopts() from http://github.com/e36freak/awk-libs",
"awk_getopts -- [OPTIONS] [FILE(s)]",
"the \"--\" is required so that options are parsed by the script, and not",
"awk itself") > "/dev/stderr";

  # ten lines of option descriptions, a blank line, then the two-line closing
  # paragraph. the blank line goes after the last option line (-p) rather than
  # in the middle of the option list
  printf("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n\n%s\n%s\n",
" Options:",
" -h, --help Display this help and exit",
" -a, --arg ARG Option that requires an argument. prints the arg",
" -b, --blah Option that doesn't require an arg. prints \"hello world\"",
" -i, --info Displays optind and getopts' return value for each",
" iteration from when the option is used, onward. This is to",
" help give some insight into how the function works",
" -p, --print[=ARG] This is an example of an OPTIONAL argument. if ARG is",
" provided (and non-empty), it will be printed. Otherwise,",
" the string \"no arg\" will be printed.",
"This program then prints the name of each file passed, and the total number",
"of files at the end") > "/dev/stderr";
}

# we do the option parsing in the BEGIN block, of course
BEGIN {
  # i'm using the associative array 'longopts' to map long options to the
  # appropriate short option. you could use any name you choose, or none at
  # all if you only want to use short options.
  longopts["help"] = "h"; # maps --help to -h
  longopts["arg"] = "a"; # --arg to -a
  longopts["blah"] = "b"; # --blah to -b
  longopts["info"] = "i"; # --info to -i
  longopts["print"] = "p"; # and --print to -p

  # now we do the actual option parsing
  # since -a requires an argument, it must be followed by a ':' in "optstring"
  # we pass 'longopts' as the second argument. Note that no ':' is used for 'p'
  # because the argument is OPTIONAL, not required
  while ((opt = getopts("ha:bip", longopts)) != -1) {
    # i'm using gawk's switch() to handle the various args. you could use
    # if/else if you wanted (see ogrep on the same github for an example), but
    # this is the cleanest and easiest when portability is not an issue, and is
    # the most common structure used in other languages (C, or bash's case)
    switch(opt) {
      # -h, or --help. both will return "h"
      case "h":
        usage();
        toexit = 1;
        exit;

      # same with --arg or -a
      case "a":
        # this one requires an arg, so 'optarg' will be set to its argument
        # this version of getopts handles -aARG, or --arg=ARG, or --arg ARG,
        # and of course -a ARG.
        print optarg;
        break;

      # and --blah, -b
      case "b":
        print "hello world";
        break;

      # --info, -i
      case "i":
        info = 1;
        break;

      # --print, -p
      case "p":
        # if ARG was provided, 'optarg' will be non-empty
        if (length(optarg)) {
          print optarg;
        } else {
          print "no arg";
        }
        break;

      # getopts will return "?" on error
      case "?":
      default:
        err = toexit = 1;
        exit;
    }

    # if info is on...
    if (info) {
      printf("optind is now: %d, and opt (getopts' return value) is now: %s\n",
             optind, opt) > "/dev/stderr";
    }
  }

  # again, if info is on
  if (info) {
    print "finished processing arguments" > "/dev/stderr";

    # in case you want to use it, 'optind' will now be set to the correct index
    # for the first non-option argument in ARGV. the previous options WILL be
    # deleted from ARGV at this point.
    printf("optind is now: %d, ", optind) > "/dev/stderr";

    # you'll see opt is -1 now, since there are no options left to process
    printf("and opt is now: %s\n", opt) > "/dev/stderr";
  }
}

# just an example block that reads from the file(s) given
{
  print FILENAME;
  files++;

  nextfile;
}

# END block. prints the number of files read. here to show that another exit
# call is needed, because awk will run the END block after 'exit' is called
# unless you exit again within the block. (toexit is used so -h|--help doesn't
# cause the script to exit >0)
END {
  if (toexit) {
    exit err;
  }

  # force a numeric value so that "0" is printed, rather than an empty line,
  # when no records were read
  print files + 0;
}

--------------------------------------------------------------------------------
/examples/sort:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# include libraries. gawk can use AWKPATH so the actual path isn't needed, see
# the man page (since the path is relative, this assumes the lib dir is in
# AWKPATH or the same dir)
# the repository ships the quick sort functions in qsort.awk and the shuffle
# functions in shuf.awk; there is no sort.awk to include
@include "qsort.awk";
@include "shuf.awk";

# usage: qsort(s, d [, how])
# sorts the elements in the array "s" using awk's normal rules for comparing
# values, creating a new sorted array "d" indexed with sequential integers
# starting with 1. returns the length, or -1 if an error occurs. leaves the
# indices of the source array "s" unchanged. the optional string "how" controls
# the direction and the comparison mode. uses the quick sort algorithm, with a
# random pivot to avoid worst-case behavior on already sorted arrays. requires
# the __compare() and __quicksort() functions.
# valid values for "how" are:
# "std asc"
# use awk's standard rules for comparison, ascending. this is the default
# "std desc"
# use awk's standard rules for comparison, descending.
# "str asc"
# force comparison as strings, ascending.
# "str desc"
# force comparison as strings, descending.
# "num asc"
# force a numeric comparison, ascending.
# "num desc"
# force a numeric comparison, descending.
BEGIN {
  # populate array
  for (i=10; i>0; i--) {
    a[i] = i;
  }

  # sort, numerically ascending
  len = qsort(a, b, "num asc");

  # dump
  for (i=1; i<=len; i++) {
    print b[i];
  }

  print "";
}

# usage: iqsort(s [, how])
# the behavior is the same as that of qsort(), except that the array "s" is
# sorted in-place. the original indices are destroyed and replaced with
# sequential integers. everything else is described in qsort() above.
BEGIN {
  # populate array
  for (i=10; i>0; i--) {
    a[i] = i;
  }

  # sort in place, numerically ascending
  len = iqsort(a, "num asc");

  # dump
  for (i=1; i<=len; i++) {
    print a[i];
  }

  print "";
}

# usage: qsorti(s, d [, how])
# the behavior is the same as that of qsort(), except that the array indices
# are used for sorting, not the array values. when done, the new array is
# indexed numerically, and the values are those of the original indices.
# everything else is described in qsort() above.
BEGIN {
  # populate array (referencing an element is enough to create its index)
  for (i=10; i>0; i--) {
    a[i];
  }

  # sort indices, numerically ascending
  len = qsorti(a, b, "num asc");

  # dump
  for (i=1; i<=len; i++) {
    print b[i];
  }

  print "";
}

# usage: iqsorti(s [, how])
# the behavior is the same as that of qsorti(), except that the array "s" is
# sorted in-place. the original indices are destroyed and replaced with
# sequential integers. everything else is described in qsort() and qsorti()
# above.
BEGIN {
  # populate array (referencing an element is enough to create its index)
  for (i=10; i>0; i--) {
    a[i];
  }

  # sort indices in place, numerically ascending
  len = iqsorti(a, "num asc");

  # dump
  for (i=1; i<=len; i++) {
    print a[i];
  }

  print "";
}

# usage: qsortv(s, d [, how])
# sorts the indices in the array "s" based on the values, creating a new
# sorted array "d" indexed with sequential integers starting with 1, and the
# values the indices of "s". returns the length, or -1 if an error occurs.
# leaves the source array "s" unchanged. the optional string "how" controls
# the direction and the comparison mode. uses the quicksort algorithm, with a
# random pivot to avoid worst-case behavior on already sorted arrays. requires
# the __compare() and __vquicksort() functions. valid values for "how" are
# explained in the qsort() function above.
BEGIN {
  # populate array, so that index 1 holds 10, index 2 holds 9, and so on
  j=10
  for (i=1; i<=10; i++) {
    a[i] = j--;
  }

  # sort indices based on numeric values
  len = qsortv(a, b, "num asc");

  # dump each index next to the value it refers to
  for (i=1; i<=len; i++) {
    print b[i], a[b[i]];
  }

  print "";
}



# usage: shuf(s, d)
# shuffles the array "s", creating a new shuffled array "d" indexed with
# sequential integers starting with one. returns the length, or -1 if an error
# occurs. leaves the indices of the source array "s" unchanged. uses the knuth-
# fisher-yates algorithm. requires the __shuffle() function.
BEGIN {
  # populate array. the counter must go up; counting down from 1 never
  # reaches the "i<=10" limit and loops forever
  for (i=1; i<=10; i++) {
    a[i] = i;
  }

  # shuffle
  len = shuf(a, b);

  # dump
  for (i=1; i<=len; i++) {
    print b[i];
  }

  print "";
}

# usage: ishuf(s)
# the behavior is the same as that of shuf(), except the array "s" is sorted
# in-place. the original indices are destroyed and replaced with sequential
# integers. everything else is described in shuf() above.
BEGIN {
  # populate array. the counter must go up; counting down from 1 never
  # reaches the "i<=10" limit and loops forever
  for (i=1; i<=10; i++) {
    a[i] = i;
  }

  # shuffle in place
  len = ishuf(a);

  # dump
  for (i=1; i<=len; i++) {
    print a[i];
  }

  print "";
}

# usage: shufi(s, d)
# the behavior is the same as that of shuf(), except that the array indices
# are shuffled, not the array values. when done, the new array is indexed
# numerically, and the values are those of the original indices. everything
# else is described in shuf() above.
BEGIN {
  # populate array. the counter must go up; counting down from 1 never
  # reaches the "i<=10" limit and loops forever
  for (i=1; i<=10; i++) {
    a[i] = i;
  }

  # shuffle indices
  len = shufi(a, b);

  # dump
  for (i=1; i<=len; i++) {
    print b[i];
  }

  print "";
}

# usage: ishufi(s)
# the behavior is the same as that of shufi(), except that the array "s" is
# sorted in-place. the original indices are destroyed and replaced with
# sequential integers. everything else is described in shuf() and shufi()
# above.
BEGIN {
  # populate array. the counter must go up; counting down from 1 never
  # reaches the "i<=10" limit and loops forever
  for (i=1; i<=10; i++) {
    a[i] = i;
  }

  # shuffle indices in place
  len = ishufi(a);

  # dump
  for (i=1; i<=len; i++) {
    print a[i];
  }

  print "";
}

--------------------------------------------------------------------------------
/examples/strings:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# include library. gawk can use AWKPATH so the actual path isn't needed, see
# the man page (since the path is relative, this assumes the lib dir is in
# AWKPATH or the same dir)
@include "strings.awk";

# usage: center(string [, width])
# returns "string" centered based on "width". if "width" is not provided (or
# is 0), uses the width of the terminal, or 80 if standard output is not open
# on a terminal.
# note: does not check the length of the string. if it's wider than the
# terminal, it will not center lines other than the first. for best results,
# combine with fold().
BEGIN {
  print center("this string is centered");

  print "";
}

# usage: fold(string, sep [, width])
# returns "string", wrapped, with lines broken on "sep" to "width" columns.
# "sep" is a list of characters to break at, similar to IFS in a POSIX shell.
# if "sep" is empty, wraps at exactly "width" characters. if "width" is not
# provided (or is 0), uses the width of the terminal, or 80 if standard output
# is not open on a terminal.
# note: currently, tabs are squeezed to a single space. this will be fixed
BEGIN {
  # the alphabet is wrapped twice, breaking on vowels: once to 12 columns and
  # once to 15
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  for (fold_width = 12; fold_width <= 15; fold_width += 3) {
    # heading followed by an empty line
    printf("alphabet, folded to %d characters on vowels:\n\n", fold_width);
    # wrapped text followed by an empty line
    printf("%s\n\n", fold(alphabet, "AEIOU", fold_width));
  }
}

# usage: ssub(ere, repl [, in])
# behaves like sub, except returns the result and doesn't modify the original
BEGIN {
  string = "this is some string";

  # heading and an empty line
  printf("ssub: \n\n");

  # the input, the substituted copy, then the input again to show that it was
  # left untouched
  printf("%s\n", string);
  printf("%s\n", ssub("str.*", "replaced &", string));
  printf("%s\n\n", string);
}

# usage: sgsub(ere, repl [, in])
# behaves like gsub, except returns the result and doesn't modify the original
BEGIN {
  string = "this is some search string search";

  # heading and an empty line
  printf("sgsub: \n\n");

  # the input, the globally substituted copy, then the input again to show
  # that it was left untouched
  printf("%s\n", string);
  printf("%s\n", sgsub("search", "replace", string));
  printf("%s\n\n", string);
}

# usage: lsub(str, repl [, in])
# substitutes the string "repl" in place of the first instance of "str" in the
# string "in" and returns the result. does not modify the original string.
# if "in" is not provided, uses $0.
BEGIN {
  # ".*" is handed to lsub() as the text to look for
  string = "string with special .* characters";

  # heading and an empty line
  printf("lsub: \n\n");

  # the input, the result of lsub(), then the input once more
  printf("%s\n", string);
  printf("%s\n", lsub(".*", "literal", string));
  printf("%s\n\n", string);
}

# usage: glsub(str, repl [, in])
# behaves like lsub, except it replaces all occurrences of "str"
BEGIN {
  string = "string with .* special .* characters";

  # heading and an empty line
  printf("glsub: \n\n");

  # the input, the result of glsub(), then the input once more
  printf("%s\n", string);
  printf("%s\n", glsub(".*", "literal", string));
  printf("%s\n\n", string);
}

# usage: shell_esc(string)
# returns the string escaped so that it can be used in a shell command
BEGIN {
  file = "some 'filename' with * special characters to be used in system()";

  # heading, an empty line, and a legend
  printf("shell escape:\n\n");
  printf("first is normal, second escaped: \n");

  # raw string first, escaped string second, then an empty line
  printf("%s\n", file);
  printf("%s\n\n", shell_esc(file));
}

# usage: str_to_arr(string, array)
# converts string to an array, one char per element, 1-indexed
# returns the array length
BEGIN {
  string = "some string";

  # heading and an empty line
  printf("str_to_arr:\n\n");

  # show the input, followed by an empty line
  printf("initial string: \n");
  printf("%s\n\n", string);

  printf("one character per line:\n");
  len = str_to_arr(string, array);

  # walk the array from the first element to the last
  i = 1;
  while (i <= len) {
    printf("%s\n", array[i]);
    i++;
  }

  printf("\n");
}

# usage: extract_range(string, start, stop)
# extracts fields "start" through "stop" from "string", based on FS, with the
# original field separators intact. returns the extracted fields.
BEGIN {
  str = "foo bar baz\tblah quux";
  FS = " ";

  printf("extract_range(str, 1, 3): %s\n", extract_range(str, 1, 3));
  # NOTE(review): NF is normally 0 inside BEGIN because no record has been
  # read yet, so this call may not reach the last field of "str" -- confirm
  # the intended "stop" value against extract_range() in strings.awk
  printf("extract_range(str, 2, NF): %s\n", extract_range(str, 2, NF));
  printf("extract_range(str, 3, 8): %s\n", extract_range(str, 3, 8));

  print "";
}

# usage: fwidths(width_spec [, string])
# extracts substrings from "string" according to "width_spec" from left to
# right and assigns them to $1, $2, etc. also assigns the NF variable. if
# "string" is not supplied, uses $0. "width_spec" is a space separated list of
# numbers that specify field widths, just like GNU awk's FIELDWIDTHS variable.
# if there is data left over after the last width_spec, adds it to a final
# field. returns the value for NF.
BEGIN {
  str = "1234567890";
  fwidths("3 2 5", str);

  # the fields and NF were assigned by fwidths()
  for (i=1; i<=NF; i++) {
    print $i;
  }

  print "";
}

# usage: fwidths_arr(width_spec, array [, string])
# the behavior is the same as that of fwidths(), except that the values are
# assigned to "array", indexed with sequential integers starting with 1.
# returns the length. everything else is described in fwidths() above.
BEGIN {
  str = "1234567890";
  # this block demonstrates fwidths_arr(), which takes the destination array
  # as its second argument. fwidths() expects a string there
  len = fwidths_arr("2 1 4 3", a, str);

  for (i=1; i<=len; i++) {
    print a[i];
  }

  print "";
}

# usage: lsplit(str, arr, sep)
# splits the string "str" into array elements "arr[1]", "arr[2]", .., "arr[n]",
# and returns "n". all elements of "arr" are deleted before the split is
# performed. the separation is done on the literal string "sep".
192 | BEGIN { 193 | string = "foo.bar.baz" 194 | sep = "."; 195 | printf("original: <%s>\nsep: <%s>\n", string, sep); 196 | 197 | len = lsplit(string, a, sep); 198 | print " after: len = lsplit(string, a, sep)"; 199 | printf(" len: %d\n", len); 200 | for (i=1; i<=len; i++) { 201 | printf(" a[%d]: <%s>\n", i, a[i]); 202 | } 203 | 204 | print ""; 205 | } 206 | 207 | # usage: ssplit(str, arr, seps [, ere]) 208 | # similar to GNU awk 4's "seps" functionality for split(). splits the string 209 | # "str" into the array "arr" and the separators array "seps" on the regular 210 | # expression "ere", and returns the number of fields. the value of "seps[i]" 211 | # is the separator that appeared in front of "arr[i+1]". if "ere" is omitted or 212 | # empty, FS is used instead. if "ere" is a single space, leading whitespace in 213 | # "str" will go into the extra array element "seps[0]" and trailing whitespace 214 | # will go into the extra array element "seps[len]", where "len" is the return 215 | # value. 216 | # note: /regex/ style quoting cannot be used for "ere". 
217 | BEGIN { 218 | string = " one two three "; 219 | printf("original: <%s>\n", string); 220 | 221 | len = ssplit(string, a, s, " "); 222 | print " after: len = ssplit(string, a, s, \" \")"; 223 | printf(" len: %d\n s[0]: <%s>\n", len, s[0]); 224 | for (i=1; i<=len; i++) { 225 | printf(" a[%d]: <%s>, s[%d]: <%s>\n", i, a[i], i, s[i]); 226 | } 227 | 228 | print ""; 229 | } 230 | 231 | # usage: ends_with(string, substring) 232 | # returns 1 if "strings" ends with "substring", otherwise 0 233 | BEGIN { 234 | string = "foobar"; s = "bar"; 235 | if (ends_with(string, s)) { 236 | printf("%s ends with %s\n", string, s); 237 | } else { 238 | printf("%s does not end with %s\n", string, s); 239 | } 240 | 241 | string = "foobarfoo"; s = "bar"; 242 | if (ends_with(string, s)) { 243 | printf("%s ends with %s\n", string, s); 244 | } else { 245 | printf("%s does not end with %s\n", string, s); 246 | } 247 | 248 | print ""; 249 | } 250 | 251 | # usage: trim(string) 252 | # returns "string" with leading and trailing whitespace trimmed 253 | BEGIN { 254 | string = " whitespace "; 255 | 256 | print "normal: <" string ">"; 257 | print "after trim(): <" trim(string) ">"; 258 | 259 | print ""; 260 | } 261 | 262 | # usage: rev(string) 263 | # returns "string" backwards 264 | BEGIN { 265 | string = "forwards"; 266 | 267 | print "normal: " string; 268 | print "after rev(): " rev(string); 269 | } 270 | 271 | # usage: max(array [, how ]) 272 | # returns the maximum value in "array", 0 if the array is empty, or -1 if an 273 | # error occurs. the optional string "how" controls the comparison mode. 274 | # requires the __mcompare() function. 275 | # valid values for "how" are: 276 | # "std" 277 | # use awk's standard rules for comparison. 
this is the default 278 | # "str" 279 | # force comparison as strings 280 | # "num" 281 | # force a numeric comparison 282 | BEGIN { 283 | # populate array with random numbers 284 | for (i=0; i<10; i++) { 285 | a[i] = int(rand() * 10); 286 | } 287 | 288 | # print the max 289 | print "max: " max(a, "num"); 290 | } 291 | 292 | # usage: maxi(array [, how ]) 293 | # the behavior is the same as that of max(), except that the array indices are 294 | # used, not the array values. everything else is explained in max() above. 295 | BEGIN { 296 | # populate array with random numbers 297 | for (i=0; i<10; i++) { 298 | a[int(rand() * 10)]; 299 | } 300 | 301 | # print the max 302 | print "max: " maxi(a, "num"); 303 | } 304 | 305 | # usage: min(array [, how ]) 306 | # the behavior is the same as that of max(), except that the minimum value is 307 | # returned instead of the maximum. everything else is explained in max() above. 308 | BEGIN { 309 | # populate array with random numbers 310 | for (i=0; i<10; i++) { 311 | a[i] = int(rand() * 10); 312 | } 313 | 314 | # print the min 315 | print "min: " min(a, "num"); 316 | } 317 | 318 | # usage: mini(array [, how ]) 319 | # the behavior is the same as that of min(), except that the array indices are 320 | # used instead of the array values. everything else is explained in min() and 321 | # max() above. 322 | BEGIN { 323 | # populate array with random numbers 324 | for (i=0; i<10; i++) { 325 | a[int(rand() * 10)]; 326 | } 327 | 328 | # print the min 329 | print "min: " mini(a, "num"); 330 | } 331 | -------------------------------------------------------------------------------- /examples/sys: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | # include library. 
gawk can use AWKPATH so the actual path isn't needed, see 4 | # the man page (since the path is relative, this assumes the lib dir is in 5 | # AWKPATH or the same dir) 6 | @include "sys.awk"; 7 | 8 | # usage: isatty(fd) 9 | # Checks if "fd" is open on a tty. Returns 1 if so, 0 if not, and -1 if an 10 | # error occurs 11 | BEGIN { 12 | if (isatty(0)) { 13 | print "stdin is open on a tty"; 14 | } else { 15 | print "stdin is not open on a tty"; 16 | } 17 | 18 | if (isatty(1)) { 19 | print "stdout is open on a tty"; 20 | } else { 21 | print "stdout is not open on a tty"; 22 | } 23 | 24 | if (isatty(2)) { 25 | print "stderr is open on a tty"; 26 | } else { 27 | print "stderr is not open on a tty"; 28 | } 29 | } 30 | 31 | # usage: mktemp(template [, type]) 32 | # creates a temporary file or directory, safely, and returns its name. 33 | # if template is not a pathname, the file will be created in ENVIRON["TMPDIR"] 34 | # if set, otherwise /tmp. the last six characters of template must be "XXXXXX", 35 | # and these are replaced with a string that makes the filename unique. type, if 36 | # supplied, is either "f", "d", or "u": for file, directory, or dry run (just 37 | # returns the name, doesn't create a file), respectively. If template is not 38 | # provided, uses "tmp.XXXXXX". Files are created u+rw, and directories u+rwx, 39 | # minus umask restrictions. returns -1 if an error occurs. 40 | BEGIN { 41 | print mktemp("foo.XXXXXX", "u"); 42 | print mktemp("./bar.XXXXXX"); 43 | print mktemp("./dir.XXXXXX", "d"); 44 | print mktemp("broken.XXX", "u"); 45 | } 46 | -------------------------------------------------------------------------------- /examples/times: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | 3 | # include library. 
gawk can use AWKPATH so the actual path isn't needed, see 4 | # the man page (since the path is relative, this assumes the lib dir is in 5 | # AWKPATH or the same dir) 6 | @include "times.awk"; 7 | 8 | # usage: month_to_num(month) 9 | # converts human readable month to the decimal representation 10 | # returns the number, -1 if the month doesn't exist 11 | BEGIN { 12 | print "month_to_num(\"January\"): " month_to_num("January"); 13 | print "month_to_num(\"Jan\"): " month_to_num("Jan"); 14 | 15 | print ""; 16 | } 17 | 18 | # usage: day_to_num(day) 19 | # converts human readable day to the decimal representation 20 | # returns the number, -1 if the day doesn't exist 21 | # like date +%w, sunday is 0 22 | BEGIN { 23 | print "day_to_num(\"Monday\"): " day_to_num("Monday"); 24 | print "day_to_num(\"Mon\"): " day_to_num("Mon"); 25 | 26 | print ""; 27 | } 28 | 29 | # usage: hr_to_sec(timestamp) 30 | # converts HH:MM:SS or MM:SS to seconds 31 | # returns -1 if invalid format 32 | BEGIN { 33 | print "hr_to_sec(\"00:05:03\"): " hr_to_sec("00:05:03"); 34 | 35 | print ""; 36 | } 37 | 38 | # usage: sec_to_hr(seconds) 39 | # converts seconds to HH:MM:SS 40 | BEGIN { 41 | print "sec_to_hr(500): " sec_to_hr(500); 42 | 43 | print ""; 44 | } 45 | 46 | # usage: ms_to_hr(milliseconds) 47 | # converts milliseconds to a "time(1)"-similar human readable format, such 48 | # as 1m4.356s 49 | BEGIN { 50 | print "ms_to_hr(116529): " ms_to_hr(116529); 51 | 52 | print ""; 53 | } 54 | 55 | # usage: add_day_suff(day_of_month) 56 | # prepends the appropriate suffix to "day_of_month". 
# for example,
# add_day_suff(1) will return "1st", and add_day_suff(22) will return "22nd"
# returns -1 if "day_of_month" is not a positive integer
BEGIN {
  print "adding suffixes for days 1-31:";
  for (d=1; d<=31; d++) {
    printf("add_day_suff(%s) -> %s\n", d, add_day_suff(d));
  }
}

--------------------------------------------------------------------------------
/math.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

## usage: abs(number)
## returns the absolute value of "number"
function abs(num) {
  return num < 0 ? -num : num;
}

## usage: ceil(number)
## returns "number" rounded UP to the nearest int
function ceil(num) {
  if (num < 0) {
    # int() truncates toward zero, which already rounds negatives up
    return int(num);
  } else {
    return int(num) + (num == int(num) ? 0 : 1);
  }
}

## usage: ceiling(multiple, number)
## returns "number" rounded UP to the nearest multiple of "multiple". the sign
## of "multiple" is ignored. works for negative values of "number" as well
function ceiling(mult, num,    r) {
  if (mult < 0) {
    mult = -mult;
  }

  # awk's % keeps the sign of the dividend, so normalize the remainder into
  # the range [0, mult) before using it
  r = num % mult;
  if (r < 0) {
    r += mult;
  }

  return r ? num + (mult - r) : num;
}

## usage: change_base(number, start_base, end_base)
## converts "number" from "start_base" to "end_base"
## bases must be between 2 and 64. the digits greater than 9 are represented
## by the lowercase letters, the uppercase letters, @, and _, in that order.
## if ibase is less than or equal to 36, lowercase and uppercase letters may
## be used interchangeably to represent numbers between 10 and 35.
## returns 0 if any argument is invalid: an empty number, a digit that does
## not exist in "start_base", or a base that is not an integer from 2 to 64
function change_base(num, ibase, obase,
                     chars, c, l, i, j, cur, b10, f, fin, isneg) {
  # convert number to lowercase if ibase <= 36
  if (ibase <= 36) {
    num = tolower(num);
  }

  # determine if number is negative. if so, set isneg=1 and remove the '-'
  if (sub(/^-/, "", num)) {
    isneg = 1;
  }

  # determine if inputs are valid. every character that can be a digit in
  # some base up to 64 is allowed here (not just hex digits); whether each
  # digit is valid for "ibase" is checked during the conversion below
  if (num == "" || num ~ /[^[:alnum:]@_]/ ||
      ibase != int(ibase) || obase != int(obase) ||
      ibase < 2 || ibase > 64 || obase < 2 || obase > 64) {
    return 0;
  }

  # set letters to numbers conversion array
  if (ibase > 10 || obase > 10) {
    # set chars[] array to convert letters to numbers (and back again)
    c = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@_";
    l = length(c);

    j = 10;
    for (i=1; i<=l; i++) {
      cur = substr(c, i, 1);
      chars[cur] = j;
      chars[j] = cur;

      j++;
    }
  }

  # convert to base 10
  if (ibase != 10) {
    l = length(num);

    j = b10 = 0;
    for (i=l; i>0; i--) {
      c = substr(num, i, 1);

      # if char is a non-digit convert to dec
      if (c !~ /^[0-9]$/) {
        # a character with no known value must not silently count as 0
        if (!(c in chars)) {
          return 0;
        }
        c = chars[c];
      }

      # check to make sure value isn't too great for base
      if (+c >= ibase) {
        return 0;
      }

      b10 += c * (ibase ^ j++);
    }
  } else {
    # num is already base 10, but it still has to consist of decimal digits
    if (num !~ /^[0-9]+$/) {
      return 0;
    }
    b10 = num;
  }

  # convert from base 10 to obase
  if (obase != 10) {
    # build number backwards
    j = 0;
    do {
      f[++j] = (c = b10 % obase) > 9 ? chars[c] : c;
      b10 = int(b10 / obase);
    } while (b10);

    # reverse number
    fin = f[j];
    for (i=j-1; i>0; i--) {
      fin = fin f[i];
    }
  } else {
    # num has already been converted to base 10
    fin = b10;
  }

  # add '-' if number was negative
  if (isneg) {
    fin = "-" fin;
  }

  return fin;
}

## usage: format_num(number)
## adds commas to "number" to make it more readable. for example,
## format_num(1000) will return "1,000", and format_num("123456.7890") will
## return "123,456.7890". also trims leading zeroes (a value consisting only
## of zeroes is returned as "0")
## returns 0 if "number" is not a valid number
## note: pass non-integer values as strings. a numeric argument is converted
## to a string using CONVFMT first, which may round it
function format_num(num,    is_float, b, e, i, len, r, out) {
  # make sure "num" is a valid number: at least one digit, nothing but digits
  # and dots, and at most one dot
  if (num !~ /[0-9]/ || num ~ /[^0-9.]/ || num ~ /\..*\./) {
    return 0;
  }

  # trim leading zeroes. if nothing is left, the number was zero
  sub(/^0+/, "", num);
  if (num == "") {
    return "0";
  }

  # if "num" is not an int, split it into pre and post decimal parts.
  # use sub() instead of int() because int() can be funny for float arithmetic
  # results
  if (num ~ /\./) {
    is_float = 1; # flag "num" as a float
    b = e = num;
    sub(/\..*/, "", b);
    sub(/.*\./, "", e);

  # otherwise, just assign the number to "b"
  } else {
    is_float = 0;
    b = num;
  }

  len = length(b);

  # only do anything if the pre-decimal section is greater than 3 digits
  if (len <= 3) {
    return num;
  }

  # start by assigning the last 3 pre-decimal digits to out
  out = substr(b, len - 2);

  # loop backwards over each grouping of 3 numbers after that, prepending
  # each to out (with a comma)
  for (i=len-5; i>0; i-=3) {
    out = substr(b, i, 3) "," out;
  }

  # if the length is not a multiple of 3, prepend the remaining digits
  if (r = len % 3) {
    out = substr(b, 1, r) "," out;
  }

  # if number was a float, add the post-decimal digits back on
  if (is_float) {
    out = out "." e;
  }

  # return the formatted number
  return out;
}

## usage: str_to_num(string)
## examines "string", and returns its numeric value. if "string" begins with a
## leading 0, assumes that "string" is an octal number. if "string" begins with
## a leading "0x" or "0X", assumes that "string" is a hexadecimal number.
## otherwise, decimal is assumed.
function str_to_num(str,    base, isneg, l, i, j, chars, c, num) {
  # convert to all lowercase
  str = tolower(str);

  # determine if number is negative. if so, set isneg=1 and remove the '-'.
  # the sign has to be stripped from "str" itself, otherwise the base
  # detection below never sees the "0x" / "0" prefix of a negative number
  if (sub(/^-/, "", str)) {
    isneg = 1;
  }

  # examine the string, to determine the base and trim said base information
  if (sub(/^0x/, "", str)) {
    base = 16;
  } else if (sub(/^0/, "", str)) {
    base = 8;
  } else {
    base = 10;
  }

  # trim everything from the first non-number character to the end
  if (base == 16) {
    sub(/[^[:xdigit:]].*/, "", str);
  } else {
    sub(/[^[:digit:]].*/, "", str);
  }

  # if the base is octal, but there's a number >= 8, set it to decimal instead
  if (base == 8 && str ~ /[89]/) {
    base = 10;
  }

  # don't need to convert if the base is 10
  if (base == 10) {
    num = +str;

  } else {
    # set letters for hex
    if (base == 16) {
      chars["a"] = 10; chars["b"] = 11; chars["c"] = 12;
      chars["d"] = 13; chars["e"] = 14; chars["f"] = 15;
    }

    # convert to base 10, walking from the least significant digit upwards
    l = length(str);

    j = num = 0;
    for (i=l; i>0; i--) {
      c = substr(str, i, 1);

      # if char is a non-digit convert to dec
      if (c !~ /^[0-9]$/) {
        c = chars[c];
      }

      num += c * (base ^ j++);
    }
  }

  # return the number. zero is never negated, so "-0" cannot be produced
  return (isneg && num) ? -num : num;
}

## usage: floor(multiple, number)
## returns "number" rounded DOWN to the nearest multiple of "multiple". the
## sign of "multiple" is ignored. works for negative values of "number" as well
function floor(mult, num,    r) {
  if (mult < 0) {
    mult = -mult;
  }

  # awk's % keeps the sign of the dividend, so normalize the remainder into
  # the range [0, mult) before subtracting it
  r = num % mult;
  if (r < 0) {
    r += mult;
  }

  return num - r;
}

## usage: round(multiple, number)
## returns "number" rounded to the nearest multiple of "multiple". a value
## exactly halfway between two multiples is rounded up. the sign of "multiple"
## is ignored. works for negative values of "number" as well
function round(mult, num,    r) {
  if (mult < 0) {
    mult = -mult;
  }

  # distance from the multiple below "num", always in the range [0, mult)
  r = num % mult;
  if (r < 0) {
    r += mult;
  }

  if (r < mult / 2) {
    return num - r;
  } else {
    return num + (mult - r);
  }
}

## usage: rint(number)
## returns "number" rounded to the nearest integer
function rint(num,    n) {
  if (num < 0) {
    return (num - (n = int(num)) < -.5) ? n - 1 : n;
  } else {
    return (num - (n = int(num)) >= .5) ? n + 1 : n;
  }
}

## usage: isint(string)
## returns 1 if "string" is a valid integer, otherwise 0
function isint(str) {
  if (str !~ /^-?[0-9]+$/) {
    return 0;
  }

  return 1;
}

## usage: isnum(string)
## returns 1 if "string" is a valid number, otherwise 0
function isnum(str) {
  # use a regex comparison because 'str == str + 0' has issues with some floats.
  # there must be at least one digit, so that "." and "-." are rejected
  if (str !~ /^-?[0-9.]+$/ || str !~ /[0-9]/ || str ~ /\..*\./) {
    return 0;
  }

  return 1;
}

## usage: isprime(number)
## returns 1 if "number" is a prime number, otherwise 0. "number" must be a
## positive integer greater than one
function isprime(num,    i, s) {
  # check to make sure "num" is a valid positive int (and not 0 or 1)
  if (num !~ /^[0-9]+$/ || num <= 1) {
    return 0;
  }

  # 2 and 3 are prime
  if (num <= 3) {
    return 1;
  }

  # check if even or divisible by 3
  if (!(num % 2) || !(num % 3)) {
    return 0;
  }

  # use naive method, fermats little theorem had overflow and did not work
  # for primes larger than 1021. only odd candidates up to sqrt(num) are tried
  s = sqrt(num);
  for (i=5; i<=s; i+=2) {
    if (!(num % i)) {
      return 0;
    }
  }

  return 1;
}

## usage: gcd(a, b)
## returns the greatest common divisor (greatest common factor) of a and b.
## both a and b must be positive integers. uses the recursive euclid algorithm.
function gcd(a, b, f,    t) {
  # check to make sure both numbers are positive ints. "f" is the flag the
  # original recursive version passed to itself to skip this check; it is
  # kept so that the signature stays the same
  if (!f) {
    if (a !~ /^[0-9]+$/ || !a || b !~ /^[0-9]+$/ || !b) {
      return 0;
    }
  }

  # euclid's algorithm, written as a loop instead of recursion
  while (b) {
    t = a % b;
    a = b;
    b = t;
  }

  # return the absolute value
  return a < 0 ? -a : a;
}

## usage: lcm(a, b)
## returns the least common multiple of a and b. both a and b must be positive
## integers.
function lcm(a, b,    m, l, t) {
  # check to make sure both numbers are positive ints
  if (a !~ /^[0-9]+$/ || !a || b !~ /^[0-9]+$/ || !b) {
    return 0;
  }

  # find the greatest common divisor "m" with euclid's algorithm, rather than
  # testing every multiple of "a" in turn
  m = a + 0;
  l = b + 0;
  while (l) {
    t = m % l;
    m = l;
    l = t;
  }

  # lcm(a, b) = a / gcd(a, b) * b. dividing first keeps the numbers small
  return (a / m) * b;
}

## usage: calc_e()
## approximates e by calculating the summation from k=0 to k=50 of 1/k!
## returns 10 decimal places
function calc_e(lim, e, k, i, f) {
  # "lim" and "i" are unused; they are kept so the signature stays the same
  e = 0;
  f = 1;
  for (k=0; k<=50; k++) {
    # carry the factorial over from the previous term: k! = (k-1)! * k
    if (k) {
      f = f * k;
    }

    # add to e
    e += 1 / f;
  }

  return sprintf("%0.10f", e);
}


## usage: calc_pi()
## returns pi, with an accuracy of 10 decimal places
function calc_pi() {
  return sprintf("%0.10f", atan2(0, -1));
}

## usage: calc_tau()
## returns tau, with an accuracy of 10 decimal places
function calc_tau() {
  return sprintf("%0.10f", 2 * atan2(0, -1));
}

## usage: deg_to_rad(degrees)
## converts degrees to radians
function deg_to_rad(deg,    tau) {
  tau = 8 * atan2(1,1);

  return (deg/360) * tau;
}

## usage: rad_to_deg(radians)
## converts radians to degrees
function rad_to_deg(rad,    tau) {
  tau = 8 * atan2(1,1);

  return (rad/tau) * 360;
}

## usage: tan(expr)
## returns the tangent of expr, which is in radians
function tan(ang) {
  return sin(ang)/cos(ang);
}

## usage: csc(expr)
## returns the cosecant of expr, which is in radians
function csc(ang) {
  return 1/sin(ang);
}

## usage: sec(expr)
## returns the secant of expr, which is in radians
function sec(ang) {
  return 1/cos(ang);
}

## usage: cot(expr)
## returns the cotangent of expr, which is in radians
function cot(ang) {
  return cos(ang)/sin(ang);
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/msort.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# comparison function
# usage: __compare(a, b, how)
# compares "a" and "b" based on "how", returning 0 for false and 1 for true.
# an unrecognized "how" compares as false.
# required for all of the msort() functions below
function __compare(a, b, how) {
  # standard comparisons
  if (how == "std asc") {
    return a < b;
  } else if (how == "std desc") {
    return a > b;

  # force string comps (prepending a letter makes both operands strings)
  } else if (how == "str asc") {
    return "a" a < "a" b;
  } else if (how == "str desc") {
    return "a" a > "a" b;

  # force numeric (unary plus makes both operands numbers)
  } else if (how == "num asc") {
    return +a < +b;
  } else if (how == "num desc") {
    return +a > +b;
  }

  # unknown comparison mode
  return 0;
}

# actual sorting function
# usage: __mergesort(array, len, how)
# sorts the values in "array" in-place, from indices 1 to "len", based
# on the comparison mode "how" (see the msort() description).
# required for all of the msort() functions below
# returns "len". every name after "how" in the parameter list is a local
# variable; "i" is among them so that sorting never overwrites a global "i"
# belonging to the calling script
function __mergesort(array, len, how,
                     tmpa, alen, a, tmpb, blen, b, half, cur, pos, tmp, i) {
  # if there are 10 elements or less, use an insertion sort and return
  if (len <= 10) {
    # loop over each item, starting with the second
    for (cur=2; cur<=len; cur++) {
      pos = cur;
      # shift the item down the list into position
      while (pos > 1 && __compare(array[pos], array[pos-1], how)) {
        tmp = array[pos];
        array[pos] = array[pos-1];
        array[pos-1] = tmp;

        pos--;
      }
    }

    # return
    return len;
  }

  # determine the halfway point of the indices
  half = int(len / 2);

  # create temp arrays of the two halves
  a = 0;
  for (i=1; i<=half; i++) {
    tmpa[++a] = array[i];

    # remove the index from the original array
    delete array[i];
  }
  b = 0;
  for (i=half+1; i<=len; i++) {
    tmpb[++b] = array[i];

    # remove the index from the original array
    delete array[i];
  }

  # sort the two halves with recursive calls
  alen = __mergesort(tmpa, a, how);
  blen = __mergesort(tmpb, b, how);

  # merge the two halves
  len = 0;
  a = b = 1;
  # loop while there is still an element in either array
  while (a <= alen || b <= blen) {
    # a sorts first
    if (a <= alen && (b > blen || __compare(tmpa[a], tmpb[b], how))) {
      array[++len] = tmpa[a];
      delete tmpa[a++]; # remove the index from the temp array

    # b sorts first
    } else {
      array[++len] = tmpb[b];
      delete tmpb[b++]; # remove the index from the temp array
    }
  }

  # return the length
  return len;
}

# actual sorting function for the msortv() function
# usage: __mergesortv(array, values, len, how)
# sorts the values in "array" on the original values in "values", from indices
# 1 through "len", based on the comparison mode "how"
# (see the msortv() description). required for all of the msortv() functions
# below. returns "len". "array" and "values" are reordered together, so that
# values[n] always stays paired with array[n]. every name after "how" in the
# parameter list is a local variable; "i" is among them so that sorting never
# overwrites a global "i" belonging to the calling script
function __mergesortv(array, values, len, how,
                      tmpa, tmpva, alen, a, tmpb, tmpvb, blen, b,
                      half, cur, pos, tmp, i) {
  # if there are 10 elements or less, use an insertion sort and return
  if (len <= 10) {
    # loop over each item, starting with the second
    for (cur=2; cur<=len; cur++) {
      pos = cur;
      # shift the item down the list into position
      while (pos > 1 && __compare(values[pos], values[pos-1], how)) {
        tmp = array[pos];
        array[pos] = array[pos-1];
        array[pos-1] = tmp;
        tmp = values[pos];
        values[pos] = values[pos-1];
        values[pos-1] = tmp;

        pos--;
      }
    }

    # return
    return len;
  }

  # determine the halfway point of the indices
  half = int(len / 2);

  # create temp arrays of the two halves
  a = 0;
  for (i=1; i<=half; i++) {
    tmpa[++a] = array[i];
    tmpva[a] = values[i];

    # remove the index from the original array
    delete array[i];
  }
  b = 0;
  for (i=half+1; i<=len; i++) {
    tmpb[++b] = array[i];
    tmpvb[b] = values[i];

    # remove the index from the original array
    delete array[i];
  }

  # sort the two halves with recursive calls
  alen = __mergesortv(tmpa, tmpva, a, how);
  blen = __mergesortv(tmpb, tmpvb, b, how);

  # merge the two halves
  len = 0;
  a = b = 1;
  # loop while there is still an element in either array
  while (a <= alen || b <= blen) {
    # a sorts first
    if (a <= alen && (b > blen || __compare(tmpva[a], tmpvb[b], how))) {
      array[++len] = tmpa[a];
      values[len] = tmpva[a];
      delete tmpva[a];
      delete tmpa[a++]; # remove the index from the temp array

    # b sorts first
    } else {
      array[++len] = tmpb[b];
      values[len] = tmpvb[b];
      delete tmpvb[b];
      delete tmpb[b++]; # remove the index from the temp array
    }
  }

  # return the length
  return len;
}



## usage: msort(s, d [, how])
## sorts the elements in the array "s" using awk's normal rules for comparing
## values, creating a new sorted array "d" indexed with sequential integers
## starting with 1. returns the length, or -1 if an error occurs. leaves the
## indices of the source array "s" unchanged. the optional string "how" controls
## the direction and the comparison mode. uses the merge sort algorithm, with an
## insertion sort when the list size gets small enough. this is not a stable
## sort. requires the __compare() and __mergesort() functions.
## valid values for "how" are:
##   "std asc"
##     use awk's standard rules for comparison, ascending. this is the default
##   "std desc"
##     use awk's standard rules for comparison, descending.
##   "str asc"
##     force comparison as strings, ascending.
##   "str desc"
##     force comparison as strings, descending.
##   "num asc"
##     force a numeric comparison, ascending.
##   "num desc"
##     force a numeric comparison, descending.
function msort(array, out, how,    count, i) {
  # make sure how is correct
  if (length(how)) {
    if (how !~ /^(st[rd]|num) (a|de)sc$/) {
      return -1;
    }

  # how was not passed, use the default
  } else {
    how = "std asc";
  }

  # loop over each index, and generate a new array with the same values and
  # sequential indices
  count = 0;
  for (i in array) {
    out[++count] = array[i];
  }

  # actually sort
  return __mergesort(out, count, how);
}

## usage: imsort(s [, how])
## the behavior is the same as that of msort(), except that the array "s" is
## sorted in-place.
## the original indices are destroyed and replaced with
## sequential integers. everything else is described in msort() above.
function imsort(array, how,    buf, n, k) {
  # fall back to the default mode when "how" is empty, and refuse any mode
  # that is not one of the documented ones
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # move every value out of "array" into a scratch array with indices 1..n
  n = 0;
  for (k in array) {
    buf[++n] = array[k];
    delete array[k];
  }

  # move the values back, so that "array" is now indexed 1..n
  for (k=1; k<=n; k++) {
    array[k] = buf[k];
    delete buf[k];
  }

  # hand the renumbered array to the real sort
  return __mergesort(array, n, how);
}

## usage: msorti(s, d [, how])
## like msort(), but it is the indices of "s" that get sorted, not its values.
## the result "d" is indexed with sequential integers starting with 1, and its
## values are the original indices of "s". see msort() above for the return
## value and for the valid values of "how".
function msorti(array, out, how,    n, k) {
  # fall back to the default mode when "how" is empty, and refuse any mode
  # that is not one of the documented ones
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # store each original index as a value under a new sequential index
  n = 0;
  for (k in array) {
    out[++n] = k;
  }

  # hand the list of indices to the real sort
  return __mergesort(out, n, how);
}

## usage: imsorti(s [, how])
## like msorti(), but "s" itself is overwritten with the result: the original
## indices are destroyed and replaced with sequential integers, and the values
## become the sorted original indices. see msort() and msorti() above for
## everything else.
function imsorti(array, how,    buf, n, k) {
  # fall back to the default mode when "how" is empty, and refuse any mode
  # that is not one of the documented ones
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # collect every original index in a scratch array, emptying "array"
  n = 0;
  for (k in array) {
    buf[++n] = k;
    delete array[k];
  }

  # put the collected indices back into "array" as values, indexed 1..n
  for (k=1; k<=n; k++) {
    array[k] = buf[k];
    delete buf[k];
  }

  # hand the renumbered array to the real sort
  return __mergesort(array, n, how);
}

## usage: msortv(s, d [, how])
## orders the indices of "s" by the values stored under them. the result "d"
## is indexed with sequential integers starting with 1, and its values are the
## indices of "s". returns the length, or -1 if an error occurs. the source
## array "s" is left unchanged. the optional string "how" controls the
## direction and the comparison mode, as explained in msort() above. uses the
## merge sort algorithm, with an insertion sort when the list size gets small
## enough. this is not a stable sort. requires the __compare() and
## __mergesortv() functions.
function msortv(array, out, how,    values, n, k) {
  # fall back to the default mode when "how" is empty, and refuse any mode
  # that is not one of the documented ones
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # build two parallel arrays under the same sequential index: the original
  # index goes into "out", and the value it refers to goes into "values"
  n = 0;
  for (k in array) {
    out[++n] = k;
    values[n] = array[k];
  }

  # sort "out" by the contents of "values"
  return __mergesortv(out, values, n, how);
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/options.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

## usage: getopts(optstring [, longopt_array ])
## Parses options, and deletes them from ARGV. "optstring" is of the form
## "ab:c". Each letter is a possible option. If the letter is followed by a
## colon (:), then the option requires an argument. If an argument is not
## provided, or an invalid option is given, getopts will print the appropriate
## error message and return "?". Returns each option as it's read, and -1 when
## no options are left. "optind" will be set to the index of the next
## non-option argument when finished. "optarg" will be set to the option's
## argument, when provided. If not provided, "optarg" will be empty. "optname"
## will be set to the current option, as provided. Getopts will delete each
## option and argument that it successfully reads, so awk will be able to treat
## whatever's left as filenames/assignments, as usual.
## If provided,
## "longopt_array" is the name of an associative array that maps long options to
## the appropriate short option (do not include the hyphens on either).
## A lone "-" is treated as a non-option argument, and ":" is never accepted as
## an option letter.
## Sample usage can be found in the examples dir, with gawk extensions, or in
## the ogrep script for a POSIX example: https://github.com/e36freak/ogrep
function getopts(optstring, longarr,    opt, trimmed, hasarg, repeat, i) {
  hasarg = repeat = 0;
  optarg = "";
  # increment optind
  optind++;

  # return -1 if there are no args left or the current arg is not an option.
  # ARGC is tested first so ARGV is never indexed past its end, and a lone "-"
  # is left alone as an operand
  if (optind >= ARGC || ARGV[optind] !~ /^-/ || ARGV[optind] == "-") {
    return -1;
  }

  # if option is "--" (end of options), delete arg and return -1
  if (ARGV[optind] == "--") {
    for (i=1; i<=optind; i++) {
      delete ARGV[i];
    }
    return -1;
  }

  # if the option is a long argument...
  if (ARGV[optind] ~ /^--/) {
    # trim hyphens
    trimmed = substr(ARGV[optind], 3);
    # if of the format --foo=bar, split the two. assign "bar" to optarg and
    # set hasarg to 1
    if (trimmed ~ /=/) {
      optarg = trimmed;
      sub(/=.*/, "", trimmed); sub(/^[^=]*=/, "", optarg);
      hasarg = 1;
    }

    # invalid long opt
    if (!(trimmed in longarr)) {
      printf("unrecognized option -- '%s'\n", ARGV[optind]) > "/dev/stderr";
      return "?";
    }

    opt = longarr[trimmed];
    # set optname by prepending dashes to the trimmed argument
    optname = "--" trimmed;

  # otherwise, it is a short option
  } else {
    # remove the hyphen, and get just the option letter
    opt = substr(ARGV[optind], 2, 1);
    # set trimmed to whatever's left
    trimmed = substr(ARGV[optind], 3);

    # invalid option. ":" is only the "takes an argument" marker inside
    # optstring, so it must not be matched as an option letter itself
    if (opt == ":" || !index(optstring, opt)) {
      printf("invalid option -- '%s'\n", opt) > "/dev/stderr";
      return "?";
    }

    # if there is more to the argument than just -o
    if (length(trimmed)) {
      # if option requires an argument, set the rest to optarg and hasarg to 1
      if (index(optstring, opt ":")) {
        optarg = trimmed;
        hasarg = 1;

      # otherwise, prepend a hyphen to the rest and set repeat to 1, so the
      # same arg is processed again without the first option
      } else {
        ARGV[optind] = "-" trimmed;
        repeat = 1;
      }
    }

    # set optname by prepending a hyphen to opt
    optname = "-" opt;
  }

  # if the option requires an arg and hasarg is 0
  if (index(optstring, opt ":") && !hasarg) {
    # increment optind, check if no arguments are left
    if (++optind >= ARGC) {
      printf("option requires an argument -- '%s'\n", optname) > "/dev/stderr";
      return "?";
    }

    # set optarg
    optarg = ARGV[optind];

  # if repeat is set, decrement optind so we process the same arg again
  # mutually exclusive to needing an argument, otherwise hasarg would be set
  } else if (repeat) {
    optind--;
  }

  # delete all arguments up to this point, just to make sure
  for (i=1; i<=optind; i++) {
    delete ARGV[i];
  }

  # return the option letter
  return opt;
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/psort.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# comparison function for the *psort* functions
# usage: __pcompare(a, b, patterns, max, how)
# compares "a" and "b" based on "patterns" and "how", returning 0 for false and
# 1 for true. "patterns" is an indexed array of regexes, from 1 through "max".
# each regex takes priority over subsequent regexes, followed by non-matching
# values. requires the __compare() function from qsort.awk. required for all of
# the psort() functions below
function __pcompare(a, b, patterns, plen, how,    p) {
  # loop over each regex in order, and check if either value matches
  for (p=1; p<=plen; p++) {
    # if the first matches...
    if (a ~ patterns[p]) {
      # ...and so does the second, neither has priority: do a normal comparison
      if (b ~ patterns[p]) {
        return __compare(a, b, how);
      }

      # the second doesn't match, so the first sorts higher
      return 1;

    # if the second matches but the first didn't, the second sorts higher
    } else if (b ~ patterns[p]) {
      return 0;
    }
  }

  # no patterns matched, do a normal comparison
  return __compare(a, b, how);
}


# actual sorting function for the *psort* functions
# sorts the values in "array" in-place, from indices "left" to "right", based
# on "how" and the array "patterns" (see the psort() description)
# required for all of the psort() functions below
function __pquicksort(array, left, right, patterns, plen, how,
                      piv, mid, tmp) {
  # return if array contains one element or less
  if ((right - left) <= 0) {
    return;
  }

  # choose random pivot
  piv = int(rand() * (right - left + 1)) + left;

  # swap left and pivot, so the pivot value sits at "left" while partitioning
  tmp = array[piv];
  array[piv] = array[left];
  array[left] = tmp;

  mid = left;
  # iterate over each element from the second to the last, and compare it
  # against the pivot value
  for (piv=left+1; piv<=right; piv++) {
    # if the comparison based on "how" is true...
    if (__pcompare(array[piv], array[left], patterns, plen, how)) {
      # increment mid
      mid++;

      # swap mid and the current element
      tmp = array[piv];
      array[piv] = array[mid];
      array[mid] = tmp;
    }
  }

  # swap left and mid, putting the pivot value in its final position
  tmp = array[mid];
  array[mid] = array[left];
  array[left] = tmp;

  # recursively sort the two halves
  __pquicksort(array, left, mid - 1, patterns, plen, how);
  __pquicksort(array, mid + 1, right, patterns, plen, how);
}


## usage: psort(s, d, patts, max [, how])
## sorts the values of the array "s", based on the rules below. creates a new
## sorted array "d" indexed with sequential integers starting with 1. "patts"
## is a compact (*non-sparse) 1-indexed array containing regular expressions.
## "max" is the length of the "patts" array. returns the length of the "d"
## array. valid values for "how" are explained below. uses the quicksort
## algorithm, with a random pivot to avoid worst-case behavior on already sorted
## arrays. requires the __pcompare() and __pquicksort() functions.
##
## Sorting rules:
## - When sorting, values matching an expression in the "patts" array will
##   take priority over any other values
## - Each expression in the "patts" array will have priority in ascending
##   order by index. "patts[1]" will have priority over "patts[2]" and
##   "patts[3]", etc
## - Values both matching the same regex will be compared as usual
## - All non-matching values will be compared as usual
##
## valid values for "how" are:
##   "std asc"
##     use awk's standard rules for comparison, ascending. this is the default
##   "std desc"
##     use awk's standard rules for comparison, descending.
##   "str asc"
##     force comparison as strings, ascending.
##   "str desc"
##     force comparison as strings, descending.
##   "num asc"
##     force a numeric comparison, ascending.
##   "num desc"
##     force a numeric comparison, descending.
function psort(array, out, patterns, plen, how,    n, key) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # copy the values into "out" under sequential indices, starting at 1
  n = 0;
  for (key in array) {
    n++;
    out[n] = array[key];
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort the copy in place
  __pquicksort(out, 1, n, patterns, plen, how);

  # the number of elements copied is the length of "out"
  return n;
}

## usage: ipsort(s, patts, max [, how])
## the behavior is the same as that of psort(), except that the array "s" is
## sorted in-place. the original indices are destroyed and replaced with
## sequential integers. everything else is described in psort() above.
function ipsort(array, patterns, plen, how,    held, n, k) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # move every value into a scratch list, emptying the array as we go
  n = 0;
  for (k in array) {
    n++;
    held[n] = array[k];
    delete array[k];
  }

  # rebuild the array under indices 1..n, releasing the scratch list
  k = 1;
  while (k <= n) {
    array[k] = held[k];
    delete held[k];
    k++;
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort in place
  __pquicksort(array, 1, n, patterns, plen, how);

  # return the length
  return n;
}

## usage: psorti(s, d, patts, max [, how])
## the behavior is the same as that of psort(), except that the array indices
## are used for sorting, not the array values. when done, the new array is
## indexed numerically, and the values are those of the original indices.
## everything else is described in psort() above.
function psorti(array, out, patterns, plen, how,    n, key) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # copy the indices (not the values) into "out" under sequential indices
  n = 0;
  for (key in array) {
    n++;
    out[n] = key;
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort the copied indices in place
  __pquicksort(out, 1, n, patterns, plen, how);

  # return the length
  return n;
}

## usage: ipsorti(s, patts, max [, how])
## the behavior is the same as that of psorti(), except that the array "s" is
## sorted in-place. the original indices are destroyed and replaced with
## sequential integers. everything else is described in psort() and psorti()
## above.
function ipsorti(array, patterns, plen, how,    keys, n, k) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # move every index into a scratch list, emptying the array as we go
  n = 0;
  for (k in array) {
    n++;
    keys[n] = k;
    delete array[k];
  }

  # rebuild the array as 1..n -> original index, releasing the scratch list
  k = 1;
  while (k <= n) {
    array[k] = keys[k];
    delete keys[k];
    k++;
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort in place
  __pquicksort(array, 1, n, patterns, plen, how);

  # return the length
  return n;
}


# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/qsort.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# comparison function
# usage: __compare(a, b, how)
# compares "a" and "b" based on "how", returning 0 for false and 1 for true.
# required for all of the qsort() functions below
function __compare(a, b, how) {
  # standard comparisons
  if (how == "std asc") {
    return a < b;
  }
  if (how == "std desc") {
    return a > b;
  }

  # force string comparisons, by prefixing both operands with a letter
  if (how == "str asc") {
    return ("a" a) < ("a" b);
  }
  if (how == "str desc") {
    return ("a" a) > ("a" b);
  }

  # force numeric comparisons, by applying unary plus to both operands
  if (how == "num asc") {
    return +a < +b;
  }
  if (how == "num desc") {
    return +a > +b;
  }
}

# actual sorting function
# sorts the values in "array" in-place, from indices "left" to "right", based
# on the comparison mode "how" (see the qsort() description).
# required for all of the qsort() functions below
function __quicksort(array, left, right, how,    scan, store, held) {
  # nothing to do for a range of one element or less
  if ((right - left) <= 0) {
    return;
  }

  # pick a random pivot and park it at "left"
  scan = int(rand() * (right - left + 1)) + left;
  held = array[scan];
  array[scan] = array[left];
  array[left] = held;

  # walk the rest of the range; every element for which the comparison against
  # the pivot is true gets swapped into the region just after "left"
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (__compare(array[scan], array[left], how)) {
      store++;

      held = array[scan];
      array[scan] = array[store];
      array[store] = held;
    }
    scan++;
  }

  # drop the pivot into its final slot
  held = array[store];
  array[store] = array[left];
  array[left] = held;

  # recursively sort the two halves
  __quicksort(array, left, store - 1, how);
  __quicksort(array, store + 1, right, how);
}

# actual sorting function for the qsortv() function
# sorts the indices in "array" on the original values in "values", from indices
# "left" to "right", based on the comparison mode "how" (see the qsortv()
# description)
# required for the qsortv() function below
function __vquicksort(array, values, left, right, how,    scan, store, held) {
  # nothing to do for a range of one element or less
  if ((right - left) <= 0) {
    return;
  }

  # pick a random pivot and park it at "left", keeping both lists in step
  scan = int(rand() * (right - left + 1)) + left;
  held = array[scan];
  array[scan] = array[left];
  array[left] = held;
  held = values[scan];
  values[scan] = values[left];
  values[left] = held;

  # walk the rest of the range, comparing on "values" only; every swap is
  # applied to both lists
  store = left;
  scan = left + 1;
  while (scan <= right) {
    if (__compare(values[scan], values[left], how)) {
      store++;

      held = array[scan];
      array[scan] = array[store];
      array[store] = held;
      held = values[scan];
      values[scan] = values[store];
      values[store] = held;
    }
    scan++;
  }

  # drop the pivot into its final slot in both lists
  held = array[store];
  array[store] = array[left];
  array[left] = held;
  held = values[store];
  values[store] = values[left];
  values[left] = held;

  # recursively sort the two halves
  __vquicksort(array, values, left, store - 1, how);
  __vquicksort(array, values, store + 1, right, how);
}



## usage: qsort(s, d [, how])
## sorts the elements in the array "s" using awk's normal rules for comparing
## values, creating a new sorted array "d" indexed with sequential integers
## starting with 1. returns the length, or -1 if an error occurs. leaves the
## indices of the source array "s" unchanged. the optional string "how" controls
## the direction and the comparison mode. uses the quick sort algorithm, with a
## random pivot to avoid worst-case behavior on already sorted arrays. this is
## not a stable sort. requires the __compare() and __quicksort() functions.
## valid values for "how" are:
##   "std asc"
##     use awk's standard rules for comparison, ascending. this is the default
##   "std desc"
##     use awk's standard rules for comparison, descending.
##   "str asc"
##     force comparison as strings, ascending.
##   "str desc"
##     force comparison as strings, descending.
##   "num asc"
##     force a numeric comparison, ascending.
##   "num desc"
##     force a numeric comparison, descending.
function qsort(array, out, how,    n, key) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # copy the values into "out" under sequential indices, starting at 1
  n = 0;
  for (key in array) {
    n++;
    out[n] = array[key];
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort the copy in place
  __quicksort(out, 1, n, how);

  # return the length
  return n;
}

## usage: iqsort(s [, how])
## the behavior is the same as that of qsort(), except that the array "s" is
## sorted in-place. the original indices are destroyed and replaced with
## sequential integers. everything else is described in qsort() above.
function iqsort(array, how,    held, n, k) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # move every value into a scratch list, emptying the array as we go
  n = 0;
  for (k in array) {
    n++;
    held[n] = array[k];
    delete array[k];
  }

  # rebuild the array under indices 1..n, releasing the scratch list
  k = 1;
  while (k <= n) {
    array[k] = held[k];
    delete held[k];
    k++;
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort in place
  __quicksort(array, 1, n, how);

  # return the length
  return n;
}

## usage: qsorti(s, d [, how])
## the behavior is the same as that of qsort(), except that the array indices
## are used for sorting, not the array values.
## when done, the new array is
## indexed numerically, and the values are those of the original indices.
## everything else is described in qsort() above.
function qsorti(array, out, how,    n, key) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # copy the indices (not the values) into "out" under sequential indices
  n = 0;
  for (key in array) {
    n++;
    out[n] = key;
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort the copied indices in place
  __quicksort(out, 1, n, how);

  # return the length
  return n;
}

## usage: iqsorti(s [, how])
## the behavior is the same as that of qsorti(), except that the array "s" is
## sorted in-place. the original indices are destroyed and replaced with
## sequential integers. everything else is described in qsort() and qsorti()
## above.
function iqsorti(array, how,    keys, n, k) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # move every index into a scratch list, emptying the array as we go
  n = 0;
  for (k in array) {
    n++;
    keys[n] = k;
    delete array[k];
  }

  # rebuild the array as 1..n -> original index, releasing the scratch list
  k = 1;
  while (k <= n) {
    array[k] = keys[k];
    delete keys[k];
    k++;
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort in place
  __quicksort(array, 1, n, how);

  # return the length
  return n;
}

## usage: qsortv(s, d [, how])
## sorts the indices in the array "s" based on the values, creating a new
## sorted array "d" indexed with sequential integers starting with 1, and the
## values the indices of "s". returns the length, or -1 if an error occurs.
## leaves the source array "s" unchanged. the optional string "how" controls
## the direction and the comparison mode. uses the quicksort algorithm, with a
## random pivot to avoid worst-case behavior on already sorted arrays. this is
## not a stable sort. requires the __compare() and __vquicksort() functions.
## valid values for "how" are explained in the qsort() function above.
function qsortv(array, out, how,    vals, n, key) {
  # an empty "how" selects the default mode; anything else must look like
  # "<std|str|num> <asc|desc>"
  if (!length(how)) {
    how = "std asc";
  } else if (how !~ /^(st[rd]|num) (a|de)sc$/) {
    return -1;
  }

  # build two parallel 1-indexed lists: "out" holds the original indices and
  # "vals" holds the value that belongs to each of them
  n = 0;
  for (key in array) {
    out[++n] = key;
    vals[n] = array[key];
  }

  # seed the random number generator used for pivot selection
  srand();

  # sort "out" by "vals"
  __vquicksort(out, vals, 1, n, how);

  # return the length
  return n;
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/shuf.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# actual shuffle function
# shuffles the values in "array" in-place, from indices "left" to "right".
# required for all of the shuf() functions below
function __shuffle(array, left, right,    pick, pos, held) {
  # walk backwards from the last element down to (but not including) "left"
  pos = right;
  while (pos > left) {
    # pick a random index between "left" and the current position, inclusive
    pick = int(rand() * (pos - left + 1)) + left;

    # swap the current element with the picked one
    held = array[pos];
    array[pos] = array[pick];
    array[pick] = held;

    pos--;
  }
}



## usage: shuf(s, d)
## shuffles the array "s", creating a new shuffled array "d" indexed with
## sequential integers starting with one. returns the length, or -1 if an error
## occurs. leaves the indices of the source array "s" unchanged. uses the knuth-
## fisher-yates algorithm. requires the __shuffle() function.
function shuf(array, out,    n, key) {
  # copy the values into "out" under sequential indices, starting at 1
  n = 0;
  for (key in array) {
    n++;
    out[n] = array[key];
  }

  # seed the random number generator
  srand();

  # shuffle the copy in place
  __shuffle(out, 1, n);

  # return the length
  return n;
}

## usage: ishuf(s)
## the behavior is the same as that of shuf(), except the array "s" is shuffled
## in-place. the original indices are destroyed and replaced with sequential
## integers. everything else is described in shuf() above.
function ishuf(array,    held, n, k) {
  # move every value into a scratch list, emptying the array as we go
  n = 0;
  for (k in array) {
    n++;
    held[n] = array[k];
    delete array[k];
  }

  # rebuild the array under indices 1..n, releasing the scratch list
  k = 1;
  while (k <= n) {
    array[k] = held[k];
    delete held[k];
    k++;
  }

  # seed the random number generator
  srand();

  # shuffle in place
  __shuffle(array, 1, n);

  # return the length
  return n;
}

## usage: shufi(s, d)
## the behavior is the same as that of shuf(), except that the array indices
## are shuffled, not the array values. when done, the new array is indexed
## numerically, and the values are those of the original indices. everything
## else is described in shuf() above.
function shufi(array, out,    n, key) {
  # copy the indices (not the values) into "out" under sequential indices
  n = 0;
  for (key in array) {
    n++;
    out[n] = key;
  }

  # seed the random number generator
  srand();

  # shuffle the copied indices in place
  __shuffle(out, 1, n);

  # return the length
  return n;
}

## usage: ishufi(s)
## the behavior is the same as that of shufi(), except that the array "s" is
## shuffled in-place. the original indices are destroyed and replaced with
## sequential integers. everything else is described in shuf() and shufi()
## above.
function ishufi(array,    keys, n, k) {
  # move every index into a scratch list, emptying the array as we go
  n = 0;
  for (k in array) {
    n++;
    keys[n] = k;
    delete array[k];
  }

  # rebuild the array as 1..n -> original index, releasing the scratch list
  k = 1;
  while (k <= n) {
    array[k] = keys[k];
    delete keys[k];
    k++;
  }

  # seed the random number generator
  srand();

  # shuffle in place
  __shuffle(array, 1, n);

  # return the length
  return n;
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/strings.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

# comparison function
# compares "a" and "b" based on "how", returning 0 for false and 1 for true
# required for all max() and min() functions below
function __mcompare(a, b, how) {
  # standard comparison
  if (how == "std") {
    return a > b;
  }

  # force string comparison, by prefixing both operands with a letter
  if (how == "str") {
    return ("a" a) > ("a" b);
  }

  # force numeric comparison, by applying unary plus to both operands
  if (how == "num") {
    return +a > +b;
  }
}



## usage: center(string [, width])
## returns "string" centered based on "width". if "width" is not provided (or
## is 0), uses the width of the terminal, or 80 if standard output is not open
## on a terminal.
## note: does not check the length of the string. if it's wider than the
## terminal, it will not center lines other than the first. for best results,
## combine with fold() (see the cfold script in the examples directory for a
## script that does exactly this)
function center(str, cols,    pad, query) {
  if (!cols) {
    # "test -t 1" exits 0 when stdout is a tty; only then ask tput for the
    # width, otherwise fall back to 80 columns
    if (system("test -t 1") == 0) {
      query = "tput cols";
      query | getline cols;
      close(query);
    } else {
      cols = 80;
    }
  }

  # right-align the string in a field that ends half the string's length past
  # the midpoint of the line
  pad = int((cols/2) + (length(str)/2));

  return sprintf("%*s", pad, str);
}

## usage: delete_arr(array)
## deletes every element in "array"
function delete_arr(arr) {
  # splitting an empty string leaves the target array empty
  split("", arr);
}

## usage: fold(string, sep [, width])
## returns "string", wrapped, with lines broken on "sep" to "width" columns.
## "sep" is a list of characters to break at, similar to IFS in a POSIX shell.
## if "sep" is empty, wraps at exactly "width" characters. if "width" is not
## provided (or is 0), uses the width of the terminal, or 80 if standard output
## is not open on a terminal.
## note: currently, tabs are squeezed to a single space.
## this will be fixed
function fold(str, sep, cols,    out, cmd, i, len, chars, c, last, f, first) {
  if (!cols) {
    # checks if stdout is a tty
    if (system("test -t 1")) {
      cols = 80;
    } else {
      cmd = "tput cols";
      cmd | getline cols;
      close(cmd);
    }
  }

  # squeeze tabs and newlines to spaces
  gsub(/[\t\n]/, " ", str);

  # if "sep" is empty, just fold on cols with substr
  if (!length(sep)) {
    len = length(str);

    out = substr(str, 1, cols);
    for (i=cols+1; i<=len; i+=cols) {
      out = out "\n" substr(str, i, cols);
    }

    return out;
  }

  # otherwise, we have to loop over every character (can't split() on sep, it
  # would destroy the existing separators)

  # split string into char array
  len = split(str, chars, "");
  # set boolean, used to assign the first line differently
  first = 1;

  # "i" is the start of the current output line, "last" its length
  for (i=1; i<=len; i+=last) {
    if (i + cols - 1 >= len) {
      # everything that is left fits on one line: take it whole. looking for
      # a separator here would break a line that needs no break, and would
      # read "chars" past the end of the string
      last = len - i + 1;
    } else {
      # find the right-most separator within the next "cols" characters
      f = 0;
      for (c=i+cols-1; c>=i; c--) {
        if (index(sep, chars[c])) {
          last = c - i + 1;
          f = 1;
          break;
        }
      }

      # no separator in range, break at exactly "cols" characters
      if (!f) {
        last = cols;
      }
    }

    if (first) {
      out = substr(str, i, last);
      first = 0;
    } else {
      out = out "\n" substr(str, i, last);
    }
  }

  # return the output
  return out;
}

## usage: ssub(ere, repl [, in])
## behaves like sub, except returns the result and doesn't modify the original
function ssub(ere, repl, str) {
  # if "in" is not provided (or is empty), use $0
  if (!length(str)) {
    str = $0;
  }

  # substitute in the local copy only
  sub(ere, repl, str);
  return str;
}

## usage: sgsub(ere, repl [, in])
## behaves like gsub, except returns the result and doesn't modify the original
function sgsub(ere, repl, str) {
  # if "in" is not provided (or is empty), use $0
  if (!length(str)) {
    str = $0;
  }

  # substitute in the local copy only
  gsub(ere, repl, str);
  return str;
}

## usage: lsub(str, repl [, in])
## substitutes the string "repl" in place of the first instance of "str" in the
## string "in" and returns the result. does not modify the original string.
## if "in" is not provided, uses $0.
function lsub(str, rep, val,    len, i) {
  # if "in" is not provided (or is empty), use $0
  if (!length(val)) {
    val = $0;
  }

  # get the length of str, in order to know how much of the string to remove
  if (!(len = length(str))) {
    # if "str" is empty, just prepend "rep" and return
    val = rep val;
    return val;
  }

  # replace the first literal occurrence of str with rep
  if (i = index(val, str)) {
    val = substr(val, 1, i - 1) rep substr(val, i + len);
  }

  # return the result
  return val;
}

## usage: glsub(str, repl [, in])
## behaves like lsub, except it replaces all occurrences of "str"
function glsub(str, rep, val,    out, len, i, a, l) {
  # if "in" is not provided (or is empty), use $0
  if (!length(val)) {
    val = $0;
  }
  # empty the output string
  out = "";

  # get the length of str, in order to know how much of the string to remove
  if (!(len = length(str))) {
    # if "str" is empty, adds "rep" between every character and returns
    l = split(val, a, "");
    for (i=1; i<=l; i++) {
      out = out rep a[i];
    }

    return out rep;
  }

  # loop while "str" still occurs in what is left of "val"
  while (i = index(val, str)) {
    # append everything up to the search string, and the replacement, to out
    out = out substr(val, 1, i - 1) rep;
    # remove everything up to and including the first instance of str from val
    val = substr(val, i + len);
  }

  # append whatever is left in val to out and return
  return out val;
}

## usage: shell_esc(string)
## returns the string escaped so that it
## can be used in a shell command. the whole string is wrapped in single
## quotes, and every single quote inside it is rewritten as '\''
function shell_esc(str) {
  # ' becomes '\'': close the quoted string, add an escaped quote, reopen it
  gsub(/'/, "'\\''", str);

  return sprintf("'%s'", str);
}

## usage: str_to_arr(string, array)
## splits "string" into "array", one character per element, with indices
## starting at 1. returns the number of elements stored in "array"
function str_to_arr(str, arr) {
  # an empty separator makes split() cut the string between every character
  return split(str, arr, "");
}

222 | ## usage: extract_range(string, start, stop) 223 | ## extracts fields "start" through "stop" from "string", based on FS, with the 224 | ## original field separators intact. returns the extracted fields. 225 | function extract_range(str, start, stop, i, re, out) { 226 | # if FS is the default, trim leading and trailing spaces from "string" and 227 | # set "re" to the appropriate regex 228 | if (FS == " ") { 229 | gsub(/^[[:space:]]+|[[:space:]]+$/, "", str); 230 | re = "[[:space:]]+"; 231 | } else { 232 | re = FS; 233 | } 234 | 235 | # remove fields 1 through start - 1 from the beginning 236 | for (i=1; i0; i--) { 443 | o = o a[i]; 444 | } 445 | 446 | return o; 447 | }

## usage: max(array [, how ])
## returns the maximum value in "array", 0 if the array is empty, or -1 if an
## error occurs. the optional string "how" controls the comparison mode.
## requires the __mcompare() function.
## valid values for "how" are:
##   "std"
##     use awk's standard rules for comparison.
##     this is the default
##   "str"
##     force comparison as strings
##   "num"
##     force a numeric comparison
function max(array, how,    best, key, seen) {
  # an omitted mode means "std"; anything else must be a known mode
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # result for an empty array
  best = 0;
  seen = 0;

  for (key in array) {
    # the first value visited seeds the result without being compared; later
    # values replace it whenever __mcompare(value, best, how) is true
    # (__mcompare() lives elsewhere in this library)
    if (!seen || __mcompare(array[key], best, how)) {
      best = array[key];
      seen = 1;
    }
  }

  return best;
}

## usage: maxi(array [, how ])
## works exactly like max(), except that the array indices are examined
## instead of the array values. see max() above for the meaning of "how" and
## for the return values.
function maxi(array, how,    best, key, seen) {
  # an omitted mode means "std"; anything else must be a known mode
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # result for an empty array
  best = 0;
  seen = 0;

  for (key in array) {
    # the first index visited seeds the result; later indices replace it
    # whenever __mcompare(index, best, how) is true
    if (!seen || __mcompare(key, best, how)) {
      best = key;
      seen = 1;
    }
  }

  return best;
}

## usage: min(array [, how ])
## the behavior is the same as that of max(), except that the minimum value is
## returned instead of the maximum. everything else is explained in max() above.
function min(array, how,    low, key, seen) {
  # an omitted mode means "std"; anything else must be a known mode
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # result for an empty array
  low = 0;
  seen = 0;

  for (key in array) {
    # the first value visited seeds the result without being compared; later
    # values replace it whenever __mcompare(low, value, how) is true, i.e. the
    # arguments are swapped compared to max()
    if (!seen || __mcompare(low, array[key], how)) {
      low = array[key];
      seen = 1;
    }
  }

  return low;
}

## usage: mini(array [, how ])
## works exactly like min(), except that the array indices are examined
## instead of the array values. see min() and max() above for the meaning of
## "how" and for the return values.
function mini(array, how,    low, key, seen) {
  # an omitted mode means "std"; anything else must be a known mode
  if (!length(how)) {
    how = "std";
  } else if (how !~ /^(st[rd]|num)$/) {
    return -1;
  }

  # result for an empty array
  low = 0;
  seen = 0;

  for (key in array) {
    # the first index visited seeds the result; later indices replace it
    # whenever __mcompare(low, index, how) is true
    if (!seen || __mcompare(low, key, how)) {
      low = key;
      seen = 1;
    }
  }

  return low;
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/sys.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

## usage: isatty(fd)
## Checks if "fd" is open on a tty.
## Returns 1 if so, 0 if not, and -1 if an
## error occurs
function isatty(fd) {
  # only a string of digits can name a file descriptor
  if (fd !~ /^[0-9]+$/) {
    return -1;
  }

  # test(1) exits with status 0 when the descriptor is open on a terminal
  return (system("test -t " fd) == 0) ? 1 : 0;
}

## usage: mktemp(template [, type])
## creates a temporary file or directory and returns its name.
## if template is not a pathname, the file will be created in ENVIRON["TMPDIR"]
## if set, otherwise /tmp. the last six characters of template must be "XXXXXX",
## and these are replaced with a random string that makes the filename unique.
## type, if supplied, is either "f", "d", or "u": for file, directory, or dry
## run (just returns the name, doesn't create a file), respectively. If template
## is not provided, uses "tmp.XXXXXX". Files are created u+rw, and directories
## u+rwx, minus the owner bits of the umask; group and other get no access.
## returns -1 if the template or type is invalid, if the target directory does
## not exist, or if the file or directory cannot be created.
## note: the existence check and the creation are separate shell commands, so
## the operation is not atomic. rand() is used as is; seeding it with srand()
## is left to the caller.
function mktemp(template, type,
                c, chars, len, dir, dir_esc, rstring, i, out, out_esc, umask,
                cmd, mode) {
  # portable filename characters (all ten digits and both alphabets)
  c = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  len = split(c, chars, "");

  # make sure template is valid
  if (length(template)) {
    if (template !~ /XXXXXX$/) {
      return -1;
    }

  # template was not supplied, use the default
  } else {
    template = "tmp.XXXXXX";
  }

  # make sure type is valid
  if (length(type)) {
    if (type !~ /^[fdu]$/) {
      return -1;
    }

  # type was not supplied, use the default
  } else {
    type = "f";
  }

  # if template is a path, split it into directory and file name
  if (template ~ /\//) {
    dir = template;
    sub(/\/[^/]*$/, "", dir);
    sub(/.*\//, "", template);

  # template is not a path, determine base dir
  } else {
    if (length(ENVIRON["TMPDIR"])) {
      dir = ENVIRON["TMPDIR"];
    } else {
      dir = "/tmp";
    }
  }

  # escape dir for shell commands. gsub, not sub: every single quote in the
  # path has to be rewritten, not just the first one
  dir_esc = dir;
  gsub(/'/, "'\\''", dir_esc);
  dir_esc = "'" dir_esc "'";

  # if this is not a dry run, make sure the dir exists
  if (type != "u" && system("test -d " dir_esc)) {
    return -1;
  }

  # get the base of the template, sans Xs. substr() positions start at 1;
  # starting at 0 would silently drop the last character of the base
  template = substr(template, 1, length(template) - 6);

  # generate names until one is found that does not exist yet
  do {
    rstring = "";
    for (i=0; i<6; i++) {
      c = chars[int(rand() * len) + 1];
      rstring = rstring c;
    }

    out_esc = out = dir "/" template rstring;
    gsub(/'/, "'\\''", out_esc);
    out_esc = "'" out_esc "'";
  } while (!system("test -e " out_esc));

  # dry run: the name is all that was asked for
  if (type == "u") {
    return out;
  }

  # create the file or directory; mode holds the owner permission digit
  # before the umask is applied
  if (type == "f") {
    mode = 6;
    cmd = "touch " out_esc;
  } else {
    mode = 7;
    cmd = "mkdir " out_esc;
  }

  # a non-zero exit status means nothing usable was created
  if (system(cmd)) {
    return -1;
  }

  # read the umask. the owner digit is the third from the end, whether the
  # shell prints "022" or "0022"
  cmd = "umask";
  cmd | getline umask;
  close(cmd);
  umask = substr(umask, length(umask) - 2, 1) + 0;

  # clear each permission bit that is set in the owner digit of the umask
  # (this is bit masking, which plain subtraction only mimics for some values)
  if (umask >= 4) {
    mode -= 4;
  }
  if (int(umask / 2) % 2) {
    mode -= 2;
  }
  if (mode % 2 && umask % 2) {
    mode -= 1;
  }

  system("chmod 0" mode "00 " out_esc);

  # return the filename
  return out;
}



# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------
/times.awk:
--------------------------------------------------------------------------------
#!/usr/bin/awk -f

## usage: month_to_num(month)
## converts human readable month to the decimal representation
## returns the number, -1 if the month doesn't
## exist. both the full name and the three letter abbreviation are accepted,
## in any letter case
function month_to_num(mon,    months, m) {
  # months[1] .. months[12] hold the lowercase names in calendar order, so
  # the array index is the month number
  split("january february march april may june july august september " \
        "october november december", months, " ");

  # convert month to lowercase
  mon = tolower(mon);

  # compare against each full name and its abbreviation. the abbreviations
  # are derived on the fly rather than inserted into the array during a
  # for-in loop, since adding elements while iterating is undefined in POSIX
  for (m = 1; m in months; m++) {
    if (mon == months[m] || mon == substr(months[m], 1, 3)) {
      return m;
    }
  }

  # no such month
  return -1;
}

## usage: day_to_num(day)
## converts human readable day to the decimal representation
## returns the number, -1 if the day doesn't exist
## like date +%w, sunday is 0
## accepted abbreviations: sun, mon, tue, tues, wed, thu, thur, thurs, fri, sat
function day_to_num(day,    days, d) {
  # populate days[] array
  days["sunday"] = 0; days["monday"] = 1; days["tuesday"] = 2;
  days["wednesday"] = 3; days["thursday"] = 4; days["friday"] = 5;
  days["saturday"] = 6;

  # also populate abbreviations, including the three letter forms of
  # tuesday and thursday
  days["sun"] = 0; days["mon"] = 1; days["tue"] = 2; days["tues"] = 2;
  days["wed"] = 3; days["thu"] = 4; days["thur"] = 4; days["thurs"] = 4;
  days["fri"] = 5; days["sat"] = 6;

  # convert day to lowercase
  day = tolower(day);

  # look the day up
  return (day in days) ? days[day] : -1;
}

## usage: hr_to_sec(timestamp)
## converts HH:MM:SS or MM:SS to seconds
## returns -1 if invalid format
function hr_to_sec(time,    t, l, i, j) {
  # check for valid format
  if (time !~ /^[0-9]+(:[0-9][0-9])?:[0-9][0-9]$/) {
    return -1;
  }

  # break the timestamp into its colon separated parts
  l = split(time, t, /:/);

  # walk the parts from the most significant one, multiplying the running
  # total by 60 before adding each part
  j = 0;
  for (i=1; i<=l; i++) {
    j = j * 60 + t[i];
  }

  return j;
}

## usage: sec_to_hr(seconds)
## converts seconds to HH:MM:SS
function sec_to_hr(sec,    m, s) {
  # seconds left over after removing whole minutes
  s = sec % 60;
  # whole minutes, then minutes left over after removing whole hours
  sec = int(sec / 60);
  m = sec % 60;

  # what is left in "sec" after the final division is the number of hours
  return sprintf("%02d:%02d:%02d", int(sec / 60), m, s);
}

## usage: ms_to_hr(milliseconds)
## converts milliseconds to a "time(1)"-similar human readable format, such
## as 1m4.356s
function ms_to_hr(ms,    m, s, ns) {
  # work in seconds from here on; "s" is the whole number of seconds
  ms = ms / 1000;
  s = int(ms);
  m = int(s / 60);

  # seconds within the current minute, plus the fractional part
  ns = (s % 60) + (ms - s);

  return sprintf("%dm%0.3fs", m, ns);
}

## usage: add_day_suff(day_of_month)
## appends the appropriate ordinal suffix to "day_of_month". for example,
## add_day_suff(1) will return "1st", and add_day_suff(22) will return "22nd"
## returns -1 if "day_of_month" is not a positive integer
function add_day_suff(day) {
  # make sure day is a positive int
  if (day !~ /^[0-9]+$/ || day <= 0) {
    return -1;
  }

  # numbers ending in 11, 12 or 13 take "th" despite their last digit
  if (day ~ /1[123]$/) {
    return day "th";
  }

  # otherwise the last digit decides
  if (day ~ /1$/) {
    return day "st";
  } else if (day ~ /2$/) {
    return day "nd";
  } else if (day ~ /3$/) {
    return day "rd";
  }

  return day "th";
}




# You can do whatever you want with this stuff, but a thanks is always
# appreciated

--------------------------------------------------------------------------------