├── nim.cfg ├── catz.nim ├── doc ├── blur1.png ├── claw.png ├── pwise.png ├── simul.png ├── tubes.png ├── vipK.gif ├── vipO.gif ├── dfrDark.png ├── dfrLight.png ├── tFping.gif ├── consisCvg.png ├── wsz.md ├── rr.md ├── cfold.md ├── colSort.md ├── fkindc.md ├── emin.md ├── tslice.md ├── fpr.md ├── fage.md ├── tmpls.md ├── jointr.md ├── ww.md ├── ru.md ├── niom.md ├── fsids.md ├── nrel.md ├── dirt.md ├── flow.md ├── since.md ├── holes.md ├── unfold.md ├── adorn.md ├── okpaths.md ├── ndelta.md ├── tmath.md ├── memlat.md ├── chom.md ├── lncs.md ├── uce.md ├── notIn.md ├── sr.md ├── cstats.md ├── rs.md ├── newest.md ├── METAPKG.md ├── wgt.md ├── only.md ├── noc.md ├── keydowns.md ├── zeh.md ├── dups.md ├── tw.md ├── du.md ├── topn.md ├── cols.md ├── crp.md ├── ft.md ├── stripe.md ├── fread.md ├── thermctl.md ├── eve.md ├── pid2.md ├── noa.md ├── dirq.md └── cbtm.md ├── wsz.nim ├── tests ├── consisCvg.gpi ├── fage.sh ├── noa.sh └── tcatz.sh ├── ww.nim ├── bu ├── labFloats.nim ├── colSort.nim ├── esquo.nim ├── emin.nim ├── testf.nim └── rs.nim ├── keydowns.nim ├── pid2.nim ├── noc.nim ├── tslice.nim ├── LICENSE ├── widths.nim ├── rr.nim ├── flow.nim ├── fage.nim ├── uce.nim ├── tmpls.nim ├── tattr.nim ├── cfold.nim ├── noa.nim ├── notIn.nim ├── fsids.nim ├── holes.nim ├── fpr.nim ├── jointr.nim ├── sr.nim ├── fkindc.nim ├── cols.nim ├── niom.nim ├── fread.nim ├── newest.nim ├── oft.nim ├── okpaths.nim ├── ndelta.nim ├── unfold.nim ├── since.nim ├── adorn.nim ├── wits.nim ├── memlat.nim ├── man └── catz.1 ├── dirt.nim ├── topn.nim ├── lncs.nim └── tw.nim /nim.cfg: -------------------------------------------------------------------------------- 1 | --path="." 
2 | -------------------------------------------------------------------------------- /catz.nim: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/catz.nim -------------------------------------------------------------------------------- /doc/blur1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/blur1.png -------------------------------------------------------------------------------- /doc/claw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/claw.png -------------------------------------------------------------------------------- /doc/pwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/pwise.png -------------------------------------------------------------------------------- /doc/simul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/simul.png -------------------------------------------------------------------------------- /doc/tubes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/tubes.png -------------------------------------------------------------------------------- /doc/vipK.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/vipK.gif -------------------------------------------------------------------------------- /doc/vipO.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/vipO.gif 
-------------------------------------------------------------------------------- /doc/dfrDark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/dfrDark.png -------------------------------------------------------------------------------- /doc/dfrLight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/dfrLight.png -------------------------------------------------------------------------------- /doc/tFping.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/tFping.gif -------------------------------------------------------------------------------- /doc/consisCvg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/consisCvg.png -------------------------------------------------------------------------------- /doc/wsz.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Sometimes you want to know how big your terminal window is. 5 | 6 | Usage (***NOT*** a cligen utility) 7 | ----- 8 | ```sh 9 | $ wsz 10 | cells: 80 x 72 pixels: 1280 x 2160 charCell: 16 x 30 11 | ``` 12 | `wsz` takes no arguments. 
13 | -------------------------------------------------------------------------------- /wsz.nim: -------------------------------------------------------------------------------- 1 | import std/termios 2 | proc terminalSize(): IOctl_WinSize = 3 | for fd in [0, 1, 2]: 4 | if ioctl(fd.cint, TIOCGWINSZ, result.addr) != -1: return 5 | let t = terminalSize() 6 | echo "cells: " , t.ws_col , " x ", t.ws_row , 7 | " pixels: " , t.ws_xpixel, " x ", t.ws_ypixel, 8 | " charCell: ", t.ws_xpixel div max(1.cushort, t.ws_col), 9 | " x " , t.ws_ypixel div max(1.cushort, t.ws_row) 10 | -------------------------------------------------------------------------------- /doc/rr.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is really just `rm -rf` but able to use `cligen/dents.forPath` to 5 | maybe access faster OS interfaces for file tree traversal on Linux. 6 | 7 | Usage 8 | ----- 9 | ``` 10 | rr [optional-params] [roots: string...] 11 | 12 | Like rm -rf but a bit faster. Does nothing if no roots specified. 13 | 14 | -x, --xdev bool false block recursion across device boundaries 15 | -e, --eof0 bool false set eof0 16 | ``` 17 | -------------------------------------------------------------------------------- /tests/consisCvg.gpi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | set term png size 1920,1080 font "Helvetica,20" 3 | set output "consisCvg.png" 4 | set title 'Consistency-Convergence Plot' 5 | set xrange [9:1003] 6 | set log x 7 | set xlab 'sample size (staggered only for plotting)' 8 | set ylab "1e-4 seconds of overhead ('')" 9 | # Manually hacked data files to be like n=10.1,10.2,.. 
10 | plot 1.88 t '60kRunSampleMin', \ 11 | 'consisCvg' u 1:2:4 w yerror t 'Low Tech', \ 12 | 'consisCvgB' u ($1+2):2:4 w yerror t 'EVT-Boot' 13 | -------------------------------------------------------------------------------- /tests/fage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | now=$(date +%s) # now as epoch seconds 4 | touch -t 01010101.01 -m p1 5 | touch -t 01010101.02 -a p1 6 | touch -t 01010101.03 -m p2 7 | touch -t 01010101.04 -a p2 8 | sec=$(stat -c%Y p1) # mtime as seconds 9 | echo $((now - sec)) $(fage -fm p1) should be within rounding 10 | echo $(fage -fb p1) should be \< 1 but ultimately FS dependent 11 | echo $(fage -R/S -ra -fm p1) should be 1.0 12 | echo $(fage -Rp2 -rm -fm p1) should be 2.0 13 | echo $(fage -v2 -Rp2 -ra -fm p1) Basis for 3.0 14 | rm -f p1 p2 15 | -------------------------------------------------------------------------------- /ww.nim: -------------------------------------------------------------------------------- 1 | import strutils, cligen, cligen/textUt 2 | when not declared(stdin): import std/syncio 3 | 4 | proc ww(maxWidth=0, power=3) = 5 | ## Multi-paragraph with indent=>pre-formatted optimal line wrapping using 6 | ## badness metric *sum excess space^power*. 7 | let maxWidth = if maxWidth != 0: maxWidth else: ttyWidth 8 | stdout.write wrap(stdin.readAll, maxWidth, power) 9 | 10 | include cligen/mergeCfgEnv 11 | dispatch ww, help={"maxWidth": "maximum line width; *0* => tty width", 12 | "power" : "power of excess space for badness"} 13 | -------------------------------------------------------------------------------- /doc/cfold.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Rather than just word-wrapping at width boundaries/word boundaries/etc., it can 5 | sometimes be useful to wrap when a pattern is seen in the input. 
6 | 7 | Usage 8 | ----- 9 | ``` 10 | cfold [optional-params] [pattern: string...] 11 | 12 | cfold is to fold as csplit(1) is to split(1). pattern is an rx at which to 13 | segment input lines in file. 14 | 15 | -s, --suppress bool false exclude matched strings 16 | -i, --ignore bool false add ignore case to re flags 17 | -e, --extended bool false nim re 'extended' syntax 18 | -f=, --file= string "-" input file ("-" == stdin) 19 | ``` 20 | 21 | Related Work 22 | ------------ 23 | This can also be done with GNU sed, but ergonomics of getting \n into 24 | expressions are poor. 25 | -------------------------------------------------------------------------------- /bu/labFloats.nim: -------------------------------------------------------------------------------- 1 | import cligen/mslice, std/parseutils; export initSep 2 | when not declared(File): import std/syncio 3 | 4 | proc labFloats*(f: File, sep: Sep): (seq[string], seq[seq[float]]) = 5 | ## Read lines of a file separating into float|non-float and saving all floats 6 | ## but only the last textual context. 7 | var cols: seq[TextFrame] 8 | for line in lines(f): 9 | let ms = line.toMSlice 10 | let m = ms.frame(cols, sep) 11 | if (let dm = m - result[0].len; dm > 0): 12 | result[0].setLen m 13 | result[1].setLen m 14 | for j in 0..output lines, sorting columns [skip:] within each row. 
16 | 17 | -p=, --pi= string "" path to input ; "" => stdin 18 | --po= string "" path to output; "" => stdout 19 | -i=, --iDlm= string "\t" input delimiter; w => repeated whitespace 20 | -o=, --oDlm= char '\t' output delimiter byte 21 | -s=, --skip= int 0 initial columns to NOT sort within rows 22 | -------------------------------------------------------------------------------- /keydowns.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/syncio 2 | proc keydowns(shift="~!@#$%^&*()_+|}{:\"?><", v=false): int = 3 | ## Return min key downs needed to enter all lines on stdin, optimizing SHIFTs. 4 | proc initSetChar(s: string): set[char] = 5 | for c in {'A'..'Z'}: result.incl c 6 | for c in s: result.incl c 7 | let shift = shift.initSetChar 8 | for str in stdin.lines: 9 | var down = false # BETWEEN strs, SHIFT goes key up 10 | let r0 = result 11 | for c in str: 12 | if c in shift: # Need shift 13 | if not down: down = true; inc result # Cnt key down 14 | else: down = false 15 | inc result 16 | if v: stderr.write result - r0, " ", str, "\n" 17 | 18 | when isMainModule: 19 | import cligen; include cligen/mergeCfgEnv; dispatch keydowns, echoResult=true, 20 | help={"shift": "in addition to 'A'..'Z'", "v": "err log counts & strings"} 21 | -------------------------------------------------------------------------------- /doc/fkindc.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | People often want to know what kind of files are within some file tree. This 5 | produces a nice little histogram of file(1)/libmagic(3) file types. See the 6 | [only doc](only.md) for more background. 7 | 8 | Usage 9 | ----- 10 | ``` 11 | fkindc [optional-params] 12 | 13 | Use gen and dlr1 to generate paths and histogram by file(1) type. 14 | 15 | -g=, --gen= string "find $1 -print0" generator cmd with dlr1 -> $1 16 | -d=, --dlr1= string "." 
$1 for gen fmt; Eg. ". -type f" 17 | -x=, --excl= set(Excl) {} tests to exclude like file(1) 18 | -j=, --jobs= int 0 use this many kids (0=auto) 19 | ``` 20 | 21 | Related Work 22 | ------------ 23 | This could probably have just been a new flag to `only`, but the code to do 24 | just this is quite a bit simpler. New option or new program is often a tough 25 | judgement call. 26 | -------------------------------------------------------------------------------- /tests/noa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | : ${idx:="-1"} 3 | n=' 4 | ' 5 | T() { 6 | d="" # Only Zsh needs this to avoid appending to $d 7 | n=' 8 | ' # read -rd works in Bash|Zsh, but not POSIX. So, 9 | while IFS= read -r line #..loop which works for all text but input with no 10 | do d="$d${line}$n" #..final newline where we add one "erroneously". 11 | done; d=${d%?} # Chop extra newline 12 | echo "$d" # This adds \n back here for test output. 13 | } 14 | noa "$idx" -- cp -a F -f "/x/maybe/mis${n}sing" | T 15 | noa "$idx" cp -- -a F -f "/x/maybe/mis${n}sing" | T 16 | noa "$idx" cp -a -- F -f "/x/maybe/mis${n}sing" | T 17 | noa "$idx" cp -a F -- -f "/x/maybe/mis${n}sing" | T 18 | noa "$idx" cp -a F -f -- "/x/maybe/mis${n}sing" | T 19 | noa "$idx" cp -a F -f "/x/maybe/mis${n}sing" -- | T 20 | echo ---- 21 | noa "$idx" cp -a F -f "${n}/x/maybe/m${n}issing${n}${n}" -- | T 22 | -------------------------------------------------------------------------------- /pid2.nim: -------------------------------------------------------------------------------- 1 | import std/[os, posix, parseutils] 2 | 3 | var pid = getpid() 4 | var last = 0.Pid 5 | var t = 300 # Linux starts Process ID table @300 6 | var cpu = 1 7 | var xSt: cint 8 | if paramCount()>0 and parseInt(1.paramStr,t)!=1.paramStr.len: 9 | quit "Expecting integer to try to wrap PID table to", 1 10 | if paramCount()>1 and parseInt(2.paramStr,cpu)!=2.paramStr.len: 11 | quit "Expecting CPU to set 
affinity for", 2 12 | let tgt = t.Pid 13 | 14 | template nextPid = 15 | last = pid 16 | pid = vfork() 17 | case pid # -1 quit leaves whole program 18 | of -1: quit "pid2: %s " & $errno.strerror, 2 19 | of 0: quit 0 # kid => die 20 | else : discard waitpid(pid, xSt, 0) # parent=>wait 21 | 22 | when defined(linux): 23 | import cligen/osUt;setAffinity([cpu.cint]) # Alder 14X faster 24 | 25 | while pid > tgt and last < pid: nextPid() # Get (last>=pid) 26 | while pid < tgt: nextPid() # 1st free past tgt 27 | -------------------------------------------------------------------------------- /noc.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/syncio 2 | import cligen/[mfile, osUt, sysUt, textUt], std/terminal 3 | 4 | if stdin.isatty: quit ("Usage:\n noc < someInput\n" & 5 | "strips ANSI CSI/OSC/SGR color escape sequences"), 1 6 | 7 | if (let mf = mopen("/dev/stdin", err=nil); mf.mem != nil): 8 | discard c_setvbuf(stdout, nil, IOFBF, 32768) # Boost 9 | for c in toOa[char](mf.mem, 0, mf.len - 1).noCSI_OSC: 10 | putchar c 11 | else: 12 | var io = newSeq[char](32768) # (i)nput-(o)utput buffer 13 | var nc: NoCSI_OSC # call-to-call parser state 14 | while not stdin.eof: 15 | let nI = stdin.ureadBuffer(io[0].addr, io.len) 16 | var nO = 0 17 | for c in toOa[char](io[0].addr, 0, nI-1).noCSI_OSC(nc): 18 | io[nO] = c # seq[char] faster than string here 19 | nO.inc # Clobber input w/stripped output 20 | if nO > 0: # 0 => Neither progress nor clobber 21 | if stdout.uriteBuffer(io[0].addr, nO) < nO: 22 | quit "stdout write fail; out of space?", 1 23 | -------------------------------------------------------------------------------- /doc/emin.md: -------------------------------------------------------------------------------- 1 | # Motivation / Example / Usage 2 | 3 | Sometimes a program spends a non-negligible time doing set up before some inner 4 | phase which is what you want to time. 
For this, [`tim`](tim.md) is 5 | inappropriate since there is more "overhead" to subtract than shell overhead, 6 | yet [`eve`](eve.md) seems more general than desirable, since you might still be 7 | using [`tim`](tim.md) to drive the experiment. So, for the case when you really 8 | just have a list of numbers in "tim-compatible layout" (Re: `--warmup`, `--k`, `--n`, 9 | `--m`), it's nice to say something like this: 10 | 11 | ``` 12 | tim="-k2 -o14 -n14 -m14" 13 | tim $tim "$prog 2>>/tmp/dts" 14 | emin $tim `NUL) 19 | -e=, --emit= set(Emit) summary Stuff to emit: summary detail 20 | ``` 21 | 22 | Related Work 23 | ------------ 24 | util-linux has `fincore`, but often I just want summary information and this is 25 | easy to compute in-program than as a wrapper program. Also, this program should 26 | work fine on OS X or many BSDs where util-linux is probably not installed. 27 | -------------------------------------------------------------------------------- /tslice.nim: -------------------------------------------------------------------------------- 1 | when isMainModule: 2 | import std/[os, syncio, strutils], cligen/[osUt, textUt] 3 | let ac = paramCount() 4 | let av1 = if ac >= 1: paramStr(1) else: "" 5 | var colon = if ac >= 1: av1.find(':') else: -1 6 | if ac < 1 or colon == -1: 7 | quit "Usage:\n "¶mStr(0)&" [a]:[b]\n" & 8 | "does UTF8-SGR aware Py-like slices of terminal columns on stdin", 1 9 | let A = av1[0 ..< colon] 10 | let B = av1[colon+1..^1] 11 | let a = if A.len > 0: A.parseInt else: 0 12 | let b = if B.len > 0: B.parseInt else: int.high 13 | let nl = "\n" 14 | if a < 0 or b < 0: 15 | for line in stdin.lines: 16 | var tot = 0 17 | for (_, w) in printedChars(line): inc tot, w 18 | let a = if a < 0: tot + a else: a 19 | let b = if b < 0: tot + b else: b 20 | let n = b - a 21 | for (s, _) in printedChars(line, a, n): stdout.urite line, s 22 | discard stdout.uriteBuffer(nl[0].addr, 1) 23 | else: 24 | let n = b - a 25 | for line in stdin.lines: 26 | for (s, 
_) in printedChars(line, a, n): stdout.urite line, s 27 | discard stdout.uriteBuffer(nl[0].addr, 1) 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 c-blake 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /doc/fage.md: -------------------------------------------------------------------------------- 1 | This program will be unneeded if coreutils `stat` ever grows an option to make 2 | %[WXYZ] emit full precision and/or Bash/Dash grow floating point arithmetic. 3 | (I would not hold your breath about either.) 4 | 5 | Usage 6 | ----- 7 | ``` 8 | fage [optional-params] [paths: string...] 
9 | 10 | Print max resolution age (`fileTime(Ref|self,rT) - fileTime(path,fT)`) for 11 | paths. "now" =~ program start-up. Examples: 12 | 13 | `fage x y` v-age of *x* & *y* relative to "now" 14 | `fage -fb x` b-age of *x* relative to "now" 15 | `fage -Rlog logDir` v-age of *log* rel.to its *logDir* 16 | `fage -srm -fb x y` **mtime - btime** for both *x* & *y* 17 | `fage -ra -R/ ''` Like `stat -c%X /`, but high-res 18 | 19 | Last works since missing files are given time stamps of 0 (start of 1970). 20 | 21 | Options: 22 | -R=, --Ref= string "" path to ref file 23 | -r=, --refTm= char 'v' ref file stamp [bamcv] 24 | -f=, --fileTm= char 'v' file time stamp [bamcv] 25 | -s, --self bool false take ref time from file itself 26 | -v=, --verb= int 0 0: Deltas; 1: Also paths; 2: diff-ends (ns) 27 | ``` 28 | -------------------------------------------------------------------------------- /doc/tmpls.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | `tmpls` is largely similar to a sub-shell such as: 4 | ```sh 5 | while read a; do printf "i/%s.c\no/%s.o\n" "$s" "$s"; done 6 | ``` 7 | but it is much faster.[^1] 8 | 9 | Usage 10 | ===== 11 | ``` 12 | tmpls [optional-params] templates... 13 | 14 | Interpolate { %s)tring | %n)eed quoted | always %q)uoted | %e)scaped } into 15 | as many templates as given, writing back-to-back template-filled-in batches to 16 | stdout, with each individual template terminated by term. 17 | 18 | E.g.: 19 | find . -name '*.c' -print|sed 's/.c$//' | tmpls %s.c %s.o %n.c %e.o 20 | 21 | Options: 22 | -f=, --file= string "/dev/stdin" input file of name stubs 23 | -n=, --nl= char '\n' input string terminator 24 | -t=, --term= char '\n' output string terminator 25 | -m=, --meta= char '%' self-quoting meta for %sub 26 | ``` 27 | 28 | [^1]: I get 25X-75X improvements. As always, this depends on a lot, such as if 29 | /bin/sh is dash, bash, zsh, etc. as well as what the CPU is. 
/bin/sh variation 30 | is large enough, and the implementation of `tmpls.nim` simple enough that real 31 | benchmarking does not seem very pointful. 32 | 33 | 34 | -------------------------------------------------------------------------------- /tests/tcatz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ $# -ne 2 ]; then cat <o' 'catz $1|cat>o' \ 24 | 'catz<$1>o' 'catz<$1|cat>o' 'cat<$1|catz>o' 'cat<$1|catz|cat>o' 25 | # iFile,oFile iFile,oPipe iPipe,oFile iPipe,oPipe 26 | do test1 "$1" "$2" "$3" "$tst" 27 | done 28 | } 29 | ln -s "$i" noExt 30 | test1 noExt "$o" "NO-EXTEN" 'catz $1>o' 31 | test1 noExt "$o" "NO-EXTEN" 'catz $1|cat>o' 32 | 33 | test6 "$i" "$o" DECODED 34 | 35 | dd seek="$SEEK" if="$i" of=noMag 2>/dev/null 36 | test6 noMag noMag PASS-THROUGH 37 | 38 | printf '' > len0 39 | test6 len0 len0 PASS-THROUGH-0 40 | 41 | rm -rf "$t" 42 | echo "SUCCESS" 43 | -------------------------------------------------------------------------------- /widths.nim: -------------------------------------------------------------------------------- 1 | import std/terminal, cligen, cligen/[sysUt, mfile], nio 2 | 3 | proc widths(outKind='\0', distro=false, paths: seq[string]) = 4 | ## Emit width/line lengths in bytes of all lines in files `paths`. 5 | ## 6 | ## If `histo` emit an exact histogram of such widths. 7 | ## 8 | ## Emits text if `outKind==NUL`, else binary in that NIO format. 
9 | 10 | if outKind != '\0' and stdout.isatty: Help!!"stdout is a terminal; Full $HELP" 11 | let kout = try: kindOf(outKind) except CatchableError: 0.IOKind 12 | var obuf: array[16, char] 13 | var cnts: seq[int] 14 | var mf: MFile 15 | for path in paths: 16 | for ms in mSlices(path, mf=mf): 17 | if distro: 18 | if ms.len + 1 > cnts.len: cnts.setLen ms.len + 1 19 | inc cnts[ms.len] 20 | elif outKind == '\0': 21 | echo ms.len 22 | else: # Convert & then emit line len as `kout` 23 | var n = ms.len 24 | convert kout, lIk, obuf[0].addr, n.addr 25 | stdout.nurite kout, obuf[0].addr 26 | mf.close 27 | if distro: 28 | for i, c in cnts: 29 | if c != 0: echo i, " ", c 30 | 31 | when isMainModule: include cligen/mergeCfgEnv; dispatch widths, help={ 32 | "distro" : "emit a histogram, not individual widths", 33 | "outKind": "emit binary stream with this NIO format"} 34 | -------------------------------------------------------------------------------- /doc/jointr.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is utility to make it easier to read `strace -f` output. If you do 5 | something like 6 | ```sh 7 | strace --decode-fds -fvs8192 -oFoo multi-process-program 8 | ``` 9 | and there is significant clone/fork spawning then you are likely to see 10 | a great many system calls which are reported twice - once at initiation, 11 | are then suspended and then again at resumption. 12 | 13 | The problem with this is that there can be a lot of other intervening text 14 | between these two points and the parameters of the initial call are not repeated 15 | by `strace` upon resumption. 16 | 17 | So, what `jointr` does is act as a filter to either stitch the two halves 18 | together or at least repeat the call parameters at the continuation to help 19 | make sense of things. 20 | 21 | Never resumed calls are just printed in hash order at the bottom. 
22 | 23 | Usage 24 | ----- 25 | 26 | ``` 27 | Usage: 28 | jointr [optional-params] strace log path (or none for stdin) 29 | 30 | -c=, --cont= string " " line suffix saying it continues 31 | -b=, --boc= string "<... " beg of contin. indication to eat 32 | -e=, --eoc= string " resumed>" end of contin. indication to eat 33 | -a, --all bool false retain "unfinished ..." in-place 34 | ``` 35 | -------------------------------------------------------------------------------- /doc/ww.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | You may want more evenly sized right margins than delivered by a greedy word 5 | wrap algorithm (filling as much as possible before breaking). cligen/textUt 6 | has this built into it (for help message formatting). It minimizes a penalty 7 | formula that is the sum of the p-th power of right margin space sizes. So this 8 | program is a razor thin CLI wrapper that is easy to use from, e.g. just :!ww 9 | from a vim visual select as I did for this very paragraph. Higher powers will 10 | penalize non-uniformity more. 11 | 12 | Usage 13 | ----- 14 | ``` 15 | ww [optional-params] 16 | Multi-paragraph with indent=>pre-formatted optimal line wrapping using badness 17 | metric sum excess space^power. 18 | -h, --help print this cligen-erated help 19 | --help-syntax advanced: prepend,plurals,.. 20 | -m=, --maxWidth= int 0 maximum line width; 0 => tty width 21 | -p=, --power= int 3 power of excess space for badness 22 | ``` 23 | 24 | Related Work 25 | ------------ 26 | Donald Knuth did some impressive work in his TeX layout engine on the much 27 | harder problem that combines kerning adjustment of proportionally spaced fonts 28 | and word wrap. This program is the kind of high school version of that, but 29 | the concept of "badness" does still show up in tex/latex error messages and is 30 | in the same general dimension. 
31 | -------------------------------------------------------------------------------- /bu/colSort.nim: -------------------------------------------------------------------------------- 1 | import std/algorithm, cligen/[mslice, osUt] 2 | when not declared(stdout): import std/syncio 3 | 4 | proc colSort*(fi, fo: File; iDlm="\t", oDlm='\t', skip=0) = 5 | let sep = initSep(iDlm) 6 | var cols: seq[MSlice] 7 | for (cs, nP1) in fi.getDelims: 8 | sep.split(MSlice(mem: cs, len: nP1 - 1), cols) 9 | var wrote = false # flag saying we wrote & so need to delimit 10 | for i in 0 ..< min(skip, cols.len): 11 | if wrote: outu oDlm else: wrote = true 12 | outu cols[i] 13 | if cols.len > skip: 14 | var cols = cols[skip..^1] 15 | cols.sort 16 | for c in cols: 17 | if wrote: outu oDlm else: wrote = true 18 | outu c 19 | outu '\n' 20 | 21 | proc colSort*(pi="", po="", iDlm="\t", oDlm='\t', skip=0) = 22 | ## Copy input->output lines, sorting columns [skip:] within each row. 23 | colSort if pi.len == 0: stdin else: open(pi), 24 | if po.len == 0: stdout else: open(po, fmWrite), iDlm, oDlm, skip 25 | 26 | when isMainModule: 27 | import cligen; include cligen/mergeCfgEnv 28 | dispatch (proc(pi,po,iDlm:string; oDlm:char; skip:int))colSort, help={ 29 | "pi" : "path to input ; \"\" => stdin", 30 | "po" : "path to output; \"\" => stdout", 31 | "iDlm": "input delimiter; w* => repeated whitespace", 32 | "oDlm": "output delimiter byte", 33 | "skip": "initial columns to NOT sort within rows"} 34 | -------------------------------------------------------------------------------- /rr.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | include cligen/unsafeAddr 3 | import std/[strformat, posix], cligen, cligen/[dents, posixUt, statx] 4 | 5 | proc rr*(roots: seq[string], xdev=false, eof0=false): int = 6 | ## Like rm -rf but a bit faster. Does nothing if no ``roots`` specified. 
7 | if roots.len == 0: return 8 | var dfds: seq[cint] 9 | for root in roots: 10 | forPath(root, 0, false, false, xdev, eof0, stderr, 11 | depth, path, nmAt, ino, dt, lst, dfd, dst, did): 12 | if dt != DT_DIR: 13 | if unlinkat(dfd, path[nmAt..^1].cstring, 0) != 0: 14 | stderr.log &"rr({path}): {strerror(errno)}\n" 15 | elif dfds.len > 0 and dfds[^1] == dfd: discard 16 | else: dfds.add dfd 17 | do: discard # Pre-recurse 18 | do: # Post-recurse (dt == DT_DIR guaranteed) 19 | if unlinkat(dfds.pop, path[nmAt..^1].cstring, AT_REMOVEDIR) != 0: 20 | stderr.log &"rr({path}): {strerror(errno)}\n" 21 | # Future dir-unlinks are doomed to fail ENOTEMPTY except if ENOENT here 22 | # IF racing other unlinker(s). quit here forfeits any such races. 23 | quit(1) 24 | do: recFailDefault("rr", path) # Cannot recurse 25 | return 0 26 | 27 | when isMainModule: 28 | include cligen/mergeCfgEnv 29 | dispatch(rr, help = { "xdev" : "block recursion across device boundaries" }) 30 | -------------------------------------------------------------------------------- /doc/ru.md: -------------------------------------------------------------------------------- 1 | I wrote this because /usr/bin/time is very low time resolution (10 ms) with a 2 | very hard to read default format and for a very long time (early 90s?) various 3 | OSes have provided better. When faced with the question "What is CPU?", perhaps 4 | the getrusage/wait4 answer of `ru` can be a first step. 5 | 6 | ``` 7 | Usage: (***NOT*** a cligen utility) 8 | 9 | ru [-whatiscpu] [prog args...] 10 | 11 | No options => as if -hit; else selected subset. 12 | 13 | Flags all in arg 1 & mean: 14 | w w)rapped output without row labels (to get fields by row, e.g. grep) 15 | h h)uman readable formats with (h)our:minute:seconds, MiB, etc. 
units 16 | a a)ll of the below, in the same order 17 | t t)ime,mem (wall, user, system time, CPU utilization, max Resident) 18 | i i)o (inBlocks, outBlocks, swaps, majorFaults, minorFaults) 19 | s s)witch/stack/sharing (volCtxSw, involSw, stack, txtResShr, datResShr) 20 | c interprocess (c)ommunications (signals, IPC sent, IPC received) 21 | p p)lain output (no ANSI SGR color escapes) 22 | u u)nwrapped output with field labels (to get fields by column, e.g. awk) 23 | ``` 24 | 25 | `man getrusage` | `man time` give more details on the various stats this small 26 | Nim program can print. 27 | 28 | You can put options in the `RU` environment variable. Compared to time(1), this 29 | is higher precision with more modern and controlled units. 30 | -------------------------------------------------------------------------------- /doc/niom.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | This little 50-liner mostly only exists & lives here since I try to keep some 4 | core library packages like `cligen`, `adix, and `nio` hard-dependency-free.[^1] 5 | 6 | Usage 7 | ===== 8 | ``` 9 | niom [optional-params] [paths: 1|more paths to NIO files] 10 | Print selected statistics over all columns of all paths. 
11 | 12 | -f=, --fmt= string ".4g" Nim floating point output format 13 | -s=, --stats= set(MomKind) min,max n min max sum avg sdev skew kurt histo 14 | -q=, --qs= floats {} desired quantiles 15 | -a=, --a= float 1e-16 min absolute value histo-bin edge 16 | -b=, --b= float 1e+20 max absolute value histo-bin edge 17 | -n=, --n= int 8300 number of lg-spaced histo bins 18 | ``` 19 | 20 | An Example 21 | ========== 22 | 23 | ```sh 24 | $ zipf -n10_000_000 -fbg 1..3 | niom -s,= -sh .Nl 25 | .Nl:0 n: 8300 a: 1e-16 b: 1e+20 26 | aLn: -36.841361487904734 h: 0.00998831947798357 hInv: 100.11694181430796 27 | bins,cnts: 28 | [ -1e-16 , 1e-16 ): 6467866 29 | [ 0.9955705858181852 , 1.0055644909629682 ): 2287419 30 | [ 1.9832854562249305 , 2.003194407942553 ): 1244715 31 | totalCount: 10000000 nonZeroBins: 3 32 | ``` 33 | Note that (2287419/6467866)**-(2./3) = 1.999601833743278, thus also a spot check 34 | of [zipf](zipf.md) with a default alpha=3/2. 35 | 36 | [^1]: If someone is living life with a few git clone's per year they are still 37 | cool to try out my packages even if `nimble` fails them. 38 | -------------------------------------------------------------------------------- /doc/fsids.md: -------------------------------------------------------------------------------- 1 | Usage: 2 | ``` 3 | fsids [optional-params] [roots: string...] 4 | 5 | Print a histogram of uids and/or gids used by a file tree 6 | 7 | -k=, --kind= IdKind both kind of ids to report user, group, both 8 | -o=, --order= Order id sort order: up by id or down by count 9 | -r=, --recurse= int 0 recursion limit for dirs in roots; 0=unbounded 10 | -f, --follow bool false follow symbolic links to dirs in recursion 11 | -x, --xdev bool false block recursion from crossing devices 12 | -e, --eof0 bool false set eof0 13 | ``` 14 | 15 | This produces a very simple filesystem id histogram. 
E.g., you might run `pwck` 16 | and get a report about misconfigured users and then have the question: should 17 | these users just be garbage collected? Or you might otherwise be interested in 18 | diversity of file ownership under various sub-trees. 19 | 20 | For example, 21 | ``` 22 | fsids -r0 /etc 23 | ``` 24 | might produce 25 | 26 | ``` 27 | #Uid Nentry Name 28 | 0 1845 root 29 | 23 8 www 30 | 70 5 postgres 31 | 102 2 openvpn 32 | 250 77 portage 33 | 439 4 ldap 34 | 13615 3 MISSING 35 | #Gid Nentry Name 36 | 0 1833 root 37 | 7 8 lp 38 | 8 3 mem 39 | 23 8 www 40 | 70 5 postgres 41 | 110 5 fcron 42 | 250 91 portage 43 | 391 1 unbound 44 | 439 4 ldap 45 | ``` 46 | which indicates there are 3 files with archaic/obsolete UIDs (labeled 47 | "MISSING" here). 48 | -------------------------------------------------------------------------------- /flow.nim: -------------------------------------------------------------------------------- 1 | when not declared stdin: import std/syncio 2 | import std/[algorithm, sugar], cligen/[textUt, tab], cligen 3 | 4 | proc flow*(input="", output="", pfx="", width=0, gap=1, byLen=false, maxPad=99)= 5 | ## Read maybe utf8-colored lines from `input` & flow them into shortest height 6 | ## table of top-to-bottom, left-to-right columns & write to `output`. 
7 | let i = if input.len > 0: open(input) else: stdin 8 | var strs = collect(for line in i.lines: line) 9 | if byLen: strs.sort cmp=proc(a, b: string): int = a.printedLen - b.printedLen 10 | let wids = collect(for str in strs: -str.printedLen) # - here means left-align 11 | let o = if output.len > 0: open(output, fmWrite) else: stdout 12 | if gap < 0: (for x in strs: o.write x) 13 | else: 14 | let W = if width == 0: ttyWidth elif width < 0: ttyWidth + width else: width 15 | let w = W - pfx.len 16 | var nrow, ncol: int; let m = 1 17 | var colWs = layout(wids, w, gap, maxPad, m, nrow, ncol) 18 | colPad(colWs, w, maxPad, m) 19 | o.write(strs, wids, colWs, m, nrow, ncol, 0, pfx) 20 | 21 | dispatch flow, help={"input" : "use this input file; \"\"=>stdin", 22 | "output": "use this output file; \"\"=>stdout", 23 | "pfx" : "pre-line prefix (e.g. indent)", 24 | "width" : "rendered width; 0: auto; <0: auto+THAT", 25 | "gap" : "max inter-column gap; <0: 1-column", 26 | "byLen" : "sort by printed-length of row", 27 | "maxPad": "max per-column padding"} 28 | -------------------------------------------------------------------------------- /fage.nim: -------------------------------------------------------------------------------- 1 | import std/[strutils, times], cligen/[sysUt, statx], cligen 2 | when not declared(stdout): import std/syncio 3 | 4 | proc fage(Ref="", refTm='v', fileTm='v', self=false, verb=0, paths:seq[string])= 5 | ## Print max resolution age (`fileTime(Ref|self,rT) - fileTime(path,fT)`) 6 | ## for paths. "now" =~ program start-up. Examples: 7 | ## `fage x y` v-age of *x* & *y* relative to "now" 8 | ## `fage -fb x` b-age of *x* relative to "now" 9 | ## `fage -Rlog logDir` v-age of *log* rel.to its *logDir* 10 | ## `fage -srm -fb x y` **mtime - btime** for both *x* & *y* 11 | ## `fage -ra -R/ ''` Like `stat -c%X /`, but high-res 12 | ## Last works since missing files are given time stamps of 0 (start of 1970). 13 | if paths.len == 0: Help !! 
"Need >= 1 path; $HELP" 14 | let tR = if self: 0i64 # just to skip unneeded syscall(s) 15 | else: Ref.fileTime(refTm, int64(epochTime() * 1e9)) 16 | for path in paths: 17 | let tR = if self: path.fileTime(refTm) else: tR 18 | let tF = fileTime(path, fileTm) 19 | let age = float(tR - tF) * 1e-9 20 | stdout.write formatFloat(age, ffDecimal, 9) 21 | if verb > 1: 22 | stdout.write " ", tR, " ", tF 23 | if verb > 0: 24 | echo " ", path else: echo "" 25 | 26 | when isMainModule: include cligen/mergeCfgEnv; dispatch fage, help={ 27 | "Ref" : "path to ref file", 28 | "refTm" : "ref file stamp [bamcv]", 29 | "fileTm": "file time stamp [bamcv]", 30 | "self" : "take ref time from file itself", 31 | "verb" : "0: Deltas; 1: Also paths; 2: diff-ends (ns)"} 32 | -------------------------------------------------------------------------------- /bu/esquo.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | import std/strutils, cligen/[sysUt, strUt, mslice, osUt] 3 | 4 | type EsQuo* = enum eqNeed, eqAlways, eqEscape ## Quoting mode enum 5 | 6 | proc esQuoParse*(q: string): EsQuo = 7 | ## Parse a quoting mode string into its enum or raise `ValueError`. 8 | case (if q.len > 0: q[0].toLowerAscii else: 'X') 9 | of 'n': result = eqNeed 10 | of 'q': result = eqAlways 11 | of 'e': result = eqEscape 12 | else: Value !! "Unknown quote mode: \"" & q & "\"." 13 | 14 | const needQuo* = {'\t', '\n', ' ', '!', '"', '#', '$', '&' , '\'', '(', ')', 15 | '*', ';', '<', '=', '>', '?', '?', '[', '`' , '{', '|', '~'} 16 | 17 | # Can save empty string ('') catenation if you can *know* starts|ends with ' 18 | var quoHunks: seq[MSlice] 19 | proc sQuote*(f: File, s: SomeString; hunks: var seq[MSlice] = quoHunks) = 20 | ## Shell Single-Quoter. `hunks` is just for MT-safety if you need that. 
21 | f.urite '\'' 22 | discard s.msplit(hunks, '\'', 0) 23 | for i, hunk in hunks: 24 | f.urite hunk 25 | if i != 0: f.urite "'\\''" 26 | f.urite '\'' 27 | 28 | proc escape*(f: File, s: SomeString, esc='\\', need={'\0'..'\x7F'}) = 29 | ## Escape every byte with `esc`. Not very unicode-friendly. 30 | for c in s: 31 | if c in need: f.urite esc 32 | f.urite c 33 | 34 | proc emit*(f: File, s: SomeString, qmode=eqNeed, esc='\\') = 35 | ## Emit `s` to `f`, quoting or escaping as specified. 36 | case qmode 37 | of eqNeed: (if needQuo in s: stdout.sQuote s else: stdout.urite s) 38 | of eqAlways: stdout.sQuote s 39 | of eqEscape: stdout.escape s 40 | -------------------------------------------------------------------------------- /doc/nrel.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | The Nim package manager nimble identifies versions by the most recent git tag. 4 | This must match in the .nimble file and the git repository. It is pretty easy 5 | to forget changing it one place or the other when making new releases. 6 | 7 | Usage 8 | ----- 9 | ``` 10 | nrel [NEED,optional-params] 11 | Bump version in .nimble, commit, tag & push using just nim, this prog & git. 12 | Final optional stage uses github-cli's gh release creation. 
13 | 14 | -v=, --vsn= string "" New version; "": auto bump 15 | -b=, --bump= VSlot patch Version slot to bump: Major, minor, patch 16 | -m=, --msg= string "" .nimble commit; "": Bump versions pre-release 17 | -s=, --stage= Stage push nimble, commit, tag, push, release 18 | -t=, --title= string "" Release title 19 | -n=, --notes= string "" Path to release notes markdown 20 | ``` 21 | 22 | Examples 23 | -------- 24 | ```sh 25 | cd myRepo 26 | nrel 27 | # Now go to github and draft a release 28 | ``` 29 | or if you have `gh` installed from github-cli 30 | ``` 31 | cd myRepo 32 | edit /tmp/RELNOTE # add release notes 33 | nrel -sr -t 'This is my new release title' -n /tmp/RELNOTE 34 | ``` 35 | 36 | Future Work 37 | ----------- 38 | It would be nice to also update all dependency versions in `requires` in 39 | the nimble file to whatever their latest versions are since this is the most 40 | likely testing case by far. That is a bit more work, though. 41 | 42 | Related Work 43 | ------------ 44 | I feel just assuming & using a command-line `git` program is a simpler approach 45 | than done in https://github.com/disruptek/bump 46 | -------------------------------------------------------------------------------- /doc/dirt.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | File times on directories are funny things. On the one hand, it can be nice 5 | to see when you last renamed something inside or deleted an entry. On the 6 | other hand, you may prefer after several to many such edits, that the hierarchy 7 | of directories "represent" what it contains and maybe only the ctime reflects 8 | the last edit. This latter conceptual mode is what motivates `dirt`. 9 | 10 | Maybe a simpler way to describe it is operationally: it makes `ls -lt` show 11 | things in the order of what is "most recently modified below", recursively. 12 | 13 | This can be a divisive transformation. 
Some will decry it as ruining the 14 | utility of `ls -lt`. Others will praise it as making it much more useful. 15 | The right response varies with use case-specific, but without this tool it's 16 | not easy to even have a choice. Only "never useful" hardliners can truly 17 | object to its existence. 18 | 19 | Usage 20 | ------ 21 | ``` 22 | dirt [optional-params] [roots: string...] 23 | 24 | Set mtimes of dirs under roots to mtime of its newest kid. 25 | 26 | This makes directory mtimes "represent" content age at the expense of erasing 27 | evidence of change which can be nice for time-sorted ls in some archival file 28 | areas. 29 | 30 | -v, --verbose bool false print utimes calls as they happen 31 | -q, --quiet bool false suppress most OS error messages 32 | -n, --dry-run bool false only print what system calls are needed 33 | -p=, --prune= strings {} prune exactly matching paths from recursion 34 | -x, --xdev bool false block recursion across device boundaries 35 | ``` 36 | -------------------------------------------------------------------------------- /doc/flow.md: -------------------------------------------------------------------------------- 1 | ## Motivation 2 | 3 | This is a height-optimizing "tabulator" program that can reduce output terminal 4 | scrolling by several dozen times. (E.g., 40..80 per row for 1 byte columns.) 5 | 6 | ## Example With 28X Improvement 7 | 8 | ```sh 9 | $ seq 1 84|flow # Run on an 80-column terminal 10 | 1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 49 52 55 58 61 64 67 70 73 76 79 82 11 | 2 5 8 11 14 17 20 23 26 29 32 35 38 41 44 47 50 53 56 59 62 65 68 71 74 77 80 83 12 | 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 48 51 54 57 60 63 66 69 72 75 78 81 84 13 | ``` 14 | 15 | 3 rows instead of 84. 84/3 = 28.0. 
16 | 17 | Another example might be with ls[^1] on a wide terminal: 18 | ``` 19 | ls -lt /var/log|flow 20 | ``` 21 | 22 | ## Usage 23 | ``` 24 | flow [optional-params] 25 | 26 | Read maybe utf8-colored lines from input & flow them into shortest height table 27 | of top-to-bottom, left-to-right columns & write to output. 28 | 29 | Options: 30 | -i=, --input= string "" use this input file; ""=>stdin 31 | -o=, --output= string "" use this output file; ""=>stdout 32 | -p=, --pfx= string "" pre-line prefix (e.g. indent) 33 | -w=, --width= int 0 rendered width; 0: auto; <0: auto+THAT 34 | -g=, --gap= int 1 max inter-column gap; <0: 1-column 35 | -b, --byLen bool false sort by printed-length of row 36 | -m=, --maxPad= int 99 max per-column padding 37 | ``` 38 | ## Related Work 39 | 40 | GNU/BSD `column` does something similar but does not support a concept of 41 | printed/rendered length (i.e. utf8/ANSI SGR color escape sequences). 42 | 43 | [^1]: Though [lc](https://github.com/c-blake/lc) is nicer in many ways. 44 | -------------------------------------------------------------------------------- /uce.nim: -------------------------------------------------------------------------------- 1 | when not declared(addFloat): import std/formatfloat 2 | import cligen/[mfile, mslice, strUt], adix/uniqce 3 | 4 | proc uce*(input="/dev/stdin", k=1024, re=0..5, fmt1="$val0 +- $err0", 5 | expF="($valMan +- $errV)$valExp") = 6 | ## Emit Unique Count Estimate of `input` lines to stdout. Algo is fast, low 7 | ## space 1-pass KMV over mmap | stream input. (For exact, see `lfreq`.) 8 | var uce = initUniqCe[float](k) 9 | for line in mSlices(input, eat='\0'): # RO mmap | slices from stdio 10 | when defined(cHash): 11 | let h = float(cast[uint64](hash(line)))*(1.0/1.8446744073709551615e19) 12 | else: # std/hashes(data) sadly only 32-bits! 
13 | let h = float(cast[uint32](hash(line)))*(1.0/4294967295.0) 14 | uce.push h 15 | if fmt1.len == 0: # The 2 estimates to full float prec 16 | echo uce.nUnique, " ", uce.nUniqueErr 17 | else: # Near-exact to fmt as "15.00"; Err is technically hash-collision rate 18 | echo fmtUncertain(uce.nUnique, max(uce.nUniqueErr, 0.1), fmt1, expF, re) 19 | 20 | when isMainModule: 21 | import cligen # Wide defaults => drop 22 | clCfg.hTabCols = @[clOptKeys, clDflVal, clDescrip] #..the data type column 23 | include cligen/mergeCfgEnv # Allow cfg files for +- 24 | dispatch uce, help={"input": "input data path", 25 | "k" : "size of the sketch in float64 elts", 26 | "re" : "range of 10expon defining 'near 1'", 27 | "fmt1" : "fmt for uncertain num near 1", 28 | "expF" : "fmt for uncertain num beyond `re`"} 29 | -------------------------------------------------------------------------------- /doc/since.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | This is (mostly) a convenience program for something I often want to know or 4 | do in scripts. 5 | 6 | Usage 7 | ----- 8 | ``` 9 | since [NEED,optional-params] [paths: string...] 10 | 11 | Print files whose time is since|before refTime of refPath. 12 | 13 | Files examined = UNION of paths + optional delim-delimited input file (stdin if 14 | "-"|if "" & stdin is not a terminal), maybe recursed as roots. 15 | 16 | To print regular files m-older than LAST under CWD: 17 | since -t-m -pLAST -r0 . 
18 | 19 | Options: 20 | -p=, --refPath= string NEED path to ref file 21 | -T=, --refTime= string "" stamp of ref file to use (if different) 22 | -t=, --time= string "m" stamp to compare ({-}[bamcv]*) 23 | -r=, --recurse= int 1 recurse n-levels on dirs; 0:unlimited 24 | -c, --chase bool false chase symlinks to dirs in recursion 25 | -D, --Deref bool false dereference symlinks for file times 26 | -k=, --kinds= set(FileKind) file i-node type like find(1): [fdlbcps] 27 | -q, --quiet bool false suppress file access errors 28 | -x, --xdev bool false block recursion across device boundaries 29 | -f=, --file= string "" optional input ("-"|!tty=stdin) 30 | -d=, --delim= char '\n' input file record delimiter 31 | -e, --eof0 bool false read dirents until 0 eof 32 | -n, --noDot bool false remove a leading . from names 33 | -u, --unique bool false only print a string once 34 | ``` 35 | Related Work 36 | ------------ 37 | GNU `find -*newer` does not support the new-ish Linux b-time and is also slow. 38 | -------------------------------------------------------------------------------- /doc/holes.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | 4 | Virtual machine or ISO disc images or other "file systems within a file" or 5 | sometimes object / database files can have intentionally large holes / sparsity. 6 | Corrupted torrent downloads may, meanwhile, have very large holes by accident. 7 | 8 | While these files can be easily identified with `ls -ls` or `stat` comparing the 9 | allocated blocks and seek-addressable file size, I could find no standard Unix 10 | command-line tool to count/list holes. (A quick web search will show numerous C 11 | programming examples to use the Unix API to list holes, though.) So here is 12 | (probably another) one. 13 | 14 | Usage 15 | ===== 16 | 17 | ``` 18 | holes [optional-params] [files: string...] 
19 | 20 | Show hole & data segments for files 21 | 22 | -f=, --format= string "" emit format interpolating: 23 | $count : number of data|hole segments 24 | $path : path name of REGULAR FILE from $* 25 | $map : map of all data&hole segments 26 | $nul : a NUL byte 27 | "" => "$count\t$path\n$holes\n" 28 | ``` 29 | 30 | Example 31 | ======= 32 | 33 | ```sh 34 | truncate -s 5000 x; printf hi >>x; holes x 35 | ``` 36 | 37 | prints on a file system with 4096-byte blocks: 38 | 39 | ``` 40 | 2 x 41 | hole 4096 42 | data 906 43 | ``` 44 | 45 | Related 46 | ======= 47 | 48 | `filefrag` is a similar but distinct utility which uses a less portable Linux 49 | FIEMAP `ioctl`. Distinctness-wise, for example, I get "4 extents" from a 50 | `filefrag foo.xfs`, while `holes` reports 42814 hole|data segments[^1]. 51 | 52 | [^1]: This is admittedly after running `xfs_fsr`. 53 | -------------------------------------------------------------------------------- /doc/unfold.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Programs sometimes have multi-row/multi-line outputs with decent regularity. 5 | You may want to "table-ify" such outputs for further processing, e.g. to do some 6 | quick arithmetic work across columns with an `awk` or [rp](rp.md) at the end of 7 | a shell pipeline. 8 | 9 | This can also be useful in extract-transform-load (ETL) contexts where you want 10 | to re-shape inputs to a table loading pipeline. 11 | 12 | Sometimes it seems more natural to create multi-row outputs and then pipe them 13 | to `unfold`. For example: 14 | ```sh 15 | cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency \ 16 | /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq | 17 | unfold -n2 | awk '{print $1/$2}' # Float arithmetic here 18 | ``` 19 | 20 | Usage 21 | ----- 22 | ``` 23 | unfold [optional-params] 24 | Join blocks of stdin lines into one line sent to stdout. 
25 | -h, --help print this cligen-erated help 26 | --help-syntax advanced: prepend,plurals,.. 27 | -s=, --sep= string "\t" separates the old lines within the new 28 | -n=, --n= int 0 Join |n| lines into 1 29 | -b=, --before= string "" join blocks beginning with a matching line 30 | -a=, --after= string "" join blocks ending with a matching line 31 | -i, --ignore bool false regex are case-insensitive 32 | -e, --extended bool false regexes are nim re 'extended' syntax 33 | ``` 34 | 35 | Related Work 36 | ------------ 37 | There are ways to do this with `awk`|etc. directly, but require either state 38 | machine-think that distracts in the heat of the analysis moment or else some 39 | devoted `awk`|etc. scripts. You could think of this program as a replacement 40 | for some such scripts (that probably runs faster than them). 41 | -------------------------------------------------------------------------------- /tmpls.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | import std/sugar, cligen, cligen/[sysUt, strUt, mslice, mfile, osUt], bu/esquo 3 | 4 | proc interPrint(f: File; tmpl: string, prs: seq[MacroCall]; str: SomeString) = 5 | for (id, arg, call) in prs: 6 | if id.idIsLiteral: f.urite tmpl, arg 7 | elif tmpl[id.a] == 's': f.urite str 8 | elif tmpl[id.a] == 'n': (if needQuo in str: f.sQuote str else: f.urite str) 9 | elif tmpl[id.a] == 'q': f.sQuote str 10 | elif tmpl[id.a] == 'e': f.escape str, need=needQuo 11 | else: f.urite tmpl, call 12 | 13 | proc tmpls(inp="/dev/stdin", nl='\n', outp="/dev/stdout", term='\n', meta='%', 14 | templates: seq[string]): int = 15 | ## Interpolate { %s)tring | %n)eed quoted | always %q)uoted | %e)scaped } into 16 | ## as many templates as given, writing back-to-back template-filled-in batches 17 | ## to stdout, with each individual template terminated by `term`. E.g.: 18 | ## ``find . 
-name '\*.c' -print|sed 's/.c$//' | tmpls %s.c %s.o %n.c %e.o`` 19 | if templates.len < 1: Help !! "Need some template; Full $HELP" 20 | let prs = collect(for t in templates: t.tmplParsed(meta)) 21 | let f = try: (if outp == "/dev/stdout": stdout else: open(outp, fmWrite)) 22 | except Ce: quit "could not open output: " & outp, 1 23 | for ms in mSlices(inp, sep=nl, eat='\0'): 24 | for i in 0 ..< templates.len: 25 | f.interPrint templates[i], prs[i], ms 26 | f.urite term 27 | 28 | when isMainModule: 29 | include cligen/mergeCfgEnv; dispatch tmpls, help={"templates": "templates...", 30 | "inp" : "input file of name 'stubs'", 31 | "nl" : "input string terminator", 32 | "outp" : "output file of expansions", 33 | "term" : "output string terminator", 34 | "meta" : "self-quoting meta for %sub"} 35 | -------------------------------------------------------------------------------- /tattr.nim: -------------------------------------------------------------------------------- 1 | import std/os, cligen/[sysUt, humanUt], cligen 2 | when not declared(stdout): import std/syncio 3 | 4 | proc tattr(attrs: seq[string]) = 5 | ## Emit to stdout an escape string activating text colors/styles, honoring 6 | ## $NO_COLOR & also reading ~/.config/cligen for $LC_THEME-based aliases. 7 | ## 8 | ## Non-color styles; Prefix with '-' to turn off. 9 | ## bold, faint, italic, inverse, hid, struck, blink (slow), BLINK (fast), 10 | ## under{line double dot dash curl}, over. 11 | ## 12 | ## Regular color keywords are in lower case; Bright bank in UPPER CASE: 13 | ## black, red, green, yellow, blue, purple, cyan, white 14 | ## BLACK, RED, GREEN, YELLOW, BLUE, PURPLE, CYAN, WHITE 15 | ## Colors are foreground by default. Pre-pend "on_" for Background. 
16 | ## 17 | ## 256-color or true color terminals like xterm|st|kitty also support: 18 | ## {fbu}[0..23] for F)ORE/B)ACKgrnd U)NDER grey scale 19 | ## {fbu}RGB where R, G, B are in 0..5 20 | ## {fbu}RRGGBB with RR, GG, BB are in hexadecimal (true color) 21 | ## 22 | ## An element of color scale NAME {viridis hue wLen gray pm3d} can be chosen 23 | ## via: 24 | ## {fbu}sNAME<0.-1>[,..] 25 | ## where only `hue` and `wLen` take [,sat,val] optionally. "wLen" is for 26 | ## "waveLength" - (yes, I know RGB light is a mixture; terms are just to imply 27 | ## *rough* "spectral order" or hot..cold / cold..hot / "heat" map). 28 | ## 29 | ## off, none, NONE turn off all special graphics renditions while -fg, -bg 30 | ## turn off just ForeGround, BackGround embellishment. 31 | ## 32 | ## *NOTE* May Need "--" for -bg, -bold, etc. 33 | if attrs.len == 0: Value !! "\n Need >= 1 attrs. See tattr --help" 34 | stdout.write textAttrOn(attrs, plain=existsEnv("NO_COLOR")) 35 | 36 | dispatch tattr 37 | -------------------------------------------------------------------------------- /cfold.nim: -------------------------------------------------------------------------------- 1 | import std/re, cligen, cligen/sysUt 2 | when not declared(lines): import std/syncio 3 | 4 | iterator csplit(s: string, pat: Regex): tuple[body: string, sep: string] = 5 | ## Iterate over segments of a string split by a pattern, yielding (body,sep) 6 | ## tuples. This correctly handles cases where the string does or does not end 7 | ## in a sep and where all bodies are empty strings. 8 | var a, b: int 9 | var beg = 0 10 | while true: 11 | (a, b) = findBounds(s, pat, start = beg) #s[a..b] if found else (-1,0) 12 | if a == -1: 13 | break 14 | yield (s[beg .. a-1], s[a .. b]) 15 | beg = b + 1 16 | if beg < s.len: 17 | yield (s[beg..^1], "") 18 | 19 | proc cfold(suppress=false, ignore=false, extended=false, file="-", 20 | pattern: seq[string]) = 21 | ## `cfold` is to `fold` as `csplit`(1) is to `split`(1). 
``pattern`` is an rx 22 | ## at which to segment input lines in `file`. This can also be done with GNU 23 | ## sed, but ergonomics of getting \\n into expressions are poor. 24 | var flags = {reStudy} 25 | if ignore: flags.incl reIgnoreCase 26 | if extended: flags.incl reExtended 27 | if pattern.len != 1: 28 | Help !! "Need exactly one pattern; Full $HELP" 29 | let pat = re(pattern[0], flags) 30 | for line in lines(if file != "-": open(file) else: stdin): 31 | for segment in csplit(line, pat): 32 | stdout.write(segment.body) 33 | if not suppress: 34 | stdout.write(segment.sep) 35 | stdout.write("\n") 36 | 37 | include cligen/mergeCfgEnv 38 | dispatch cfold, help={"file" : "input file (\"-\" == stdin)", 39 | "ignore" : "add ignore case to re flags", 40 | "extended": "nim re 'extended' syntax", 41 | "suppress": "exclude matched strings"} 42 | -------------------------------------------------------------------------------- /doc/adorn.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | A useful semi-frequent transformation of a text table is something like: 4 | 5 | ```sh 6 | awk '{$col = prefix $col suffix; print $0}' 7 | ``` 8 | BUT this normalizes whitespace between columns, messing up terminal alignment 9 | if any was present. `adorn` seeks to be less disruptive. 10 | 11 | There are, of course, Perl/Python solutions, but the body of the Nim code is 12 | only 28 lines and it runs much faster (>7X in informal timings). 13 | 14 | # Usage 15 | ``` 16 | adorn [optional-params] colNums (origin-origin column numbers) 17 | 18 | input-output filter to adorn fields by adding prefix &| suffix to delim-ited 19 | colNums, preserving ambient text. colNums, prefix, suffix SHARE INDEXING (so 20 | you may need to pad with ""). 
E.g.: 21 | 22 | paste <(seq 1 3) <(seq 4 6) <(seq 7 9) | adorn -pA -sB 1 -pC 3 23 | 24 | Options: 25 | --origin= int 1 origin for colNums; 0 => signed indexing 26 | -O, --O0 bool false shorthand for --origin=0 27 | -p=, --prefix= strings {} strings to prepend to listed columns 28 | -s=, --suffix= strings {} strings to append to listed columns 29 | -i=, --input= string "" path to mmap|read as input; "" => stdin 30 | -r=, --rowDlm= char '\n' input row delimiter character 31 | -d=, --delim= string "w" input field dlm chars; len>0=>fold;w=white 32 | -o=, --output= string "" path to write output file; "" => stdout 33 | ``` 34 | 35 | # Examples 36 | 37 | Add an explicit field label somewhere (possibly for additional post-processing): 38 | ```sh 39 | seq 1 9 | adorn -p 'label: ' 1 40 | ``` 41 | 42 | Make `$argv[0]` inverse { using [`tattr`](tattr.md) } in a cb0 `--style=basic` 43 | [`procs display`](https://github.com/c-blake/procs) listing: 44 | ```sh 45 | pd -sb | adorn -p$(tattr inverse) -s$(tattr -- -inverse) 8 46 | ``` 47 | -------------------------------------------------------------------------------- /doc/okpaths.md: -------------------------------------------------------------------------------- 1 | Basics 2 | ------ 3 | Usage: 4 | ``` 5 | okpaths ENVAR [DELIM(:) [ITYPE{bcdpfls}(d) [PERMS{rwx}(x) [DEDUP{FL*}(F)]]]] 6 | ``` 7 | The [] notation here indicates optionality and defaults are in (). 8 | 9 | This program echoes re-assembled value for `$ENVAR` delimited by ASCII character 10 | `DELIM`. Each retained element is i-node type `ITYPE` with permissions `PERMS`. 11 | 12 | & optional de-duplication. 13 | 14 | Eg., PATH=`okpaths PATH` keeps only existing (d)irs executable(x) by an invoking 15 | user. DEDUP starting with 'F' means keep F)irst use, while 'L' keeps L)ast use 16 | & other means no de-dup (this is case-insensitive). So, eval `okpaths PATH` is 17 | nice in rc/init scripts for Unix shells.
18 | 19 | Blocks of the 5 params can repeat (since fork&exec add to shell init time). 20 | 21 | The i-node type abbreviation is the somewhat standard (`ls -l` | `find`): 22 | * b (B)lock device 23 | * c (C)haracter device 24 | * d (D)irectory 25 | * p named (P)ipe/FIFO 26 | * f Regular (F)ile 27 | * l Symbolic (L)ink 28 | * s Unix domain (S)ocket 29 | 30 | Motivation 31 | ---------- 32 | `eval $(okpaths PATH : d rx u)` is useful in shell start-up scripts (like 33 | `~/.profile`) where you might assemble a search path or man path or et cetera 34 | from a variety of *possible* locations, but then want to trim the value down to 35 | locations valid at shell init. 36 | 37 | This trimming makes `echo $ENVAR` less noisy and may prevent annoying extra, 38 | unneeded work during start-up of dependent programs. Sometimes this extra work 39 | can be quite a lot, (e.g. with a slow NFS automounter), although just running 40 | `okpaths` will have to do it at least once. 41 | 42 | Note that login shells can be very long-lived and FS availability dynamic. So, 43 | validity at `okpaths`/shell start-up-time is not a perfect solution. 44 | -------------------------------------------------------------------------------- /doc/ndelta.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Often one runs a program twice - one way and the other way - to see how some 4 | parameter/input/mode/.. changes things. One then wants to compare the output of 5 | something reported - maybe resource consumption like time/space or success 6 | amounts or other accuracy parameters or other numeric outputs. So, the output 7 | is "largely" identical (or can be made so with sorting) except for "numbers" in 8 | different areas of the report. This situation is what `ndelta` is for. 9 | 10 | Usage 11 | ----- 12 | ``` 13 | ndelta [optional-params] [paths: string...] 
14 | 15 | Replace numbers in token-compatible spots of paths[0] & paths[1] with (absolute 16 | | ratio | relative | perCent) deltas. To trap out-of-order data, differences in 17 | context are highlighted unless sloppy is true. 18 | 19 | -k=, --kind= DKind ratio DiffKind: absolute, ratio, relative, perCent 20 | -d=, --delims= string "white" repeatable delim chars 21 | -n=, --n= int 3 FP digits to keep 22 | -s, --sloppy bool false allow non-numerical context to vary silently 23 | ``` 24 | A relative difference here is the `ratio - 1.0` while `perCent` is that 25 | multiplied by 100. 26 | 27 | Presently, `ndelta` has some sanity checks (total token count equality) to 28 | help the report be meaningful in the way intended and a sloppy mode to let 29 | context/delimiters vary but be reported in the output. 30 | 31 | Related Work 32 | ------------ 33 | There is, of course, the ever-present `diff` possibly combined with my `hldiff` 34 | to highlight sections, but this only presents textual differences while one 35 | often wants numeric (one of the 4 kinds currently supported by `ndelta`). 36 | `ndelta` is a very simple program. Variants of it have surely been done many 37 | times. If me not mentioning one here bugs you, bug me and I'll mention it. :) 38 | -------------------------------------------------------------------------------- /noa.nim: -------------------------------------------------------------------------------- 1 | import std/cmdline {.all.} # cmdCount, cmdLine 2 | from std/strutils import strip 3 | from std/parseutils import parseInt 4 | from std/sugar import collect 5 | 6 | # Wrap a command to emit last non-option arg 7 | const use = "(n)on-(o)ption (a)rgument usage:\n\n" & 8 | " noa {index} options-and-args\n\n" & 9 | "E.g.: noa -1 cp -a foo -f -- /exists/maybe/missing\n" & 10 | "emits \"/exists/maybe/missing\" no matter where \"--\" is.\n" & 11 | "Can be nice in scripts to e.g. ensure must-haves exist." 
12 | 13 | iterator nonOpts(): int = # Any alternative to Unix -- convention? 14 | var optsDone = false 15 | for i in 2 ..< cmdCount: # skip $0 = noa and $1 = idx .. BUT 16 | if optsDone: yield i #..yield unadjusted `cmdLine` indices. 17 | else: 18 | let a = cmdLine[i] #NOTE: All OSes terminate with \0 19 | if a[0] == '-': # Some kind of option | end of options 20 | if a[1] == '-' and a[2] == '\0':# "--": end of options 21 | optsDone = true 22 | else: yield i 23 | 24 | if cmdCount < 3: quit use, 1 # Use cmdCount,Line not paramCount,Str.. 25 | let dlr1 = $cmdLine[1] #..to avoid string creation w/giant argv 26 | if dlr1 in ["-h", "--help"]: echo use; quit 0 27 | 28 | var ix: int 29 | let bare = dlr1.strip 30 | if bare.len == 0 or parseInt(bare, ix) != bare.len: 31 | quit "\"" & bare & "\"" & " is not an integer.\n\n" & use, 2 32 | 33 | if ix < 0: 34 | let ixes = collect(for i in nonOpts(): i) 35 | let ix = ixes.len + ix 36 | if ix >= 0 and ix < ixes.len: echo cmdLine[ixes[ix]] 37 | else: quit "noa index " & bare & " out of bounds\n\n" & use, 3 38 | else: 39 | var ixCt = ix 40 | for i in nonOpts(): 41 | if ixCt == 0: echo cmdLine[i]; quit 0 42 | dec ixCt 43 | quit "noa index " & bare & " out of bounds\n\n" & use, 3 44 | -------------------------------------------------------------------------------- /notIn.nim: -------------------------------------------------------------------------------- 1 | import std/[sets, os, strutils], cligen/[sysUt, osUt] 2 | when not declared(stdin): import std/syncio 3 | 4 | proc doNotIn*(file="", delim='\0', term='\0', pattern="$1", invert=false, 5 | roots: seq[string]) = 6 | ## Find files under `roots` NOT matching `pattern` applied to any `file` entry. 7 | ## E.g.: 8 | ## `(cd D1; find . -print0) | notIn D2 D3 | xargs -0 echo` 9 | ## echoes every entry under *D2* or *D3* not also under *D1*. Input paths are 10 | ## normalized to nix empty path components (e.g. 1st & 3rd in "./foo/./bar"). 
11 | ## `find -path A -o -path B ..` can do this, but is hard for many paths. 12 | if "$1" notin pattern: 13 | Value !! "`pattern` must contain \"$1\" somewhere" 14 | var pats = initHashSet[string]() # Build up a big HashSet[string] 15 | let file = if file.len == 0: stdin else: open(file) 16 | for pat in getDelim(file, delim): 17 | if (let pat = pat.normalizedPath; pat.len > 0): 18 | pats.incl pattern % [ pat ] 19 | let filter = {pcFile, pcLinkToFile, pcDir, pcLinkToDir} 20 | for root in roots: # Now walk roots listing (mis)matches 21 | let root = if root.endsWith("/"): root[0..^2] else: root 22 | try: 23 | for path in walkDirRec(root, filter): 24 | let pat = path[root.len+1..^1] 25 | if (invert and pat in pats) or pat notin pats: 26 | stdout.write path, term 27 | except Ce: 28 | erru "could not recurse into ",root,"\n" 29 | 30 | when isMainModule: 31 | import cligen; include cligen/mergeCfgEnv 32 | dispatch doNotIn, cmdName="notIn", short={"invert": 'v'}, help={ 33 | "file" : "delimited input ( `\"\"` => ``stdin`` )", 34 | "delim" : "input path delimiter", 35 | "term" : "output path terminator", 36 | "pattern": "a \\$1-containing under `roots` pattern", 37 | "invert" : "find files that *do* match a `file` entry"} 38 | -------------------------------------------------------------------------------- /doc/tmath.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | Sometimes rather than converting to/from epoch seconds you prefer to embrace the 4 | International Date Line. ;) E.g., often the fastest way to do "date subtraction" 5 | is converting to [rata die](https://en.wikipedia.org/wiki/Rata_Die). Or a media 6 | player needs input in [[H:][M:]S and you prefer `$(tmath hms 2345)="39:05"` or 7 | worse there are start time/end time/length calcs to toil through. 8 | 9 | Since this may be useful as a lib not just as a CLI, the module is under bu/. 
10 | This is amongst the least novel code here likely with *many* copies of the core 11 | ideas out in the world, but it's simple enough and I've used it enough over the 12 | last few years that it seemed worth including. 13 | 14 | Examples 15 | ======== 16 | ```sh 17 | $ echo $(($(tmath r 2017-06-06)-$(tmath r 2001-01-01))) 18 | 6000 19 | $ tmath + 10:20:30 \ -4:5:6 20 | 6:15:24 21 | ``` 22 | 23 | Usage 24 | ===== 25 | While `tmath h` will dump it all, these subcommands do not take real options, 26 | just lists of what they say they take. Y4-M-D refers to a date formatted like 27 | 2000-1-31 or 1996-07-04. 28 | ``` 29 | Various calendar & time-of-day math routines that operate directly on broken 30 | down representations with a convenient CLI. 31 | 32 | tmath {SUBCMD} [sub-command options & parameters] 33 | where {SUBCMD} is one of: 34 | help print comprehensive or per-cmd help 35 | julians Julian Days for given Y4-M-D Gregorian dates 36 | dates Get Gregorian date for a given Julian Day in 8 integer divides 37 | rataDies Days since Gregorian 1/1/1 for given Y4-M-D dates (1div, 1cacheLn) 38 | gregorys Gregorian dates given days since 1/1/1 (in 4 int divs). 39 | toHMS Get all elements of seconds as H:M:S 40 | seconds Get all elements of hmses as seconds 41 | addHMS H:M:S sum of H:M:S args[0] and H:M:S args[1] (quote "space-") 42 | + alias for addHMS 43 | ``` 44 | -------------------------------------------------------------------------------- /doc/memlat.md: -------------------------------------------------------------------------------- 1 | A little utility to measure latency at various levels of the memory hierarchy. 2 | Only READ/load latency right now, though. 
3 | 4 | This was basically inspired by the discussion here (forum.nim-lang.org seems 5 | to no longer render many historical posts - sorry - that is one reason why 6 | I am re-posting this code here): 7 | 8 | https://forum.nim-lang.org/t/5734#35832 9 | 10 | as well as some lame arguments about cache locality in the context of very hard 11 | to measure cold-cache (& cold branch predictor) hash tables where vanilla linear 12 | probing basically always wins (sometimes by a lot) in spite of the fact that hot 13 | everything L1 micro-benchmarks can make it seem like "pseudorandom probing" can 14 | have a (small, probably not CPU-portable) edge. 15 | 16 | The `--kind=ranElt` and `=truRan` tests here basically emulate hash lookups 17 | while the `--kind=shuff` emulates cold cache memory loads (but branch predictors 18 | are still hot cache) or a load pattern more like hopping a long linked list or 19 | a very deep tree. (`truRan` only works on Linux right now.) 20 | 21 | This utility (in shuffle mode) is actually not so bad a way to measure memory 22 | systems against each other at various data scales. I see a great deal of 23 | variation in main memory/DIMM latencies which are not (often) covered in 24 | marketing speak like "DDR-N", but often very impactful on performance. 25 | 26 | ``` 27 | Usage: 28 | lat [optional-params] 29 | Time latency three ways. shuffle measures real latency. 30 | -k=, --kind= Algo shuff shuff: chase ran perm 31 | ranElt: access ran elt 32 | truRan: pre-read getrandom 33 | -s=, --sizeKiB= int 1048576 set sizeKiB 34 | -n=, --nAcc= int 1000000 set nAcc 35 | -a=, --avgN= int 4 set avgN 36 | -m=, --minN= int 4 set minN 37 | --seed= int 0 0=>random, else set 38 | ``` 39 | -------------------------------------------------------------------------------- /doc/chom.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | On multi-user systems and servers filesystem permissions can matter. 
Often one 5 | wants a restrictive umask (e.g. 077) for file/directory creation as a "safe 6 | default". Then sometimes you want to "open up" perms on some entire file 7 | sub-tree..say to collaborate with some other user. 8 | 9 | While you can do `chown -R` there is no "only directories" filter or "handle 10 | user-executable files differently" option. One can do wrapper scripts with 11 | `find`, but using `chom` is more efficient for both users and the system. 12 | One can also do fancy Zsh recursive globbing like `**(.x)`, but at least to 13 | me the ergonomics of `chom` are better than either of these options. 14 | 15 | Usage 16 | ----- 17 | ``` 18 | chom [optional-params] [paths: string...] 19 | 20 | This enforces {owner, group owner, permissions} for {dirs, non-executable other 21 | files, and user-executable files}. This only makes chown/chmod syscalls when 22 | needed, both for speed & not to touch ctime unnecessarily. It does not handle 23 | ACLs, network FS defined access, etc. Return zero if no calls are needed. 
24 | 25 | Options: 26 | -v, --verbose bool false print chown and chmod calls as they happen 27 | -q, --quiet bool false suppress most OS error messages 28 | -n, --dry-run bool false only print what system calls are needed 29 | -r=, --recurse= int 0 max recursion depth for any dir in paths 30 | -c, --chase bool false follow symbolic links to dirs in recursion 31 | -x, --xdev bool false block recursion across device boundaries 32 | -o=, --owner= string "" owner to set; may need root; defl=self 33 | -g=, --group= string "" group owner to set; defl=primaryGid(self) 34 | -d=, --dirPerm= Perm 2755 permission mask for dirs 35 | -f=, --filePerm= Perm 664 permission mask for files 36 | -e=, --execPerm= Perm 775 permission mask for u=x files 37 | -------------------------------------------------------------------------------- /doc/lncs.md: -------------------------------------------------------------------------------- 1 | `lncs` (pronounced links) is much like `cligen/examples/dups` but for clusters 2 | of hard-links, not elsewise duplicate files. 3 | 4 | `lncs` searches within paths of maybe-chasing, maybe-recursive closure of the 5 | UNION of roots and optional dlm-delimited input file (stdin if "-"|if "" & stdin 6 | not a tty). 7 | 8 | Exit code is min(255, num.clusters >= thresh). 9 | 10 | Eg., 11 | ``` 12 | find -print0|lncs -d\0 -o\0 -e\0 13 | ``` 14 | makes a report reliably splittable on double-NUL then single-NUL for fully 15 | general path names while `lncs -ls -n0 -r0 /` echoes a summary. 16 | 17 | There are a few knobs to filter out some common cases like small files or 18 | only include regular files, etc., but `find` can of course do all this and 19 | much more as an input generator. 
20 | 21 | ``` 22 | Usage: 23 | lncs [optional-params] filesystem roots 24 | 25 | -f=, --file= string "" optional input ("-"|!tty=stdin) 26 | -d=, --dlm= char '\n' input file delimiter (0->NUL) 27 | -r=, --recurse= int 1 recurse n-levels on dirs; 0:unlimited 28 | -c, --chase bool false follow symlinks to dirs in recursion 29 | -X, --xdev bool false block recursion across device boundaries 30 | -0, --eof0 bool false read dirents until 0 eof 31 | -k=, --kinds= set(FileKind) file i-node type like find(1): [fdlbcps] 32 | -m=, --minSize= int 0 minimum file size 33 | -t=, --thresh= int 2 smallest hard link cluster to count 34 | -q, --quiet bool false suppress file access errors 35 | -l=, --log= set(LncsLog) osErr >stderr{osErr, summary} 36 | -n=, --nEcho= int -1 num to print; 0: none; -1: unlimited 37 | -., --noDot bool false remove a leading . from names 38 | -o=, --outDlm= string "\t" output internal delimiter 39 | -e=, --endOut= string "\n" output record terminator 40 | ``` 41 | -------------------------------------------------------------------------------- /doc/uce.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | It can sometimes help to have an estimate of the number of unique/distinct items 4 | from a large, possibly compressed-on-storage input stream. E.g., you may want to 5 | spend 1-pass over the data to know if you could fit a hash table containing all 6 | keys in available space (& even pre-size such a table to avoid growth costs). 7 | If you cannot then you may need some other estimation approach. 8 | 9 | KMV Sketch Method 10 | ================= 11 | For a low price of just `k` max value entries[^1] you can get a pretty good 12 | estimate with error ~ 1/sqrt(k). 
13 | [adix/uniqce.nim](https://github.com/c-blake/adix/blob/master/adix/uniqce.nim) 14 | has more details.[^2] 15 | 16 | Usage[^3] 17 | ========= 18 | ``` 19 | uce [optional-params] 20 | 21 | Emit Unique Count Estimate of input lines to stdout. Algo is fast, low space 22 | 1-pass KMV over mmap | stream input. (For exact, see lfreq.) 23 | 24 | -i=, --input= "/dev/stdin" input data path 25 | -k=, --k= 1024 size of the sketch in float64 elts 26 | -r=, --re= 0..5 range of 10expon defining 'near 1' 27 | -f=, --fmt1= "$val0 +- $err0" fmt for uncertain num near 1 28 | -e=, --expF= "($valMan +- $errV)$valExp" fmt for uncertain num beyond `re` 29 | ``` 30 | Empty string for `fmt1` produces two columns of float at full precision. 31 | 32 | Examples 33 | ======== 34 | ```sh 35 | $ (seq 1 50; seq 1 50) | uce 36 | 50.00 +- 0.10 37 | 38 | $ (seq 1 5000000; seq 1 5000000) | uce 39 | (5.09 +- 0.16)e+06 40 | ``` 41 | 42 | [^1]: A `k` fitting in an L1 data cache yields O(1%) estimates. 43 | 44 | [^2]: This is by far the simplest sketch family along these lines - 45 | conceptually, code, etc. 46 | 47 | [^3]: BTW, in my head I pronounce "uce" like the tail of "Bruce". { And yes I am 48 | aware that UCE also stands for Unsolicited Commercial Email aka "spam". I hope 49 | you like this tool better than that, at least. 
;-) } 50 | -------------------------------------------------------------------------------- /fsids.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | include cligen/unsafeAddr 3 | import std/[tables, algorithm, posix], cligen/[dents, osUt, posixUt, statx] 4 | type IdKind = enum user, group, all="both" 5 | type Order = enum id, count 6 | 7 | proc print*[Id](hdr: string, ids: Table[Id, int], nm: Table[Id, string], 8 | order=id) = 9 | var pairs, sorted: seq[tuple[id: Id, count: int]] 10 | for id, cnt in ids: pairs.add (id, cnt) 11 | case order 12 | of id : sorted = pairs.sortedByIt( it[0]) 13 | of count: sorted = pairs.sortedByIt(-it[1]) 14 | echo hdr, "\tNentry\tName" 15 | for tup in sorted: 16 | echo tup[0], "\t", tup[1], "\t", nm.getOrDefault(tup[0], "MISSING") 17 | 18 | proc fsids*(roots: seq[string], kind=all, order=id, 19 | recurse=0, follow=false, xdev=false, eof0=false) = 20 | ## Print a histogram of uids and/or gids used by a file tree 21 | var uids: Table[Uid, int] 22 | var gids: Table[Gid, int] 23 | let doU = kind in { user , all } 24 | let doG = kind in { group, all } 25 | for root in (if roots.len > 0: roots else: @[ "." 
]): 26 | forPath(root, recurse, true, follow, xdev, eof0, stderr, 27 | depth, path, nmAt, ino, dt, lst, dfd, dst, did): 28 | if doU: uids.mgetOrPut(lst.st_uid, 0).inc 29 | if doG: gids.mgetOrPut(lst.st_gid, 0).inc 30 | do: discard # No pre-recurse 31 | do: discard # No post-recurse 32 | do: recFailDefault("fsids", path) # cannot recurse 33 | if doU: print "#Uid", uids, users() , order 34 | if doG: print "#Gid", gids, groups(), order 35 | 36 | when isMainModule:import cligen;include cligen/mergeCfgEnv;dispatch fsids,help={ 37 | "kind" : "kind of ids to report user, group, both", 38 | "order" : "sort order: up by id or down by count", 39 | "recurse": "recursion limit for dirs in `roots`; 0=unbounded", 40 | "follow" : "follow symbolic links to dirs in recursion", 41 | "xdev" : "block recursion from crossing devices" } 42 | -------------------------------------------------------------------------------- /holes.nim: -------------------------------------------------------------------------------- 1 | import std/[posix, strutils] 2 | when not declared(File): import std/syncio 3 | 4 | iterator holes*(fd: cint): (bool, int) = 5 | const SEEK_DATA = cint(3) 6 | const SEEK_HOLE = cint(4) 7 | const what = [SEEK_HOLE, SEEK_DATA] 8 | let eof = lseek(fd, 0, SEEK_END) 9 | var pos = lseek(fd, 0, SEEK_HOLE) 10 | var hole = pos == 0 11 | if pos > 0: 12 | yield (hole, pos) 13 | errno = 0.cint # Clear any earlier ENXIO's 14 | while pos < eof and errno != ENXIO: 15 | if (let new = lseek(fd, pos, what[hole.int]); new != -1): 16 | if new - pos > 0: 17 | yield (hole, new - pos) 18 | pos = new 19 | hole = not hole 20 | if eof - pos > 0: 21 | yield (hole, eof - pos) 22 | 23 | proc sholes(format="", files: seq[string]) = 24 | ## Show hole & data segments for `files` 25 | const name = ["data", "hole"] 26 | let format = if format.len != 0: format else: "$count $path\n$map" 27 | let needMap = "$map" in format or "${map}" in format 28 | let userTerm = "$zero" in format or "${zero}" in format 29 | 
var m: string 30 | for file in files: 31 | if (let fd = open(file.cstring, O_RDONLY); fd >= 0): 32 | m.setLen 0 33 | var n = 0 34 | for (hole, size) in fd.holes: 35 | inc n 36 | if needMap: 37 | m.add '\t'; m.add name[hole.int] 38 | m.add '\t'; m.add $size 39 | m.add '\n' 40 | discard fd.close # Can fail on netFSes; No recovery really possible 41 | stdout.write format % ["count",$n, "path",file, "map",m, "nul","\0"] 42 | if not userTerm and not needMap: 43 | stdout.write '\n' 44 | 45 | when isMainModule: 46 | import cligen;include cligen/mergeCfgEnv;dispatch sholes,cmdName="holes",help={ 47 | "format": """emit format interpolating (braces ok for flush-text): 48 | $count : number of data|hole segments 49 | $path : path name of REGULAR FILE from $\* 50 | $map : map of all data&hole segments 51 | $zero : a NUL byte 52 | \"\" => \"$count\\t$path\\n$holes\\n\""""} 53 | -------------------------------------------------------------------------------- /bu/emin.nim: -------------------------------------------------------------------------------- 1 | import std/[stats, algorithm], bu/eve 2 | 3 | type MinEst* = tuple[est, err: float] ## An uncertain estimate of a minimum 4 | 5 | template eMin*(k=2, n=7, m=3, get1): untyped = 6 | ## This template takes as its final parameter any Nim code block giving one 7 | ## `float` (probably a delta time) and gives a `MinEst` by a best k/n m-times 8 | ## approach. `doc/tim.md` has details; `bu/tim.nim` is a CLI utility example. 9 | #IDEA: Check m-sampling same via Anderson-Darling(minTail-weighted/clipped). 10 | var xall: seq[float] 11 | var sest: RunningStat 12 | let a = k.a_ik 13 | for outer in 1..m: 14 | var samp: seq[float] 15 | for inner in 1..n: samp.add (block: get1) 16 | samp.sort 17 | sest.push samp.eLE(a) 18 | xall.add samp 19 | (est: xall.eLE(a_ik(2*k)), err: sest.standardDeviation) #/sqrt(m.float)4big m? 
20 | 21 | when isMainModule: 22 | import cligen 23 | when defined test: 24 | when not declared(addFloat): import std/formatFloat 25 | proc minE(k: int, x: seq[float]) = 26 | var x = x; x.sort 27 | echo eLE(x, k.a_ik) 28 | x.reverse; echo "flipped method, just basic estimate" 29 | let off = 2*x[0] + - x[^1] 30 | echo "off: ", off 31 | for e in mitems x: e = off - e 32 | echo off - x.ere(k.a_ik) # , off # for debugging 33 | dispatch minE, help={"k":"2k=num of order stats", "x":"1-D / univar data.."} 34 | else: 35 | import cligen/strUt; include cligen/mergeCfgEnv 36 | proc minE(warmup=1, k=2, n=7, m=3, ohead=0, x: seq[float]) = 37 | ## Emit a minimum estimator of `x` with its uncertainty 38 | if x.len != warmup + n*m: 39 | quit "warmup, n, m mismatch given x[]; Run with --help for more.", 1 40 | var i = warmup - 1 41 | let (est, err) = eMin(k, n, m, (inc i; x[i])) 42 | echo fmtUncertain(est, err, e0= -2..5) 43 | dispatch minE,cmdName="emin", help={"x":"x1 x2..", "warmup":"initial skip", 44 | "k":"k for eLE", "n":"n for eLE", "m":"outer reps", "ohead":"ignored"} 45 | -------------------------------------------------------------------------------- /doc/notIn.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | https://github.com/c-blake/ndup has a POSIX shell script, sh/ndup which keeps 5 | a mirrored set of files related to some source files. A natural course of file 6 | management (especially with duplicate/near duplicate removal in play) is the 7 | user creating new files or directories, renaming old ones, etc. With any sort 8 | of mirrored hierarchy of derived files, this induces a need to clean up stale 9 | derivations. This is what `notIn` helps do. 
10 | 11 | The salient line in the above-mentioned script is: 12 | ```sh 13 | notIn -f$w/f0 $w/digs $w/sets | xargs -0 rm -fv 14 | ``` 15 | This will remove any files under digs/ or sets/ that are *not* in the path list 16 | file `f0` (hence the program name `notIn`). 17 | 18 | I have not used it for this personally, but another example use case might be a 19 | parallel hierarchy used for `lc -X,--extra` extra parameter values for 20 | [lc](https://github.com/c-blake/lc) but only where a user has permission to 21 | write. In this case, due to the nature of `lc`, you probably only care about 22 | stale directories. 23 | 24 | In general, parallel file trees can be an interesting tool both conceptually 25 | and practically and `notIn` can help to maintain them/query disparities/etc. 26 | 27 | Usage 28 | ----- 29 | ``` 30 | notIn [optional-params] [roots: string...] 31 | 32 | Find files under roots NOT matching pattern applied to any file entry. E.g.: 33 | (cd D1; find . -print0) | notIn D2 D3 | xargs -0 echo 34 | echoes every entry under D2 or D3 not also under D1. 35 | 36 | Input paths are normalized to nix empty parts (e.g. 1st&3rd in "./foo/./bar"). 37 | 38 | find -path A -o -path B .. can do this, but is hard for many paths. 
39 | 40 | -f=, --file= string "" delimited input ( "" => stdin ) 41 | -d=, --delim= char '\x00' input path delimiter 42 | -t=, --term= char '\x00' output path terminator 43 | -p=, --pattern= string "$1" a $1-containing under roots pattern 44 | -v, --invert bool false find files that do match a file entry 45 | ``` 46 | -------------------------------------------------------------------------------- /fpr.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | import cligen, cligen/[mfile, osUt], std/strutils 3 | 4 | iterator fprs(paths: (iterator(): string)): 5 | tuple[pages: tuple[resident, total: int], path: string, err: int] = 6 | var empty: tuple[resident, total: int] 7 | for path in paths(): 8 | when defined(windows): 9 | yield (pages: empty, path: path, err: 1) 10 | else: 11 | if (let mf = mopen(path); mf != nil): 12 | yield (pages: mf.inCore, path: path, err: 0) 13 | mf.close 14 | else: 15 | yield (pages: empty, path: path, err: 1) 16 | 17 | type Emit = enum summary, detail, errors 18 | 19 | proc fpr(file="", delim='\n', emit={summary}, paths: seq[string]): int = 20 | ## File Pages Resident. Examine UNION of `paths` & optional `delim`-delimited 21 | ## input `file` (stdin if "-"|"" & stdin not a tty). Eg., `find -print0 | fpr 22 | ## -d\\0`. Like util-linux `fincore`, but more Unix-portable & summarizing. 23 | var nErr, r, t, nFile: int # Track numErr, resid, total pages 24 | for y in fprs(both(paths, fileStrings(file, delim))): 25 | nFile.inc # Update number of files & stats for 26 | r.inc y.pages.resident # y)ielded tuples 27 | t.inc y.pages.total 28 | nErr.inc y.err 29 | if errors in emit and y.err != 0: # Ignore errors from zero length files? 
30 | stderr.write "fpr: error: \"", y, "\" (zero length/special file?)\n" 31 | if detail in emit: 32 | echo y.pages.resident," of ",y.pages.total," pages resident in ",y.path 33 | if summary in emit and nFile > 0: 34 | echo r," of ",t," pages ", formatFloat(r.float/t.float*100.0, ffDecimal, 2), 35 | "% resident in ",nFile," files ",nErr," errors" 36 | min(nErr, 127) # Exit with appropriate status 37 | 38 | include cligen/mergeCfgEnv 39 | dispatch fpr, help={"file" : "optional input (\"-\"|!tty=stdin)", 40 | "delim": "input file delimiter (\\0->NUL)", 41 | "emit" : "Stuff to emit: *summary* *detail*"} 42 | -------------------------------------------------------------------------------- /bu/testf.nim: -------------------------------------------------------------------------------- 1 | ##[ `vip` caches these answers for UI logic simplicity & efficiency. Stale info 2 | can be displayed if FS changes fast relative to interactive pick sessions. ]## 3 | 4 | import std/posix # .so source for use in `dirs|vip -k libtestf.so:cdable` 5 | 6 | var cpath = "" # Reused path buffer to be NUL/\0-term 7 | proc cdable(path: pointer, nPath: clong): cint {.noconv, exportc, dynlib.} = 8 | if nPath == 0 or path.isNil: 9 | return 0 10 | cpath.setLen nPath # `vip` does not open any files post `parseIn()` 11 | copyMem cpath[0].addr, path, nPath 12 | cint(chdir(cast[cstring](cpath[0].addr)) == 0) 13 | #NOTE: Above assumes strings come as rooted paths ("/a/b/leafDir"). Lacking a 14 | # leading "/" makes it a relative path which can succeed relative to the 15 | # (newly, per all our chdirs) current working directory, but which would 16 | # fail relative to the original parent process. 17 | 18 | #[ To cursor down, `vip` must test one at a time until a success. To get more 19 | async/scalable needs a batch interface with forked kids which an ok idea since a 20 | hanging NFS mount can hang a kid process & we might want to kill it. 
Laziness 21 | of outer validation may mean only 0..3 timeouts in any given UI interaction. 22 | So, they could be made 50..100ms. There may be a way to build a critbit tree, 23 | monitor /proc/mounts, and only time out once per mount prefix or etc. Of 24 | course, a file system could also come back online at any moment as well. 25 | 26 | On systems with nice enough terminal interaction for `vip` to make sense but 27 | poor dynamically loadable lib support (are there any??), `vip` *could* add a 28 | `--validation-coprocess=foo` to make a kid process to delegate requests to, 29 | mostly blocked on IPC read, but ready to read a path, do whatever user-program 30 | tests & write 1-byte. While 2 syscalls per request (in both parent & kid + 31 | whatever validation work), they are at least fast-ish pipe-IO calls. 32 | 33 | This module could grow a large family of `bu/ft`-like tests. PRs welcome if 34 | this would help your specific application setting. ]# 35 | -------------------------------------------------------------------------------- /jointr.nim: -------------------------------------------------------------------------------- 1 | import std/[tables, strutils], cligen/[sysUt, osUt, mfile, mslice], cligen 2 | 3 | proc jointr*(cont=" ", boc="<... ", eoc=" resumed>", all=false, 4 | path: seq[string]) = 5 | ## Multi-process programs are often usefully debugged via something like 6 | ## `strace --decode-fds -fvs8192 -oFoo multi-process-program` 7 | ## but this breaks up a system call execution suspension into top & bottom 8 | ## halves with "..." indicators like: 9 | ## PID (.\*)" \\\n .. samePID <... CALL resumed> 10 | ## where top-half parameters are elided in bottom half resumption. This 11 | ## program joins these lines for easier reading, optionally retaining the 12 | ## "unfinished" to aid temporal reasoning. In said retention mode, 13 | ## never-resumed calls print in hash order at the end. 
14 | var top: Table[MSlice, MSlice] 15 | let sep = initSep "white" 16 | var cols: seq[MSlice] 17 | for line in mSlices(if path.len<1: "/dev/stdin" else: path[0], keep=true): 18 | sep.split line, cols, 2 19 | if cols.len != 2: continue 20 | let (pid, rest) = (cols[0], cols[1]) 21 | if rest.startsWith boc: # Skip to 1st eoc & output top,bottom 22 | let ix = line.find eoc 23 | if ix == -1: IO !! "missing \"" & eoc & "\"" 24 | outu alignLeft($pid, 5), " ", top[pid], line[ix+eoc.len..^1], "\n" 25 | top.del pid 26 | elif rest.endsWith cont: # Save for bottom half 27 | top[pid] = rest[0 ..^ (cont.len + 1)] 28 | if all: outu line, '\n' 29 | else: outu line, '\n' 30 | if not all: 31 | for pid, rest in top: # Would be nicer to emit in orig order, 32 | outu alignLeft($pid, 5), rest #..but that needs more subtle buffering 33 | outu cont, '\n' #..& never-resumed calls must be rare. 34 | 35 | include cligen/mergeCfgEnv; dispatch jointr, help={ 36 | "path": "strace log path (or none for stdin)", 37 | "cont": "line suffix saying it continues", 38 | "boc" : "beg of contin. indication to eat", 39 | "eoc" : "end of contin. indication to eat", 40 | "all" : "retain \"unfinished ...\" in-place"} 41 | -------------------------------------------------------------------------------- /doc/sr.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Linux can misbehave, especially on more exotic hardware or with untested 5 | software. So, the kernel provides a way to interpret special keys on the 6 | keyboard and a more remote-friendly "/proc/sysrq-trigger" interpretation. 7 | This program wraps all of that to make it easy to just run a command to 8 | get the intended results. 9 | 10 | For example, if programs and libc are all breaking but you have a statically 11 | linked `sr` and can still get a few pages off the disk you may be able to run 12 | `sr u; sr s; sr b` with some success. 
13 | 14 | Usage (***NOT*** a cligen utility) 15 | ----- 16 | Usage (as root!): 17 | sr 18 | where CODE is: 19 | b immediately reboot without syncing or unmounting 20 | c crash system by NULL pointer deref, leave crashdump if configured 21 | d shows locks that are held 22 | e send SIGTERM to all processes, except for init 23 | f call OOM killer to kill memory hogs; No panic if nothing can be killed 24 | g used by kgdb (kernel debugger) 25 | i send SIGKILL to all processes, except for init 26 | j forcibly "Just thaw it" - filesystems frozen by FIFREEZE ioctl 27 | k secure Access Key (SAK); Kill programs on current virtual console 28 | l shows stack backtrace for active CPUs 29 | m dump current memory info to your console 30 | n used to make RT tasks nice-able 31 | o shut your system off (if configured & supported) 32 | p dump current registers & flags to your console 33 | q dump armed hrtimers (NOT regular timer_list timers) & clockevent dev info 34 | r turns off keyboard raw mode & sets it to XLATE 35 | s attempt to sync mounted filesystems 36 | t dump current tasks & their information to your console 37 | u attempt to remount mounted filesystems read-only 38 | v forcefully restores framebuffer console; causes ETM buffer dump on ARM 39 | w dumps tasks that are in uninterruptable (blocked) state 40 | x used by xmon on PPC; Show global PMU Regs on sparc64; Dump TLBs on MIPS 41 | y show global CPU Registers [SPARC-64 specific] 42 | z dump FTRACE buffer 43 | 0-9 set console log level; 0=emergency messages (PANICs|OOPSes) only 44 | -------------------------------------------------------------------------------- /sr.nim: -------------------------------------------------------------------------------- 1 | import std/[os, posix, strutils] 2 | const SRQ = "/proc/sysrq-trigger" 3 | const use = """Usage (as root!): sr [..] 
[DELAY(ms)] where CODE is: 4 | b immediately reboot without syncing or unmounting 5 | c crash system by NULL pointer deref, leave crashdump if configured 6 | e send SIGTERM to all processes, except for init 7 | f call OOM killer to kill memory hogs; No panic if nothing can be killed 8 | i send SIGKILL to all processes, except for init 9 | j forcibly \"Just thaw it\" - filesystems frozen by FIFREEZE ioctl 10 | k secure Access Key (SAK); Kill programs on current virtual console 11 | l shows stack backtrace for active CPUs 12 | m dump current memory info to your console 13 | n used to make RT tasks nice-able 14 | o shut your system off (if configured & supported) 15 | p dump current registers & flags to your console 16 | q dump armed hrtimers (NOT regular timer_list timers)&clockevent dev info 17 | r turns off keyboard raw mode & sets it to XLATE 18 | s attempt to sync mounted filesystems 19 | t dump current tasks & their information to your console 20 | u attempt to remount mounted filesystems read-only 21 | w dumps tasks that are in uninterruptable (blocked) state 22 | x used by xmon on PPC; Show global PMU Regs on sparc64; Dump TLBs on MIPS 23 | y show global CPU Registers [SPARC-64 specific] 24 | z dump FTRACE buffer 25 | 0-9 set console log level; 0=emergency messages (PANICs|OOPSes) only""" 26 | 27 | if paramCount() < 1 or paramStr(1).len < 1 or paramStr(1) == "h": 28 | quit use, 0 29 | if geteuid() != 0: 30 | quit "only root can use "&SRQ&"\n", 2 31 | let delay = if paramCount() > 1: parseInt(paramStr(2)) else: 250 32 | for i, c in paramStr(1): 33 | if c in {'b','c', 'e','f', 'i'..'u', 'w'..'z', '0'..'9'}: 34 | if (let fd = open(SRQ, O_WRONLY); fd >= 0): 35 | var buf = [c, '\n'] 36 | if write(fd, buf[0].addr, 2) != 2: 37 | quit "write()!=2: " & $errno.strerror, 4 38 | discard close(fd) # Nim bug: No discard => err@wrong lineNo 39 | else: 40 | quit "open "&SRQ&": " & $errno.strerror, 3 41 | else: 42 | quit use, 1 43 | if i + 1 < paramStr(1).len: 44 | sleep delay 
import std/[posix, strutils, tables], cligen/[osUt, mslice, magic, procpool]
when not declared(stderr): import std/syncio

when haveMagic:
  type Excl = enum compress,tar,soft,apptype,elf,text,cdf,tokens,encoding,ascii
  const e2Flag = { # CSV & json missing; Maybe cligen/magic needs updating?
    apptype : MAGIC_NO_CHECK_APPTYPE , ascii   : MAGIC_NO_CHECK_ASCII   ,
    encoding: MAGIC_NO_CHECK_ENCODING, tokens  : MAGIC_NO_CHECK_TOKENS  ,
    cdf     : MAGIC_NO_CHECK_CDF     , compress: MAGIC_NO_CHECK_COMPRESS,
    elf     : MAGIC_NO_CHECK_ELF     , soft    : MAGIC_NO_CHECK_SOFT    ,
    tar     : MAGIC_NO_CHECK_TAR     , text    : MAGIC_NO_CHECK_TEXT }.toTable

  var gFlags = 0.cint   # libmagic check-exclusion flags; set before kids fork
  proc count(histo: var CountTable[string], s: MSlice) = histo.inc $s

  proc classify(r, w: cint) = # Child: read '\0'-term paths on r; reply types on w
    var m = magic_open(gFlags)
    if m == nil or magic_load(m, nil) != 0:
      # FIX: old code wrote a stray C-style "%s\n\t" literally (Nim `write`
      # takes varargs, not a printf format); just write the parts in order.
      stderr.write "cannot load magic DB: ", m.magic_error, "\n"
      quit 1
    for path in r.open.getDelim('\0'):
      let fileType = $m.magic_file(path.cstring)
      discard wrLenBuf(w, fileType)     # length-prefixed frame back to parent

  proc fkindc*(gen="find $1 -print0", dlr1=".", excl: set[Excl]={}, jobs=0) =
    ## Use ``gen`` and ``dlr1`` to generate paths and histogram by `file(1)` type.
    var histo: CountTable[string]
    for e in excl: gFlags = gFlags or cint(e2Flag[e]) # Set up gFlags for libmagic
    let inp = popen(cstring(gen % dlr1), "r".cstring) # Fire input path generator
    var pp = initProcPool(classify, framesLenPfx, jobs) # Start & drive kids
    pp.eval0term(inp.getDelim('\0'), histo.count) # Replies=0-term file types
    discard inp.pclose
    histo.sort                                    # most frequent types first
    for k, ct in histo: echo ct, '\t', k

  when isMainModule:
    import cligen; include cligen/mergeCfgEnv
    dispatch fkindc, short={"excl": 'x'}, help={
      "gen" : "generator cmd with dlr1 -> $1",
      "dlr1": "$1 for gen fmt; Eg. *\". -type f\"*",
      "excl": "tests to exclude like `file(1)`",
      "jobs": "use this many kids (0=auto)" }
else: quit "libmagic from file was not found when this program was built.", 1
9 | let origin = if O0: 0 else: origin 10 | var outFile = open(output, fmWrite) 11 | var colSet = initHashSet[int](colRanges.len) 12 | if cut: 13 | for r in colRanges: 14 | for c in r: colSet.incl c 15 | let sep = initSep delim 16 | var cols: seq[MSlice] = @[ ] 17 | for line in mSlices(input, sep=rowDlm, eat='\0'): # RO mmap | 1-use slices 18 | var wrote = false # wrote something &so need sepOut|\n 19 | sep.split line, cols 20 | if cut: 21 | for j, f in cols: 22 | if (origin + j) in colSet or (origin + j - cols.len) in colSet: 23 | continue 24 | if wrote: outFile.urite sepOut 25 | outFile.urite f 26 | wrote = true 27 | else: 28 | for r in colRanges: 29 | for i in r: 30 | let j = if i < 0: i + cols.len else: i - origin 31 | if j < 0 or j >= cols.len: 32 | continue 33 | if wrote: outFile.urite sepOut 34 | outFile.urite cols[j] 35 | wrote = true 36 | if wrote or blanksOk: outFile.urite term 37 | 38 | when isMainModule: include cligen/mergeCfgEnv; dispatch cols, help={ 39 | "colRanges": "colNums or A..B | X:Y (in|ex)clusive ranges thereof", 40 | "input" : "path to mmap|read as input", 41 | "rowDlm" : "inp *row* delimiter character", 42 | "delim" : "inp *field* dlm chars; len>0 => fold", 43 | "output" : "path to write output file", 44 | "sepOut" : "output field separator", 45 | "blanksOk" : "allow blank output rows", 46 | "cut" : "cut/censor specified columns, not keep", 47 | "origin" : "origin for colNums; 0=>signed indexing", 48 | "O0" : "shorthand for `--origin=0`", 49 | "term" : "set output row terminator (e.g. \\\\0)"}, short={"O0": '0'} 50 | -------------------------------------------------------------------------------- /doc/cstats.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Programs will often dump out numbers with varying amounts of context in rows. 4 | This might be resource usage information, like [ru](ru.md) or various results 5 | or really a great many things. 
It is very simple/natural to go from one report 6 | to many for various parameters/input files and so on in a shell loop, but with 7 | such data either collected or at the start of a pipeline, at other stages of 8 | inquiry it can be nice to have summary statistics. 9 | 10 | Usage 11 | ----- 12 | ``` 13 | cstats [optional-params] [stats: string...] 14 | 15 | This consumes any stdin looking like regular intercalary text with embedded 16 | floats & prints a summary with the LAST such text & requested stats for any 17 | varying float column. If table!="", context is joined via hsep into headers 18 | for associated reduced numbers, with columns separated by table (eg. ','). 19 | Available stats (ms if none given).. 20 | 21 | mn: mean sd: sdev se: stderr(mean) (i.e. sdev/n.sqrt) 22 | sk: skewness kt: kurtosis ms: mn +- se via pm exp nd unity sci params 23 | iq: interQuartileRange sq: semi-interQuartileRange n: len(nums) 24 | qP: General Parzen interpolated quantile P (0<=P<=1 float; 0=min; 1=max) 25 | 26 | ..print as separate rows in the table mode or else joined by join. 
27 | 28 | -d=, --delim= string "white" inp delims; Repeats=>fold 29 | -t=, --table= string "" labels -> header of a 30 | table-separated table 31 | --hsep= string "strip" header sep|strip if=strip 32 | -p=, --pm= string " +- " plus|minus string 33 | -e=, --exp= Slice -2..4 pow10 range for 'unity' 34 | -n=, --nd= int 2 n)um sig d)igits of sigma 35 | -u=, --unity= string "$val0${pm}$err0" near unity format 36 | -s=, --sci= string "($valMan $pm $errV)$valExp" scientific format 37 | -j=, --join= string "," intern st-delim for 1-row 38 | -m=, --min= int 0 use min-most numbers 39 | -M=, --max= int 0 use max-most numbers 40 | ``` 41 | -------------------------------------------------------------------------------- /doc/rs.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Since data sets can be large, random fair subsets/re-samplings can be useful. 4 | 5 | https://en.wikipedia.org/wiki/Reservoir_sampling has more details. Note that 6 | the cost comparison there between Algorithm R & L neglects IO costs which are 7 | around 50+% dominant for this little utility even on a RAM filesystem (just from 8 | `memchr` for line splitting even on very short lines). So, rather than the big 9 | O(n/k)-ish asymptotic speed-up factor, Algorithm L is likely only <~ 2X faster, 10 | at least with the `.add` API of `bu/rs.nim`. 11 | 12 | The random sampling with replacement algorithm is quite slow and should be 13 | replaced, but to detail its logic here, all slots in the reservoir table evolve 14 | identically & independently, and the evolution of the first slot looks like: 15 | ``` 16 | data 1 2 3 4 5 ... N 17 | slot0 1 -> 1 -> 1 -> 1 -> 1 ... N, p=1/2*2/3*3/4*4/5*...=1/N 18 | slot0 2 -> 2 -> 2 -> 2 ... N, p= 1/3*3/4*4/5*...=1/N 19 | slot0 3 -> 3 -> 3 ... N, p= 1/4*4/5*...=1/N 20 | ``` 21 | So, each slot has a similar 1/N independent chance of surviving until the end. 
22 | 23 | Some care was put into the command-line API here, in particular the ability to 24 | `--flush` the outputs to give immediate reads to possible FIFO workers. Also, 25 | you can create as many random subsets/samples of whatever various sizes as you 26 | like in various files rather easily by just listing them. 27 | 28 | Usage 29 | ----- 30 | ``` 31 | rs [optional-params] [pfx.][-]n.. output paths; pfx""=>stdout 32 | 33 | Write ranSubsets|Samples of rows of input -> prefix.ns. If n>0 do random 34 | subsets else sample with replacement. O(Σns) space. Examples: 35 | 36 | seq 1 100 | rs 10 .-5 or (after maybe mkfifo f1 f2) 37 | workOn f1 & workOn f2 & seq 1 1000 | rs -f f1.10 f2.-20 38 | 39 | Options: 40 | -i=, --input= string "" "" => stdin 41 | -f, --flush bool write to outs immediately 42 | -r, --randomize bool randomize() for non-deterministic filtering 43 | ``` 44 | 45 | Examples 46 | -------- 47 | Input: 48 | ```sh 49 | seq 1 1000 | rs foo1.9 foo2.9 foo3.9 foo4.9 50 | for f in foo*; do cstats q.5 < $f; done 51 | ``` 52 | Output: 53 | ``` 54 | 281.0 55 | 442.0 56 | 370.0 57 | 591.0 58 | ``` 59 | -------------------------------------------------------------------------------- /doc/newest.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | This is (mostly) a convenience program for something I often want to know. 4 | 5 | Usage 6 | ----- 7 | ``` 8 | newest [optional-params] [paths: string...] 9 | 10 | Echo ended by outEnd the <= n newest files in file time order {-}[bamcv] for 11 | Birth, Access, Mod, Ctime, Version=max(MC); { - | CAPITAL means oldest }. 12 | 13 | Examined files = UNION of paths + optional delim-delimited input file (stdin 14 | if "-"|if "" & stdin is not a terminal), maybe recursed as roots. 15 | 16 | E.g. to echo the 3 oldest regular files by m-time under the CWD: 17 | newest -t-m -n3 -r0 . 
Common cases should not suffer from pathologies. Default open fd limits that hail from 1970s memory costs are already pretty dumb.
5 | 6 | Duplicative 7 | ----------- 8 | 9 | To people who say "Tool XYZ is duplicative", I say "Not for me when I wrote it, 10 | but yes, truly exhaustive related research is often harder than writing code. 11 | Anyway, enjoy|not and I am happy to add refs to analogues to per-tool docs." 12 | 13 | Layout 14 | ------ 15 | 16 | To people not liking the file tree layout, I agree, but this seemed the easiest 17 | when some code like "bu/eve" is both Nim import-able lib and nimble-installable 18 | binary. I think nimble micro-manages these things to its detriment. 19 | 20 | Over-bundling/Too Many/Mah Head Asplode 21 | --------------------------------------- 22 | 23 | To people who say "this package is over-bundled", my rejoinder is: 24 | 25 | - Packages package; Pros & cons 26 | 27 | - I did not want to overly bias the nimbleverse toward even more packages 28 | needing [cligen](https://github.com/c-blake/cligen) (maybe only because I 29 | wrote both). 30 | 31 | - It's not such a nose-bleed percentile in the context of healthy Unix package 32 | ecosystems. Using `qtl` from [fitl](https://github.com/c-blake/fitl), I get: 33 | ```sh 34 | (for p in `q list -Iv`;do echo `q files $p|grep /bin/|wc -l` $p;done)| 35 | awk '{if($1>0)print $1}'|qtl .08 .5 .92 36 | ``` 37 | gives on one of my Gentoo's: 38 | ``` 39 | 1.0 2.008333333333333 14.72363636363637 40 | ``` 41 | (at the start, anyway, `bu` had 15 bins..so, about 92nd percentile). Anyway, 42 | util-linux has 73, coreutils has 127 and we will (probably) never get near 200. 43 | 44 | [`cligen/examples`](https://github.com/c-blake/cligen/tree/master/examples) will 45 | mostly move here in the near-term because >1 person has complained about that 46 | having too much. This will almost double the size of the collection. I should 47 | probably port a dozen or two more from C, but I still consider this all quite 48 | restrained. I have 1100 scripts & programs in `~/bin` | `/usr/local/bin`. Most 49 | are not in Nim. 
# This is like `nio moments`, but has `adix` as a hard dep for histo & qs.
when not declared(addFloat): import std/formatFloat
import std/math, nio, adix/[mvstat, lghisto], cligen/osUt

type MomKind = enum mkN="n", mkMin="min", mkMax="max", mkSum="sum", mkAvg="avg",
                    mkSdev="sdev", mkSkew="skew", mkKurt="kurt", mkHisto="histo"

proc fmtStat(ms: MovingStat, mk: MomKind, fmt: string): string =
  ## Render the single moment `mk` of accumulator `ms` with float format `fmt`.
  ## Returns "" for `mkHisto`, which the caller prints via `lgHisto` instead.
  case mk
  of mkN: ms.n.float64 .formatFloat(fmt)
  of mkMin: ms.min .formatFloat(fmt)
  of mkMax: ms.max .formatFloat(fmt)
  of mkSum: ms.sum .formatFloat(fmt)
  of mkAvg: ms.mean .formatFloat(fmt)
  of mkSdev: ms.standardDeviation.formatFloat(fmt)
  of mkSkew: ms.skewness .formatFloat(fmt)
  of mkKurt: ms.kurtosis .formatFloat(fmt)
  else: ""

proc niom(fmt=".4g", stats={mkMin, mkMax}, qs: seq[float] = @[],
          a=1e-16, b=1e20, n=8300, paths: Strings): int =
  ## Print selected statistics over all columns of all `paths`.
  # Order statistics (histogram/quantiles) need extra space in each
  # accumulator; only request them when the user asked for them.
  let opt = if mkHisto in stats or qs.len > 0: {OrderStats} else: {}
  for path in paths:
    var inp = nOpen(path)                     # NOTE(review): NIO binary file
    var sts: seq[MovingStat[float64,uint32]]  # one accumulator per column
    for c in inp.rowFmt.cols:
      sts.add initMovingStat[float64,uint32](a, b, n, opt)
    var num: float
    block fileLoop:                           # rows stream in column order;
      while true:                             # EOF mid-row also ends input
        for j in 0 ..< sts.len:
          if not inp.read(num): break fileLoop
          if not num.isNaN: sts[j].push num   # NaN => missing datum; skip
    for j in 0 ..< sts.len:                   # emit "path:col stat: val ..."
      outu path, ":", j
      for mk in [mkN, mkMin, mkMax, mkSum, mkAvg, mkSdev, mkSkew, mkKurt]:
        if mk in stats: outu " ", $mk, ": ", fmtStat(sts[j], mk, fmt)
      for i, q in qs: outu (if i>0: " " else: ""), sts[j].quantile(q)
      if mkHisto in stats: outu " ", $sts[j].lgHisto
      outu "\n"
    inp.close

when isMainModule:
  import cligen; include cligen/mergeCfgEnv; dispatch niom, help={
    "paths": "[paths: 1|more paths to NIO files]",
    "fmt" : "Nim floating point output format",
    "stats": "*n* *min* *max* *sum* *avg* *sdev* *skew* *kurt* *histo*",
    "a" : "min absolute value histo-bin edge",
    "b" : "max absolute value histo-bin edge",
    "n" : "number of lg-spaced histo bins",
    "qs" : "desired quantiles"}
This is all rather complex to drive. So, you also may want to report & explain
54 | 55 | Related Work 56 | ------------ 57 | [rs](rs.md) is a flat-weighted sampler. 58 | -------------------------------------------------------------------------------- /doc/only.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | People often want to know what kind of file something is. For a long time, 4 | Apple had some whole resource fork in its FS for this metadata. Maybe it still 5 | does. On Unix the tradition is a magic number and something like `file(1)` or 6 | `libmagic(3)` that instead opens & partially parses files. This procedure is, 7 | however, very slow and often CPU bound (depending upon what the OS has cached). 8 | 9 | Cost is relative, of course. For one file it does not take long in human terms, 10 | but you can have a *lot* of files. Modern CPUs have many cores to deploy work 11 | to along these lines, but at least the Linux libmagic is very MT-UNSAFE. So, 12 | forked kids are the best way to go multi-core and cligen/procpool is an easy 13 | way to do that. This program was basically the original motivation for procpool 14 | in Nim and its original demo program. 15 | 16 | One example usage might be `rm $(only ELF)` as a kind of ghetto "make clean", 17 | assuming you have a way to rebuild any ELF object files that is. 18 | 19 | Usage 20 | ----- 21 | ``` 22 | only [optional-params] [patterns: string...] 23 | 24 | Use gen and dlr1 to generate paths, maybe skip trim and then emit any path 25 | (followed by eor) whose file(1) type matches any listed pattern. 26 | 27 | all & no can combine to mean not all patterns match. 28 | 29 | -g=, --gen= string "find $1 -print0" generator cmd with dlr1 -> $1 30 | -d=, --dlr1= string "." $1 for gen fmt; Eg. ". -type f" 31 | -t=, --trim= string "./" output pfx to trim (when present) 32 | -e=, --eor= char '\n' end of record delim; Eg.'\0' 33 | -a, --all bool false all patterns match (vs. any) 34 | -n, --no bool false no patterns match (vs. 
any) 35 | -i, --insens bool false regexes are case-insensitive 36 | -x=, --excl= set(Excl) {} tests to exclude like file(1) 37 | -j=, --jobs= int 0 use this many kids (0=auto) 38 | ``` 39 | 40 | Related Work 41 | ------------ 42 | `find|xargs -PN stdout -oL file -F:Xx:|grep ":Xx: .$@"|sed -e 's/:Xx: .$//'` is 43 | slower & needs some ":Xx:" delimiter guaranteed to be neither in paths nor types 44 | and does not have all the boolean combiner gadgets. There is probably a way to 45 | make it work with some xargs helper program, though it is debatable if that is 46 | simpler than the Nim code. 47 | -------------------------------------------------------------------------------- /fread.nim: -------------------------------------------------------------------------------- 1 | when not declared(addFloat): import std/formatFloat 2 | import std/times, cligen/[mfile, mslice] 3 | when defined(windows): 4 | import std/winlean 5 | let sin = getStdHandle(STD_INPUT_HANDLE) 6 | proc read(fd: Handle, buf: pointer, len: int): int = 7 | let len = min(int32.high.int, len).int32 8 | var nRd: cint 9 | if readFile(fd, buf, len, nRd.addr, nil) == 0: -1 else: int(nRd) 10 | else: import std/posix 11 | 12 | proc fread*(bsz=65536, lim=0i64, nPass=1, off=64, verb=false,paths:seq[string])= 13 | ## This is like `cat`, but just discards data. Empty `paths` => just read 14 | ## from stdin. That can be useful to ensure data is in an OS buffer cache 15 | ## or try to evict other data (more portably than /proc/sys/vm/drop_caches) 16 | ## for cold-cache runs, measure drive/pipe or device throughput, etc. Eg. in 17 | ## Zsh you can say: `fread \*\*` or `fread -l $((1<<30)) < /dev/urandom`. 18 | ## 19 | ## Users may pass paths to FIFOs/named pipes/other block-on-open special files 20 | ## which are skipped. Anything named is only used if mmap-able & only 1 byte 21 | ## (really 1 cache line) per 4096 is used by the process. 
Can use multiple 22 | ## passes to measure DIMM bandwidth through a CPU prefetching lens. 23 | var buf = newString(bsz) 24 | var n = 0i64 25 | let mx = if lim != 0: lim else: int64.high 26 | let t0 = if verb: epochTime() else: 0 27 | var s = 0 28 | if paths.len == 0: 29 | when defined(windows): 30 | while n < mx and (let k = read(sin, buf[0].addr, bsz); k > 0): inc n, k 31 | else: 32 | while n < mx and (let k = read(0, buf[0].addr, bsz); k > 0): inc n, k 33 | else: 34 | for path in paths: 35 | if (let mf = mopen path; mf.mem != nil): 36 | for pass in 0.. mx: break 40 | inc n, mf.mslc.len # Pass1: OS do VM page; Pass2+: CPU do 1 cache line 41 | mf.close # BUT above^can vary; Can measure via Lin.Regress. 42 | if verb: 43 | let dt = epochTime() - t0 44 | echo "fread ",n," bytes in ",dt," s: ",n.float/dt/1e9," GB/s par",s and 1 45 | 46 | when isMainModule:import cligen;include cligen/mergeCfgEnv;dispatch fread,help={ 47 | "bsz": "buffer size for stdin IO", "lim": "max bytes to read; 0=>unlimited", 48 | "nPass": "passes per file", "off": "total [off*0-origin-pass within pages]", 49 | "verb": "print bytes read", "paths": "paths: paths to read in"} 50 | -------------------------------------------------------------------------------- /doc/noc.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | ANSI CSI/OSC/SGR color escape sequences are divisive. Many enjoy the extra, 4 | categorical emphasis colors can yield. Others dislike their interference with 5 | tools oriented around unembellished text. One compromise is `$NOCOLOR`, as 6 | advocated by https://nocolor.org/. Another idea is an easy tool to wedge into a 7 | pipeline to sanitize input for a next stage &| to test "uncolored readability" 8 | (e.g. for the color blind). 
In the latter case, a simple `sed 's/\x1b\[[^m]*m//g'` filter (where `\x1b` is the literal escape byte) "mostly" does the job, *but* corner cases of [CSI/OSC syntax](https://en.wikipedia.org/wiki/ANSI_escape_code) exist that are not handled by the above.
when not declared(stderr): import std/syncio
include cligen/unsafeAddr
import std/posix, cligen, cligen/[osUt, posixUt, dents, statx], adix/topk

# (timestamp, path) pair ordered by tm inside the top-k heap below.
type TimePath = tuple[tm: int64, path: string]

proc newest*(n=1, time="m", recurse=1, chase=false, Deref=false, kinds={fkFile},
             quiet=false, xdev=false, outEnd="\n", file="", delim='\n',
             eof0=false, paths: seq[string]) =
  ##[ Echo ended by *outEnd* <= *n* newest files in file *time* order
  `{-}[bamcv]` for Birth, Access, Mod, Ctime, Version=max(MC); { `-` | CAPITAL
  means ***oldest*** }. Examined files = UNION of *paths* + optional
  *delim*-delimited input *file* ( ``stdin`` if `"-"`|if `""` & ``stdin`` is
  not a terminal ), **maybe recursed** as roots. E.g. to echo the 3 oldest
  regular files by m-time under the CWD: ``newest -n3 -t-m -r0 .``. ]##
  let err = if quiet: nil else: stderr  # nil => dents suppresses access errors
  let tO = fileTimeParse(time) #- or CAPITAL=oldest
  let it = both(paths, fileStrings(file, delim)) # CLI roots UNION file/stdin
  var t = initTopK[TimePath](n) # topk accumulator; only n entries kept live
  for root in it():
    if root.len == 0: continue # skip any improper inputs
    forPath(root, recurse, false, chase, xdev, eof0, err,
            depth, path, nmAt, ino, dt, lst, dfd, dst, did):
      if dt != DT_UNKNOWN: # unknown here => disappeared
        # stat lazily: only symlinks w/Deref or entries lacking cached stat
        if (dt==DT_LNK and Deref and doStat(dfd,path,nmAt,lst,Deref,quiet)) or
           lst.stx_nlink != 0 or doStat(dfd,path,nmAt,lst,Deref,quiet):
          if lst.stx_mode.match(kinds):
            t.push (fileTime(lst, tO.tim, tO.dir), path)
    do: discard                    # pre-recurse hook: nothing to do
    do: discard                    # post-recurse hook: nothing to do
    do: recFailDefault("newest", path) # recursion failure reporter
  for tp in t.ascending: stdout.write tp.path, outEnd # Emit in given tmOrd

when isMainModule: # Exercise this with an actually useful CLI wrapper.
  include cligen/mergeCfgEnv; dispatch newest, help={
    "n" : "number of 'newest' files",
    "time" : "timestamp to compare ({-}[bamcv]\\*)",
    "recurse": "recurse n-levels on dirs; 0:unlimited",
    "chase" : "chase symlinks to dirs in recursion",
    "xdev" : "block recursion across device boundaries",
    "Deref" : "dereference symlinks for file times",
    "kinds" : "i-node type like find(1): [fdlbcps]",
    "quiet" : "suppress file access errors",
    "outEnd" : "output record terminator",
    "file" : "optional input (\"-\"|!tty=stdin)",
    "delim" : "input file record delimiter" }
The byte-length measurement makes it seem like the Nim is "4 bytes easier"
An ambitious person can 52 | adjust their keyboard mappings to their input text families to minimize this 53 | metric (at some perhaps significant re-learning costs) making it useless. 54 | I don't do that. So, I don't find it useless. :-) 55 | 56 | Presently `keydowns` does not optimize for keys which are traditionally 57 | unmodified by SHIFT. The headliner example of this is the space bar where you 58 | can continue holding SHIFT across the space in "{ }". My own typing rarely 59 | optimizes for this, but since it is possible, the program should grow an option 60 | to measure both ways. 61 | -------------------------------------------------------------------------------- /doc/zeh.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | The [Zsh](https://zsh.org/) `setopt EXTENDEDHISTORY` feature is nice. It adds 3 | to the basic multi-line command format the starting epoch time & command 4 | duration to the history file. 5 | 6 | However, if you have various user accounts and different computers you may want 7 | to merge history files. The history file may just get too big for the shell to 8 | load efficiently. `zeh` goes at about 1..1.5 GB/s for me and yet still takes 9 | 10s of milliseconds for a 600k/26MB file and seems faster than Zsh itself. 10 | Enough 10s and you have noticeable delay. 11 | 12 | So, you may want to perform various manipulations to thin your history. E.g., 13 | discarding very short commands, less than say 5 bytes. Trailing newlines are 14 | easy to enter by mistake (either with \ or by pasting newlines) but do not 15 | add any value (at least to my command histories). And so on. The format has 16 | been the same for like 30 years and it's easy enough to whip up tools to do 17 | basic things. This is one. 18 | 19 | # Usage 20 | ``` 21 | zeh [optional-params] [paths: string...] 
22 | 23 | Check|Merge, de-duplicate&clean short cmds/trailing \n Zsh EXTENDEDHISTORY 24 | (format ": {t0%d}:{dur%d};CMD-LINES[\]"); Eg.: zeh -tm3 h1 h2 >H. Zsh saves 25 | start & duration @FINISH TIME => with >1 shells in play, only brief cmds match 26 | the order of timestamps in the file => provide 3 more modes on top of --check: 27 | --endT, --sort, --begT. 28 | 29 | -m=, --min= int 0 Minimum length of a command to keep 30 | -t, --trim bool false Trim trailing whitespace 31 | -c, --check bool false Only check validity of each of paths 32 | -s, --sort bool false sort exactly 1 path by startTm,duration 33 | -b, --begT bool false add dur to take startTm,dur -> endTm,dur 34 | -e, --endT bool false sub dur to take endTm,dur -> startTm,dur 35 | ``` 36 | 37 | # Testing 38 | This is new code using a new `adix/ways.kWayMerge` iterator. So, it's very possible 39 | there are bugs, but this works anyway, and maybe constitutes an example: 40 | ```sh 41 | seq2zh() { sed 's/^\(.*\)$/: 1000000\1:0;cmd\1/' ;} 42 | zh2cmd() { sed 's/.*;//' ;} 43 | seq -w 1 3 100|seq2zh>by3 44 | seq -w 2 2 100|seq2zh>by2 45 | seq -w 4 4 100|seq2zh>by4 46 | cat by[234]|zh2cmd|sort>costly 47 | zeh by*|zh2cmd>cheap 48 | cmp cheap costly 49 | ``` 50 | 51 | # Examples 52 | Check a file { or do a parsing benchmark :-) } : 53 | 54 | `zeh -c $ZDOTDIR/history` 55 | 56 | XXX should really add a bunch of vignettes here. 57 | 58 | # Future work 59 | An idea for near-term extensions might be adding a fancier filter language than 60 | just "length >= min", such as the first whitespace delimited command is not in 61 | some set (e.g. `ps`).
62 | -------------------------------------------------------------------------------- /oft.nim: -------------------------------------------------------------------------------- 1 | import cligen, cligen/[mfile, mslice, osUt], adix/amoft, std/math 2 | from std/strutils as su import nil 3 | when not declared(stderr): import std/syncio 4 | 5 | proc pyIx[T](vs: openArray[T], i: int): T = vs[if i < 0: i + vs.len else: i] 6 | 7 | proc oft*(input="/dev/stdin", delim=" ", mxCol=0, errate=0.005, cover=0.98, 8 | salts: seq[int] = @[], specs: seq[string]) = 9 | ## Write most often seen N keys in various columns to outFile's. Specs are 10 | ## `[,(0)[,outFile(stdout)]]`. ColNos are Py-like 0-origin,signed. 11 | ## Algorithm is approximate fast one-pass over mmap|stream input. E.g., to 12 | ## write most frequent final column to stdout do: ``oft 10,-1``. (For exact, 13 | ## see `lfreq`, possibly with column splitting to FIFOs). 14 | let k = specs.len # Handle all `k` keys in one pass 15 | if k < 1: stderr.write "No specs requested. -h for help.\n"; return 16 | var keyC = newSeq[int](k) 17 | var oFil = newSeq[File](k) 18 | var amos = newSeq[AMOft[string, uint32]](k) 19 | let w = ceil(exp(1.0)/errate).int # Qs: Make per key-col? Snap to pow2? 
20 | let nTab = ceil(-ln(1.0 - cover)).int 21 | for i, spec in specs: # Parse key-output specifiers 22 | let params = su.split(spec, ',') 23 | if params.len < 1: 24 | stderr.write "too few sub-params in spec ", spec, "\n"; continue 25 | amos[i] = initAMOft[string, uint32](su.parseInt(params[0]), w, nTab, salts) 26 | keyC[i] = if params.len > 1: su.parseInt(params[1]) else: 0 27 | oFil[i] = if params.len > 2: open(params[2], fmWrite) else: stdout 28 | let sep = initSep(delim) # Init into-seq[MSlice] splitter 29 | var row: seq[MSlice] = @[] 30 | let mf = mopen(input) 31 | 32 | template sweep(mf, T) {.dirty.} = 33 | for line in mSlices(mf, eat='\0'): # RO mmap | slices from stdio 34 | sep.split(line, row, mxCol) # Split into columns 35 | for i in 0 ..< k: # Update our 1-to-several AMOfts 36 | amos[i].inc $pyIx(row, keyC[i]) 37 | for i in 0 ..< k: # Pops out like sort -gk|tail -n 38 | for (k, c) in amos[i].mostCommon: oFil[i].urite c, " ", k, "\n" 39 | if not oFil[i].isNil and oFil[i] != stdout: oFil[i].close 40 | 41 | if mf.mem.isNil: sweep(input, string) else: sweep(mf, MSlice) 42 | 43 | when isMainModule: include cligen/mergeCfgEnv; dispatch oft, help={ 44 | "input" : "input data path", 45 | "delim" : "delimiting (repeats=>any num; \"white\")", 46 | "mxCol" : "max columns in input to parse", 47 | "errate": "size tables to make err `nSamp\\*this`", 48 | "cover" : "enough tables to make coverage this", 49 | "salts" : "override random salts"} 50 | -------------------------------------------------------------------------------- /okpaths.nim: -------------------------------------------------------------------------------- 1 | import std/[os, posix, strutils, sets] 2 | 3 | if paramCount() < 1 or paramCount() mod 5 != 0: quit """Usage: 4 | okpaths ENVAR [DELIM(:) [ITYPE{bcdpfls}(d) [PERMS{rwx}(x) [DEDUP{FL*}(F)]]]] 5 | 6 | echos re-assembled value for $ENVAR delimited by char DELIM where each element 7 | kept is i-node type ITYPE with permissions PERMS & optional 
de-duplication. 8 | 9 | Eg., PATH=`okpaths PATH` keeps only existing (d)irs executable(x) by an invoking 10 | user. DEPDUP starting with 'F' means keep F)irst use, while 'L' keeps L)ast use 11 | & other means no de-dup (this is case-insensitive). So, eval `okpaths PATH` is 12 | nice in rc/init scripts for Unix shells. 13 | 14 | Blocks of the 5 params can repeat (since fork&exec add to shell init time).""",0 15 | 16 | for shf in countup(0, paramCount()-1, 5): 17 | let delim = if paramCount()>1+shf: paramStr(2+shf)[0] else: ':' 18 | let kinds = if paramCount()>2+shf: paramStr(3+shf) else: "d" 19 | let perms = if paramCount()>3+shf: paramStr(4+shf) else: "rx" 20 | let dedup = if paramCount()>4+shf: paramStr(5+shf).toUpper[0] else: 'F' 21 | 22 | func kind(mode: Mode): char = 23 | if mode.S_ISBLK : 'b' 24 | elif mode.S_ISCHR : 'c' 25 | elif mode.S_ISDIR : 'd' 26 | elif mode.S_ISFIFO: 'p' 27 | elif mode.S_ISREG : 'f' 28 | elif (when not defined(windows): mode.S_ISLNK else: false): 'l' 29 | elif (when not defined(windows): mode.S_ISSOCK else: false): 's' 30 | else: '.' 
31 | 32 | proc perm(perms: string): cint = 33 | if 'r' in perms: result = result or R_OK 34 | if 'w' in perms: result = result or W_OK 35 | if 'x' in perms: result = result or X_OK 36 | 37 | let prms = perm(perms) 38 | var res: seq[string] # Result to output (re-joined with delim) 39 | var ids: seq[Ino] # i-node identity; [i] tracks res[i] 40 | var st: Stat 41 | var did: HashSet[Ino] 42 | for e in paramStr(1+shf).getEnv.split(delim): 43 | let ec = e.cstring 44 | if stat(ec, st) == 0 and st.st_mode.kind in kinds and access(ec, prms) == 0: 45 | if dedup == 'F': # F)irst retention 46 | if st.st_ino notin did: # Only add if have not already 47 | res.add e 48 | did.incl st.st_ino 49 | elif dedup == 'L': # L)ast retention 50 | if st.st_ino in did: # Already added; First delete [old] 51 | let ino = ids.find(st.st_ino) 52 | res.delete ino 53 | ids.delete ino 54 | did.incl st.st_ino # Add it 55 | ids.add st.st_ino 56 | res.add e 57 | else: # Not de-duplicating 58 | res.add e 59 | 60 | echo paramStr(1+shf),"=",join(res, $delim) 61 | -------------------------------------------------------------------------------- /ndelta.nim: -------------------------------------------------------------------------------- 1 | import cligen/[sysUt, mfile, mslice], cligen 2 | import std/parseutils; import std/strutils except parseFloat 3 | when not declared(stderr): import std/syncio 4 | 5 | type DKind = enum absolute, ratio, relative, perCent 6 | 7 | proc load(path: string): MSlice = 8 | if (let m = mopen(path); m != nil): m.toMSlice 9 | else: path.readFile.toMSlice(keep=true) 10 | 11 | proc delta(num0, num1: float, kind: DKind, n: int): string = 12 | template ffDec(x: float): untyped = formatFloat(x, ffDecimal, n) 13 | case kind: 14 | of absolute: ffDec(num1 - num0) 15 | of ratio : (if num0 != 0.0: ffDec(num1/num0) else: "INF") 16 | of relative: (if num0 != 0.0: ffDec(num1/num0 - 1) else: "INF") 17 | of perCent : (if num0 != 0.0: ffDec(100.0*num1/num0 - 1) else: "INF") 18 | 19 | proc 
ndelta(paths: seq[string], kind=ratio, delims="white", n=3, sloppy=false) = 20 | ## Replace numbers in token-compatible spots of `paths[0]` & `paths[1]` with 21 | ## (absolute | ratio | relative | perCent) deltas. To trap out-of-order data, 22 | ## differences in context are highlighted unless `sloppy` is true. 23 | if paths.len != 2: Help !! "Need 2 paths; Full $HELP" 24 | let sep = initSep(delims) 25 | let tok0 = paths[0].load.frame(sep) # Fully split both files into 2.. 26 | let tok1 = paths[1].load.frame(sep) #.. seq[TextFrame]s of tokens. 27 | if tok0.len != tok1.len: # Check compatibility 28 | stderr.write "WARNING: files have different token structure\n" 29 | for i in 0 ..< tok0.len: # Now loop: identify & compare floats 30 | if tok0[i].ms.len == 0: continue # Empty data frame (if not repeat) 31 | if tok0[i].isSep: 32 | stdout.write tok0[i].ms 33 | else: # Both tokens are non-separator text 34 | var num0, num1: float 35 | let s0 = $tok0[i].ms # An interesting but tricky extension.. 36 | let s1 = $tok1[i].ms #.. would be optional parse of x +- dx 37 | if s0.parseFloat(num0) == s0.len and s1.parseFloat(num1) == s1.len: 38 | stdout.write delta(num0, num1, kind, n) 39 | if kind == perCent: stdout.write '%' 40 | elif not sloppy and tok0[i].ms != tok1[i].ms: # Differing Context 41 | stdout.write "\e[1m", tok0[i].ms, "\e[22m<>\e[3m", tok1[i].ms, "\e[23m" 42 | else: # Same context/labels/etc. 43 | stdout.write tok0[i].ms 44 | 45 | include cligen/mergeCfgEnv 46 | dispatch ndelta,help={"kind" : "DiffKind: absolute, ratio, relative, perCent", 47 | "delims": "repeatable delim chars", 48 | "n" : "FP digits to keep", 49 | "sloppy": "allow non-numerical context to vary silently"} 50 | -------------------------------------------------------------------------------- /doc/dups.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Many processes in a system can create duplicate files. 
These (usually) waste 5 | space, but at the very least one often wants to "map out" such duplication. 6 | 7 | `lncs` lets you map out varying names for the same i-node. This utility lets 8 | you map out clusters of i-nodes with exactly duplicate data (under the name of 9 | the first found hard link for a file). 10 | 11 | This was the original `cligen/examples` utility program. There are many open 12 | source tools like this out on the internet. One tool author even popped up 13 | on a [cligen issue thread](https://github.com/c-blake/cligen/issues/99). 14 | This one is pretty efficient. I continue to benchmark it as ~2X faster than 15 | jdupes in a fully RAM-cached test case, but for uncached use cases it is of 16 | course very dominated by IO speed/organization. 17 | 18 | A related case is *near* duplicate data, but that deserves [its own github 19 | repo](https://github.com/c-blake/ndup). 20 | 21 | Usage 22 | ----- 23 | 24 | ``` 25 | dups [optional-params] [paths: string...] 26 | 27 | Print sets of files with duplicate contents. Examined files are UNION of paths & 28 | optional delim-delimited input file ( stdin if "-"|if ""& stdin not a tty ). 29 | 30 | E.g.: 31 | find -type f -print0 | dups -d\\0. 32 | Exits non-0 if a dup exists. 33 | 34 | Trusting hashes can give false positives, but sorting can be slow w/many large 35 | files of the same size|hash. slice can reduce IO, but can also give false pos. 36 | {False negatives not possible. 0 exit => surely no dups.}. 37 | 38 | Within-set sort is by st_blocks if summ is logged, then by requested file time 39 | {v=max(m,c)} & finally by st_ino. 
40 | 41 | Options: 42 | -f=, --file= string "" optional input ( "-" | !tty = stdin ) 43 | -d=, --delim= char '\n' input file delimiter; \0 -> NUL 44 | -r=, --recurse= int 1 recurse n-levels on dirs; 0: unlimited 45 | -F, --follow bool false follow symlinks to dirs in recursion 46 | -x, --xdev bool false block cross-device recursion 47 | -D, --Deref bool false dereference symlinks 48 | -m=, --minLen= int 1 minimum file size to consider 49 | -s=, --slice= string "" file slice (float|%:frac; <0:tailRel) 50 | -H=, --Hash= Digest wy hash function [size|wy|nim|SHA] 51 | -c, --cmp bool false compare; do not trust hash 52 | -j=, --jobs= int 1 Use this much parallelism 53 | -l=, --log= set(Lg) osErr >stderr{ osErr, summ } 54 | -b, --brief bool false do NOT print sets of dups 55 | -t=, --time= string "" sort each set by file time: {-}[bamcv].* 56 | -o=, --outDlm= string "\t" output internal delimiter 57 | -e=, --endOut= string "\n" output record terminator 58 | ``` 59 | -------------------------------------------------------------------------------- /doc/tw.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Sometimes you have maybe-colorized, maybe-utf8 output which has "tabular shape" 4 | but a trailing final column prone to line wrap which then makes the table hard 5 | to read. 6 | 7 | For example, C compiles often have very long command lines due to the lack of 8 | any other standard source of compiler options. So, some `ps ww` invocation may 9 | create a "table" with a dozen terminal rows. 10 | 11 | Having an easy way to "clip" or "crop" lines to fit in your terminal can thus be 12 | nice. 
13 | 14 | Usage (***NOT*** a `cligen` utility) 15 | ----- 16 | 17 | With no argument, this roughly reproduces what many VTXXX compatible terminals 18 | can do with `printf '\033[?7l'; command; printf '\033[?7h'`: 19 | ```sh 20 | $ input-generator|tw 21 | ``` 22 | 23 | Unlike the VTXXX approach, though, with `tw` you can optionally pass a first 24 | argument which is an integer number of rows to limit wrapping to. For the 25 | motivating compiler example, this can be useful:[^1] 26 | ```sh 27 | $ pd -w|tw 2 28 | ``` 29 | 30 | Finally, with a second argument you can override the terminal width detected 31 | by $COLUMNS and ioctls, as in 32 | 33 | ```sh 34 | $ pd -w|tw 2 40 35 | ``` 36 | One application of this last mode might be useful to "re-format" a table given 37 | easily split leading & trailing text per row for re-assembly fitting in bounds. 38 | 39 | Related Work 40 | ------------ 41 | While I did look, I did not find any one really doing this anywhere, but it is 42 | hard to make such searches truly exhaustive. The core of this is just a 30-line 43 | state machine. The idea is pretty obvious though - basically the "width-wise" 44 | version of `head` or `tail`. In fact, in combination they let you crop to your 45 | viewable terminal via e.g. `tail -n $LINES|tw`.[^2] 46 | 47 | If you know the input has neither ANSI SGR Color escape sequences nor multi-byte 48 | utf8 characters then you can, of course, just `cut -c "1-${COLUMNS:-80}"`. 49 | 50 | If you are willing to depend upon regex and terminal libraries as well as do 51 | terminal manipulation (like alternate screen buffers etc.) and you never want 52 | bounded-but-multiple rows then you can do `less -RES --redraw-on-quit 53 | --rscroll=-`. That's a lot of IFs, though.[^3] `tw` is also several times 54 | faster due to its more limited scope. 
55 | 56 | Future Work 57 | ----------- 58 | The current impl does handle Unicode combining characters (including as the 59 | final non-clipped character) but not double wide or grapheme extension type 60 | renders. 61 | 62 | [^1]: `pd` here is `procs display` as per https://github.com/c-blake/procs 63 | 64 | [^2]: For me this is just `|t|tw` which may become `|ttw` or `|crop` someday. 65 | 66 | [^3]: [noc](noc.md) lets you enforce no escape sequence part of the IFs. 67 | -------------------------------------------------------------------------------- /doc/du.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | This is a very small program, mostly recapitulating functionality from GNU `du` 4 | that began just as a simple cligen/examples program, but might conceivably have 5 | broader use/popularity. 6 | 7 | Some of the value add differences are adding some missing short form flags for 8 | commonly desirable giga/tera/peta `--block-size=`s, de-conflating a few baggage 9 | of history things like shell patterns vs. regexes & --bytes => apparent-size. 10 | 11 | (Also, it does not try to do anything with file times. That seems like weird 12 | mission creep in the GNU `du`.) 13 | 14 | Usage 15 | ----- 16 | ``` 17 | du [optional-params] [roots: string...] 18 | 19 | Mostly compatible replacement for GNU du using my 1.4-2x faster file tree walk 20 | that totals st_blocks*512 with more/better short options. Notable differences: 21 | drops weakly motivated options {time, [aDHt], max-depth, separate-dirs} 22 | outEnd replaces null|-0; patterns are all PCRE not shell and need ".*" 23 | bytes does not imply apparent-size 24 | dereference does not imply chase. 
25 | 26 | Options: 27 | -?, --help print this cligen-erated help 28 | -f=, --file= string "" optional input ("-"|!tty=stdin) 29 | -d=, --delim= char '\n' input file record delimiter 30 | -x, --one-file-system bool false block recursion across devices 31 | --chase bool false chase symlinks in recursion 32 | -L, --dereference bool false dereference symlinks for size 33 | -a, --apparent-size bool false instead total st_bytes 34 | -i, --inodes bool false instead total inode count 35 | -l, --count-links bool false count hard links multiple times 36 | -X=, --exclude-from= string "" exclude all pattern(s) in named file 37 | -e=, --exclude= strings {} exclude paths matching pattern(s) 38 | -b, --bytes bool false like --block-size=1 39 | -k, --kilo bool false like --block-size=1[Kk] (DEFAULT) 40 | -m, --mega bool false like --block-size=1[Mm] 41 | -g, --giga bool false like --block-size=1[Gg] 42 | -t, --tera bool false like --block-size=1[Tt] 43 | -p, --peta bool false like --block-size=1[Pp] 44 | -B=, --block-size= string "" units; CAPITAL sfx=metric else binary 45 | -s, --summarize bool false echo only total for each argument 46 | --si bool false -[kmgt] mean powers of 1000 not 1024 47 | -h, --human-readable bool false print sizes in human readable format 48 | -c, --total bool false display a grand total 49 | -o=, --outEnd= string "\n" output record terminator 50 | -q, --quiet bool false suppress most OS error messages 51 | ``` 52 | -------------------------------------------------------------------------------- /doc/topn.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Sometimes you have a pipeline emitting various numbers and you want to get (in 4 | one pass since input is a pipeline, but also for memory bandwidth efficiency) 5 | reports of the top-N (N biggest) according to various columns of the input. 6 | This is what `topn` is for. 
Internally, it is a very thin wrapper around 7 | [adix/topk](https://github.com/c-blake/adix/blob/master/adix/topk.nim). 8 | 9 | Usage 10 | ----- 11 | ``` 12 | topn [optional-params] [specs: string...] 13 | 14 | Write spec'd cols of topN-rows-by-various-other-cols to outFile's. 15 | 16 | A spec is [,(0)[,outCol(same)[,outFile(stdout)]]]. 17 | 18 | ColNos are Py-like 0-origin,signed. 19 | 20 | outCol can be an A:B exclusive or A..B slice. 21 | 22 | Algo is fast one-pass over (mmap|stream) input. 23 | 24 | Simple & Fancy E.g.s: 25 | find . -type f -printf '%C@ %p\n' | topn -m1 5 # newest 5 by ctime 26 | topn 9,1,-1,x # writes last col of top 9-by-col-1 rows to file x. 27 | 28 | If n!=0 then can end in '%' to instead mean 100*pct/n rows. 29 | 30 | Options: 31 | -i=, --input= string "/dev/stdin" input data path 32 | -d=, --delim= string " " delimiting (repeats=>any num; "white") 33 | -m=, --mxCol= int 0 max columns in input to parse 34 | -n=, --n= int 0 scale for '%' amounts 35 | -o=, --order= TopKOrder Cheap order: Cheap, Ascending, Descending 36 | -p=, --partn= Partn last partition: last, ran 37 | ``` 38 | 39 | Very Simple Example 40 | ------------------- 41 | ```sh 42 | $ paste <(seq 1 100) <(seq 1 10 1000) | topn 5 43 | 96 951 44 | 97 961 45 | 98 971 46 | 99 981 47 | 100 991 48 | ``` 49 | 50 | Fancier Example 51 | --------------- 52 | This will recurse in `.` emitting c-time, m-time, and path names to a pipeline. 53 | ```sh 54 | find . -printf '%Cs %Ts %P\n' | 55 | topn 3,0,2 4,1,:,/dev/stderr 56 | ``` 57 | The `topn` part collects the top-3 paths (2) by 0-origin column 0 (ctime) and 58 | *whole rows* of the top-4 by 0-origin column 1 (mtime), emitting the first to 59 | stdout and the second to stderr. (Yes, [`newest`](newest.md) handles this 60 | *exact* example and mismatched 3/4 are weird, but it's just an *example*). 
61 | 62 | Any Python-like `[a]:[b]` exclusive slice or Nim `[a]..[b]` inclusive slice is 63 | ok, but non-numeric|missing a/b become 0 and out of bounds refs map to `""`. 64 | 65 | If you want a top fraction like 10% (instead of an absolute number like "3") 66 | then you can also get that ***IF*** you provide the scale via `-n` and also 67 | tell `topn` to use it via, e.g., `topn -n4321 10%,0,2`. (Yes, this is mostly 68 | just a convenience to multiply 0.1 by 4321 - if you do not know `n` ahead of 69 | time, a one-pass, tiny memory algo is not possible.) 70 | -------------------------------------------------------------------------------- /unfold.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/syncio 2 | include cligen/unsafeAddr 3 | import cligen/[sysUt, osUt], std/re, cligen # cligen is early for `HelpError` 4 | 5 | proc unfold(sep="\t", n=0, before="", after="", ignore=false, extended=false) = 6 | ## Join blocks of stdin lines into one line sent to stdout. 
7 | var eolBuf = "\n"; let eol = eolBuf[0].addr 8 | let nS = sep.len; let sep = sep[0].unsafeAddr 9 | var i = 0 10 | var need = false 11 | var str: string 12 | var flags = {reStudy} 13 | if ignore: flags.incl reIgnoreCase 14 | if extended: flags.incl reExtended 15 | template wrLine = 16 | if stdout.uriteBuffer(ln, nLn-1) != nLn-1: return 17 | template wrEOL = 18 | if stdout.uriteBuffer(eol, 1) != 1: return else: need = false 19 | template wrSep = 20 | if stdout.uriteBuffer(sep,nS) != nS: return else: need = true 21 | if n > 0 and before.len == 0 and after.len == 0: 22 | for (ln, nLn) in stdin.getDelims: # :( My ancient rec_rdln is ~1.3x faster 23 | inc i 24 | wrLine() # Always output the input 25 | if i == n: wrEOL(); i = 0 # but EOL only after n cycles 26 | else : wrSep() # otherwise just sep 27 | elif after.len != 0 and n == 0 and before.len == 0: 28 | let rx = re(after, flags) 29 | for (ln, nLn) in stdin.getDelims: 30 | inc i 31 | wrLine() # Always output the input 32 | str.setLen nLn-1; copyMem str[0].addr, ln, nLn-1 33 | if rx in str: wrEOL() # but EOL only only if line matches 34 | else : wrSep() # otherwise just sep 35 | elif before.len != 0 and n == 0 and after.len == 0: 36 | let rx = re(before, flags) # A somewhat different state machine 37 | for (ln, nLn) in stdin.getDelims: 38 | inc i 39 | if i == 1: 40 | wrLine(); need = true # Write 1st line unconditionally 41 | else: # Copy `ln` to `str` for pattern match 42 | str.setLen nLn-1; copyMem str[0].addr, ln, nLn-1 43 | if rx in str: wrEOL() # Then terminate only if line matches 44 | else : wrSep() # otherwise just sep 45 | wrLine() # Then output the input 46 | else: Help !! 
"Set `n` | `before` | `after`; Full $HELP" 47 | if need: wrEOL() # May need final \n (non-delimiting sep gives user clue) 48 | 49 | include cligen/mergeCfgEnv; dispatch unfold, help={ 50 | "n" : "Join `|n|` lines into 1", 51 | "after" : "join blocks ending with a matching line", 52 | "before" : "join blocks beginning with a matching line", 53 | "sep" : "separates the old lines within the new", 54 | "ignore" : "regex are case-insensitive", 55 | "extended": "regexes are nim re 'extended' syntax", 56 | } 57 | -------------------------------------------------------------------------------- /doc/cols.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is a faster to key stroke (& execute) version of `awk '{print $X}'`. It 5 | also acts as demo/example code for some library APIs from 6 | [`cligen/`](https://github.com/c-blake/cligen) & was a very early member of 7 | `cligen/examples/` itself. 8 | 9 | Something it provides over the `awk` invocation is 10 | - the ability to delete indicated columns (with `-c, --cut`). 11 | 12 | Things it provides over both `awk` & GNU coreutils `cut` are the ability to: 13 | - shift column-numbering origins (e.g. 0 | 1-origin) 14 | - do either inclusive (..) OR exclusive (:) ranges/slices 15 | - allows numbers < 0 to mean from-the-end (like Python) { use `--` or \\-escape 16 | (or quote) whitespace before `'-'` to avoid treatment as an option }. 17 | 18 | Over just `cut` it provides: 19 | - default to keep; more terse CL syntax than "--complement" 20 | - ability to split 1 column on repeated bytes (like `awk`) 21 | 22 | Usage 23 | ----- 24 | ``` 25 | cols [optional-params] colNums or A..B | X:Y (in|ex)clusive ranges thereof 26 | 27 | Write just some columns of input to output; Memory map input if possible. 
28 | 29 | -i=, --input= string "/dev/stdin" path to mmap|read as input 30 | -r=, --rowDlm= char '\n' inp row delimiter character 31 | -d=, --delim= string "white" inp field dlm chars; len>0 => fold 32 | -o=, --output= string "/dev/stdout" path to write output file 33 | -s=, --sepOut= string " " output field separator 34 | -b, --blanksOk bool false allow blank output rows 35 | -c, --cut bool false cut/censor specified columns, not keep 36 | --origin= int 1 origin for colNums; 0=>signed indexing 37 | -0, --O0 bool false shorthand for --origin=0 38 | -t=, --term= char '\n' set output row terminator (e.g. \0) 39 | ``` 40 | 41 | Examples 42 | -------- 43 | After: 44 | ``` 45 | (echo 1 2 3 4; echo; echo 4 5 6 7) > /tmp/d 46 | ``` 47 | you get: 48 | ``` 49 | cols 2 4 < /tmp/d 50 | ``` 51 | producing 52 | ``` 53 | 2 4 54 | 5 7 55 | ``` 56 | With `cols -c0 -- -4..-3` you get: 57 | ``` 58 | 3 4 59 | 6 7 60 | ``` 61 | since you are cutting 0-origin 4th from end & 3rd from end. 62 | Meanwhile with `cols -0 1:3` you get: 63 | ``` 64 | 2 3 65 | 5 6 66 | ``` 67 | since you are keeping the exclusive slice indicating 0-origin 1 & 2. 68 | 69 | With all of them if you add `-b` the blank row propagates, or you can make the 70 | output separated TAB or terminator NUL, etc. 71 | 72 | That's it, really. This intends to be a very simple utility. Among the most 73 | advanced examples I can think of is : 74 | ``` 75 | ls -l --zero | cols -cr\\0 1..4 -t\\0 76 | ``` 77 | to produce a list of 0-terminated rows where (for GNU ls) the first 4 columns 78 | are guaranteed to be space separated and any newlines are from path names. The 79 | consumer of that output data needs to remain careful, of course. 80 | -------------------------------------------------------------------------------- /doc/crp.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is a port of `rp.nim` to a Nim-written C code generator. 
The point was 5 | (mostly) to experiment with how much noisier pure C syntax is than the Nim-Nim 6 | `rp`, in awk 1-liner-like problem settings. Side interest was speed of machine 7 | code generation with `tcc` and speed of execution of said fast-generated code. 8 | 9 | Consult [doc/rp.md](rp.md) for more pontificating on the idea space which would 10 | require few words if it were more commonly used. Personally, I think C even 11 | aided with macros is a bit too noisy for the ergonomics to be great. 12 | 13 | Usage 14 | ----- 15 | 16 | ``` 17 | crp [optional-params] C stmts to run (guarded by where); none => echo row 18 | 19 | Gen+Run prelude,fields,begin,where,stmts,epilog row processor against input. 20 | 21 | Defined within where & every stmt are: 22 | s[idx] & row => C strings, i(idx) => int64, f(idx) => double. 23 | nf & nr (AWK-ish), rowLen=strlen(row); idx is 0-origin. 24 | 25 | A generated program is left at outp.c, easily copied for "utilitizing". If you 26 | know AWK & C, you can learn crp FAST. 27 | 28 | Examples (most need data): 29 | seq 0 1000000 | crp -w'rowLen<2' # Print short rows 30 | crp 'printf("%s %s\n", s[1], s[0])' # Swap field order 31 | crp -vt=0 t+=nf -e'printf("%g\n", t)' # Prn total field count 32 | crp -vt=0 -w'i(0)>0' 't+=i(0)' -e'printf("%g\n", t)' # Total>0 33 | crp 'float x=f(0)' 'printf("%g\n", (1+x)/x)' # cache field 0 parse 34 | crp -d, -fa,b,c 'printf("%s %g\n",s[a],f(b)+i(c))' # named fields 35 | crp -mfoo 'printf("%s\n", s[2])' # column if row matches 36 | 37 | Add niceties (eg. prelude="#include ") to ~/.config/crp. 
38 | 39 | Options: 40 | -p=, --prelude= strings {} C code for prelude/include section 41 | -b=, --begin= strings {} C code for begin/pre-loop section 42 | -v=, --var= strings {} preface begin with double var decl 43 | -m=, --match= string "" row must match this regex 44 | -w=, --where= string 1 C code for row inclusion 45 | -e=, --epilog= strings {} C code for epilog/end loop section 46 | -f=, --fields= string "" delim-sep field names (match row0) 47 | -g=, --genF= string "$1" make Field names from this Fmt;Eg c_$1 48 | -c=, --comp= string "" "" => tcc {if run: "-run"} {args} 49 | -r, --run bool true Run at once using tcc -run .. < input 50 | -a=, --args= string "" "" => -I$HOME/s -O 51 | -o=, --outp= string /tmp/crpXXX output executable; .c NOT REMOVED 52 | -i=, --input= string "" path to read as input; ""=stdin 53 | -d=, --delim= string " \t" inp delim chars for strtok 54 | -u, --uncheck bool false do not check&skip header row vs fields 55 | -M=, --MaxCols= int 0 max split optimization; 0 => unbounded 56 | ``` 57 | -------------------------------------------------------------------------------- /doc/ft.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | The "usability idea" is to leverage user recall of `test` flags by staying as 5 | close as reasonable to that set. The only real difference is that `ft` {for 6 | f)ile t)ype} uses `h` to mean a h)ard link not an alias for -L. 7 | 8 | Yes, there are more verbose ways to do this with `man 1 find` and shell `for` 9 | loops and more terse ways to do it with Zsh extended globbing (`man 1 zshexpn`). 10 | 11 | I suspect most people are more comfortable with a shell loop that would also be 12 | portable, but OTOH, it really is a very small program. 13 | 14 | Usage 15 | ----- 16 | ``` 17 | ft [optional-params] [paths: string...] 18 | 19 | Batch (in both predicates & targets) test / [ . Emit subset of paths that 20 | pass expr. 
E.g.: $(ft -eL *) =~ Zsh extended glob *(@). Can also read stdin 21 | as in find -type f|ft -ew. (Yes, can cobble together less tersely w/GNU find 22 | -files0-from | find GLOB -maxdepth 0 PREDICATE.) Maybe counter-intuitively, 23 | exit with status = match count (0=NONE). 24 | 25 | -f=, --file= string "" optional input ( `"-"` | !tty = ``stdin`` ) 26 | -d=, --delim= char '\n' input file delimiter; `\\0` -> NUL 27 | -t=, --term= char '\n' output path terminator 28 | -p=, --pattern= string "$1" emit a \$1-using pattern; E.g. "match:\$1" 29 | -q, --quiet bool false Do not emit; Just count as exit status 30 | -s, --stat bool false Use stat not lstat; Others say "dereference" 31 | -e=, --expr= string "e" Concatenated extended one-letter test(1) codes 32 | e (e)xists in any way 33 | b is (b)lock special 34 | c is (c)haracter special 35 | d is a (d)irectory 36 | f is a regular (f)ile 37 | l|L is a symbolic (l)ink; NOTE: h differs! 38 | p is a named (p)ipe {aka FIFO} 39 | S is a (S)ocket;CASE differs from ls/find 40 | s has a (s)ize greater than zero 41 | h is a (h)ard link; Link count > 1 42 | N (N)ew; modify time > access time 43 | k has its stic(k)y bit set 44 | u its set-(u)ser-ID bit is set 45 | g is set-(g)roup-ID 46 | O is (O)wned by the effective user ID 47 | G is owned by effective (G)roup ID 48 | r|R|A user|World|Group can (r)ead 49 | w|W|I user|World|Group can (w)rite 50 | x|X|E user|World|Group can e(x)ecute|travers 51 | In all cases a file must exist for 'true' 52 | Codes are logically ANDed; '^' prefix => NOT 53 | ``` 54 | -------------------------------------------------------------------------------- /since.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | include cligen/unsafeAddr 3 | import std/[posix, sets, strutils], cligen, cligen/[osUt, posixUt, dents, statx] 4 | 5 | proc since*(refPath: string, refTime="", time="m", recurse=1, chase=false, 6 | Deref=false, 
kinds={fkFile}, quiet=false, xdev=false, file="", 7 | delim='\n', eof0=false, noDot=false, unique=false, 8 | paths: seq[string]) = 9 | ## Print files whose *time* is since|before *refTime* of *refPath*. Files 10 | ## examined = UNION of *paths* + optional *delim*-delimited input *file* ( 11 | ## ``stdin`` if `"-"`|if `""` & ``stdin`` is not a terminal ), **maybe 12 | ## recursed** as roots. To print regular files m-older than LAST under CWD: 13 | ## ``since -t-m -pLAST -r0 .`` 14 | let err = if quiet: nil else: stderr 15 | let tO = fileTimeParse(time) #- or CAPITAL=oldest 16 | let tR = if refTime.len > 0: fileTimeParse(refTime) else: tO 17 | var refStat: Statx 18 | if stat(refPath, refStat) != 0: quit(1) 19 | let r = fileTime(refStat, tR.tim, tR.dir) 20 | var dip = initHashSet[string]() 21 | let it = both(paths, fileStrings(file, delim)) 22 | var roots: seq[string] 23 | for root in it(): (if root.len > 0: roots.add root) 24 | for rt in (if roots.len == 0 and paths.len == 0: @["."] else: roots.move): 25 | forPath(rt, recurse, true, chase, xdev, eof0, err, 26 | depth, path, nmAt, ino, dt, lst, dfd, dst, did): 27 | if dt != DT_UNKNOWN: # unknown here => disappeared 28 | if (dt == DT_LNK and Deref and not chase and 29 | doStat(dfd,path,nmAt,lst,Deref,quiet)) or 30 | lst.stx_nlink != 0 or doStat(dfd,path,nmAt,lst,Deref,quiet): 31 | if lst.stx_mode.match(kinds) and fileTime(lst, tO.tim, tO.dir) > r: 32 | let path = if noDot: 33 | if path.startsWith("./."): path[3..^1] 34 | elif path.startsWith("./"): path[2..^1] 35 | else: path 36 | else: path 37 | if not unique or path notin dip: 38 | stdout.write path, "\n" 39 | dip.incl path 40 | do: discard 41 | do: discard 42 | do: recFailDefault("since", path) 43 | 44 | when isMainModule: # Exercise this with an actually useful CLI wrapper. 
45 | include cligen/mergeCfgEnv; dispatch since, help={ 46 | "refPath": "path to ref file", 47 | "time" : "stamp to compare ({-}[bamcv]\\*)", 48 | "refTime": "stamp of ref file to use (if different)", 49 | "recurse": "recurse n-levels on dirs; 0:unlimited", 50 | "chase" : "chase symlinks to dirs in recursion", 51 | "xdev" : "block recursion across device boundaries", 52 | "Deref" : "dereference symlinks for file times", 53 | "kinds" : "i-node type like find(1): [fdlbcps]", 54 | "quiet" : "suppress file access errors", 55 | "file" : "optional input (\"-\"|!tty=stdin)", 56 | "delim" : "input file record delimiter", 57 | "eof0" : "read dirents until 0 eof", 58 | "noDot" : "remove a leading . from names", 59 | "unique" : "only print a string once"}, short={"refTime":'T', "refPath":'p'} 60 | -------------------------------------------------------------------------------- /doc/stripe.md: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | `stripe` is parallelization/rudimentary job distribution utility and its library 5 | optimization nano-shell module `bu/execstr`. It only runs 1 command at a time. 6 | 7 | When commands fit into the restricted nano-shell language, this is about as low 8 | overhead as any new ELF/executable process creating tool can be (which, yes, 9 | remains about 50-100X worse than just fork|`cligen/procpool`). 10 | 11 | ``` 12 | Usage: 13 | 14 | stripe [optional-params] [posArgs: string...] 15 | 16 | where posArgs is either a number or , reads job lines from 17 | stdin and keeps up to N | M running at once. 18 | 19 | In sub mode, each job has $STRIPE_SUB set, in turn, to subJ. Eg.: 20 | 21 | find . -printf "ssh $STRIPE_SUB FileJob '%P'\n" | stripe X Y 22 | 23 | runs FileJobs first on host X then on host Y then on whichever finishes first. 24 | Repeat X or Y to keep more jobs running on each host. 
25 | 26 | $STRIPE_SLOT (arg slot index) & optionally $STRIPE_SEQ (job seqNum) are also 27 | provided to jobs. In N-mode SIGUSR[12] (in|de)creases N. If before uses $tot, 28 | job lines are read upfront to provide that count. 29 | 30 | -r=, --run= string "/bin/sh" run job lines via this interpreter 31 | -n, --nums bool false provide STRIPE_SEQ to job procs 32 | -s=, --secs= float 0.0 sleep SECS before running each job 33 | -l=, --load= int -1 0/1/2: 1/5/15-minute load average < N 34 | -b=, --before= string "" "D": $tm \e[1mslot: $nm $cmd\e[m 35 | alsoAvail: $seq $tot 36 | -a=, --after= string "" "D": $tm \e[7mslot: $nm usr: $u sys: $s\e[m 37 | alsoAvail: wall $w MiBRSS $m $ct $pcpu $cmd 38 | -i=, --irupt= string "" "D": $tm interrupted $nm after $w: $cmd 39 | alsoAvail: substitution $sub 40 | ``` 41 | 42 | There is no need for `STRIPE_SUB` values to be ssh targets. Any regular pool of work 43 | labels will do. For example, you could do a 2-way or 4-way tile of images with 44 | some dispatcher savvy about screen-halves/quadrants/etc. 45 | 46 | Related Work 47 | ------------ 48 | 49 | There are almost too many to even begin mentioning. The closest is probably 50 | `xargs -n1 -P9 --process-slot-var=STRIPE_SUB`, but that doesn't provide sequence 51 | numbers. (You may be able to work around that, e.g., with `EPOCHREALTIME` or 52 | other unique Ids.) Mostly I like my job log format, `execstr` shell-avoidance 53 | optimization, and the C version of this dates back to the very early 00s, long 54 | before `xargs` even had `-P` never mind 2012's `--process-slot-var`. I also 55 | like not having to worry about shell array portability to convert from a numeric 56 | process-slot-var to string keys. This is all trivial enough that it's probably 57 | been done many times by many folks to suit their idiosyncratic tastes. 58 | 59 | Anyway, "chunks" of work need to be >30-100 microsec for this to make sense[^1].
60 | If per-job code is shell-ish, you may be able to do a lower overhead (fork scale 61 | rather than exec scale) system with `wait -n` added to Bash in 2014, IIRC. 62 | 63 | [^1]: or even larger if a real shell launch per command is involved.. 64 | -------------------------------------------------------------------------------- /doc/fread.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Sometimes you want to ensure hot-cache or cold-cache or measure only pure read 5 | behaviors. 6 | 7 | For example: 8 | ```sh 9 | dd if=/dev/zero blocksize=16384 count=16384 | fread 10 | fread *.dat & # get pre-loading of all that going. 11 | ``` 12 | 13 | Usage 14 | ----- 15 | ``` 16 | fread [optional-params] paths: paths to read in 17 | 18 | This is like `cat`, but just discards data. Empty `paths` => just read from 19 | stdin. That can be useful to ensure data is in an OS buffer cache or try to 20 | evict other data (more portably than /proc/sys/vm/drop_caches) for cold-cache 21 | runs, measure drive/pipe or device throughput, etc. Eg. in Zsh you can say: 22 | `fread \*\*` or `fread -l $((1<<30)) < /dev/urandom`. 23 | 24 | Users may pass paths to FIFOs/named pipes/other block-on-open special files 25 | which are skipped. Anything named is only used if mmap-able & only 1 byte 26 | (really 1 cache line) per 4096 is used by the process. Can use multiple passes 27 | to measure DIMM bandwidth through a CPU prefetching lens. 
28 | 29 | Options: 30 | -b=, --bsz= int 65536 buffer size for stdin IO 31 | -l=, --lim= int64 0 max bytes to read; 0=>unlimited 32 | -n=, --nPass= int 1 passes per file 33 | -o=, --off= int 64 total [off0-origin-pass within pages] 34 | -v, --verb bool false print bytes read 35 | ``` 36 | 37 | Example: 38 | -------- 39 | As just one example benchmark-y kind of sketch, we can easily create a 1 GiB 40 | file from `/dev/urandom`[^1], and then (w/`taskset` & `chrt`[^2] to lessen 41 | noise) loop over a series of numbers of passes and fit run times to a [linear 42 | model](https://github.com/c-blake/fitl): 43 | 44 | ```sh 45 | export j=/dev/shm/junk 46 | dd if=/dev/urandom of=$j bs=32k count=32k 47 | taskset -c 2 chrt 99 sh -c \ 48 | 'for n in `seq 1 64`;do printf "$n ";fread -vn$n $j;done'| 49 | fitl -b99 -c,=,n,b 6 0 1 50 | ``` 51 | That yields for me, on one rather old bare metal machine: 52 | ``` 53 | $6= 0.070379 + 1.32172e-03 *$1 54 | 55 | bootstrap-stderr-corr matrix 56 | 0.0001110 -0.8542 57 | 3.245e-06 58 | ``` 59 | The very small errors on slope & intercept suggest a good fit & the fit suggests 60 | initial pass time of 70.38 +- 0.11 ms & per pass times of 1.3217 +- 0.0032 ms. 61 | Since each pass after the first only hits one 64B cache line per page this 62 | translates to about 1./64/1.3217e-3 =~ 11.82 GiB/s throughput. 63 | 64 | Of course, there is more going on than *only* memory transfer (not a lot more!), 65 | but this is also just one example benchmark. `perf stat` (on Linux, anyway) may 66 | afford a more refined understanding of this kind of throughput. Similarly, a 67 | smaller 2MiB file on a HugeTLB FS might eliminate all TLB misses to study L3 CPU 68 | cache bandwidth (or even L2 for some CPUs these days).[^3] And so on. Data 69 | moves around in many ways and data motion is often a bottleneck and `fread` is 70 | here usually just one piece of a bigger puzzle. 71 | 72 | [^1]: Or maybe by `cp somebigfile $j; truncate -s $((1024*1024*1024)) $j`, etc.
73 | 74 | [^2]: On Linux anyway.. 75 | 76 | [^3]: Measuring IO *itself* is also only one application of `fread`. The 77 | original inspiration was eliminating IO time from *other* benchmarks. 78 | -------------------------------------------------------------------------------- /adorn.nim: -------------------------------------------------------------------------------- 1 | when not declared(File): import std/syncio 2 | import cligen, cligen/[sysUt, mfile, mslice, osUt], std/parseutils 3 | 4 | proc anyNegative(xs: seq[int]): bool = (for x in xs: (if x < 0: return true)) 5 | proc outOfOrder(xs: seq[int]): bool = 6 | for j in 1..= 1 column; Full $HELP" 17 | if colNums.outOfOrder: Help !! "Need firstCol < secondCol ...; Full $HELP" 18 | let origin = if O0: 0 else: origin 19 | var o = if output.len > 0: open(output, fmWrite) else: stdout 20 | let sep = initSep delim # Parse delimiter 21 | let xfm = origin == 0 and colNums.anyNegative # Transform indexing 22 | var fs: seq[TextFrame] # Frames 23 | var ac = colNums; var m0 = 0 # Absolute Column 24 | for ln in mSlices(input, sep=rowDlm, eat='\0'): # RO mmap|1-use slices 25 | let m = ln.frame(fs, sep) # Make frames 26 | if m == 0: discard o.uriteBuffer(ln.mem, ln.len) 27 | else: 28 | if xfm: 29 | if m != m0: # Re-use last ac[] if `m` same as last loop 30 | copyMem ac[0].addr, colNums[0].addr, ac.len*ac[0].sizeof 31 | for c in mitems ac: (if c < 0: c += (m + 1) div 2) 32 | m0 = m 33 | var dc, par = 0 # 3 indices: Raw fs[j], dataCol, param (& ac[that]==dc) 34 | for j in 0.. par and prefix[par].len > 0: o.urite prefix[par] 39 | o.urite fs[j].ms 40 | if suffix.len > par and suffix[par].len > 0: o.urite suffix[par] 41 | inc par 42 | else: o.urite fs[j].ms 43 | o.urite "\n" # Should maybe just extend final MSlice to include line 44 | # end, carefully preserving unterminated whole files. 
45 | when isMainModule: dispatch adorn, help={ 46 | "colNums": "`colNums` (`origin`-origin column numbers)", 47 | "origin": "origin for `colNums`; 0 => signed indexing", 48 | "O0" : "shorthand for `--origin=0`", 49 | "prefix": "strings to prepend to listed columns", 50 | "suffix": "strings to append to listed columns", 51 | "input" : "path to mmap|read as input; \"\" => stdin", 52 | "rowDlm": "`input` *row* delimiter character", 53 | "delim" : "`input` *field* dlm chars; len>0=>fold;w=white", 54 | "output": "path to write output file; \"\" => stdout"}, short={"output":'o'} 55 | -------------------------------------------------------------------------------- /wits.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/[syncio, formatfloat] 2 | import std/[math, times], adix/[bist, lmbist, embist, xhist1], nio 3 | 4 | var old = 2000.0/2001.0 5 | xhist1.def FHisto, lna, exp, Bist[uint32] 6 | xhist1.def EHisto, lna, exp, EMBist[float32], Hini=true, old 7 | 8 | xhist1.def FBist , lna, exp, Bist[uint32] 9 | xhist1.defMove MFHisto, FBist, 1, 1 10 | 11 | xhist1.def LBist , lna, exp, LMBist[uint32] 12 | xhist1.defMove MLHisto, LBist, it.t + 1, it.t + 1 - it.win 13 | 14 | xhist1.def EBist , lna, exp, EMBist[float32], Hini=true, old 15 | xhist1.defMove MEHisto, EBist, 1.0, it.xwh.hist.scale(it.win) 16 | 17 | type Kernel = enum kFlat="flat", kLin="linear", kExp="exponential" 18 | proc wits(input=".Nf", kernel=kFlat, win=60, oldW=0.99, a=1000.0, b=1e18, 19 | n=32767, time=false, fs: seq[float]) = 20 | ## Windowed/Weighted Incremental Time Series. A CLI for maybe-time-windowed & 21 | ## maybe-time-weighted incremental dynamic histograms of `adix` with related 22 | ## quantities like Winsorized/trimmed moments. Presently, this only takes one 23 | ## binary numeric column (to get experience w/run-time parameterization), but 24 | ## emits as many `float32` (aka `'f'`) as quantile fractions specified. 
25 | let fs = if fs.len==0: @[0.5] else: fs # box & whiskers 26 | var i = nOpen(input) #XXX Must decide on & impl a spec lang 27 | var num: float #... for Winsor/trim moments & qtls (& 28 | var o = newSeq[float32](fs.len) #... also impl Winsor/trim moments!) 29 | 30 | var hMF = initMFHisto(a, b, n, win) 31 | var hML = initMLHisto(a, b, n, win) 32 | var hME = initMEHisto(a, b, n, win) #XXX this is buggy/infinite loops XXX 33 | var hF = initFHisto(a, b, n) 34 | var hE = initEHisto(a, b, n) #XXX must propagate a decay factor 35 | let t0 = epochTime() 36 | var c = 0 37 | while i.read num: 38 | if win > 0: 39 | if kernel == kFlat: hMF.add num 40 | elif kernel == kLin : hML.add num 41 | elif kernel == kExp : hME.add num 42 | else: 43 | if kernel == kFlat: hF.add num 44 | elif kernel == kExp : hE.add num 45 | for j, f in fs: 46 | o[j] = (if win > 0: 47 | if kernel == kFlat: hMF.quantile f 48 | elif kernel == kLin : hML.quantile f 49 | elif kernel == kExp : hME.quantile f 50 | else: NaN 51 | else: 52 | if kernel == kFlat: hF.quantile f 53 | elif kernel == kExp : hE.quantile f 54 | else: NaN).float32 55 | discard stdout.writeBuffer(addr o[0], o[0].sizeof*o.len) 56 | inc c 57 | if time: stderr.write (epochTime() - t0)*1e9/c.float, " ns/num\n" 58 | 59 | when isMainModule: import cligen;include cligen/mergeCfgEnv;dispatch wits,help={ 60 | "fs" : "quantile fractions; 0.5=median (also default)", 61 | "input" : "nio input file; extension-only=>stdin", 62 | "kernel": "time-kernel: flat, linear, exponential", 63 | "win" : "window; 0=>running/cumulative(!linear)", 64 | "oldW" : "weight on old data for exponential", 65 | "a" : "lower bound", 66 | "b" : "upper bound", 67 | "n" : """cross-sectional `n` for HDR histo; 32767=~ 68 | 15/log10(1.001)=>defaults=>0.1% bins""", 69 | "time" : "report main loop execution time"} 70 | -------------------------------------------------------------------------------- /memlat.nim: 
-------------------------------------------------------------------------------- 1 | import std/[random, times, strutils, stats], cligen 2 | when defined(release): randomize() 3 | 4 | when defined(mt): 5 | import mersenne # Test against a diff PRNG 6 | var mt = newMersenneTwister(1234554321) 7 | proc rand(max: int): int = # Inclusive of endpoint 8 | int(mt.getNum mod (uint32(max) + 1u32)) 9 | 10 | proc sattolo_cycle[T](x: var openArray[T]) = 11 | for i in countdown(x.len - 1, 1): 12 | swap x[i], x[rand(i - 1)] # i-1 -> i =>Fisher-Yates 13 | 14 | proc prepRanElt(x: var seq[int], n: int) = 15 | for i in 0.. 0: r = initRand(seed) 72 | else: randomize(); r = initRand(rand(100000)) 73 | let n = (sizeKiB shl 10) shr 3 # or shl 7 74 | case kind 75 | of shuff : time(prepShuffle, runShuffle, n,nAcc,avgN,minN) 76 | of ranElt: time(prepRanElt , runRanElt , n,nAcc,avgN,minN) 77 | of truRan: time(prepTrueRan, runTrue , n,nAcc,avgN,minN) 78 | 79 | include cligen/mergeCfgEnv 80 | dispatch(lat, help = {"kind": "shuff: chase ran perm\n" & 81 | "ranElt: access ran elt\n" & 82 | "truRan: pre-read getrandom", 83 | "seed": "0=>random, else set" }) 84 | -------------------------------------------------------------------------------- /man/catz.1: -------------------------------------------------------------------------------- 1 | .\" -*- nroff -*- 2 | .TH CATZ 1 "July 2002" "CB Utils" 3 | 4 | .SH NAME 5 | 6 | catz \- ``cat'' for compressed files 7 | 8 | .SH SYNOPSIS 9 | 10 | .nf 11 | .B catz \fI[ -d ] [ -v STDIN_NAME_VAR ] [ FILES [ "-" ] [ < FILE ] ]\fR 12 | .fi 13 | 14 | .SH DESCRIPTION 15 | 16 | .B catz 17 | is a replacement for \fBcat\fR(1), but \fIcatz\fR decodes encoded files, 18 | avoiding temporary storage like \fBzcat\fR(1) but for many possible formats. 19 | 20 | .SH USAGE 21 | 22 | This program is careful to go by magic numbers in headers of compressed or 23 | otherwise encoded files when no pathname extension matches. 
NOTE: in a shell 24 | context, it is easy to say "\fIcatz\fR < input.xz" to force ignoring extensions. 25 | 26 | The flag "-v" indicates \fISTDIN_NAME_VAR \fR, an environment variable that 27 | the invoker sets to the pathname for stdin. Other pathnames are available 28 | from the \fBcatz\fR argument list. Pathnames are only really needed if magic 29 | number recognition would fail or if selected decoder programs need a pathname. 30 | 31 | Just as with \fBcat\fR(1), a lone minus sign ("-") filename indicates 32 | how the standard input stream should be ordered within the catenation. 33 | 34 | Currently, \fBcatz\fR has decoders for 35 | \fBzip\fR, 36 | \fBgzip\fR, 37 | \fBcompress\fR, 38 | \fBbzip\fR, 39 | \fBbzip2\fR, 40 | \fBlzop\fR, 41 | \fBlzma\fR (lzma-utils version), 42 | \fBxz\fR or \fBpixz\fR, 43 | \fBplzip\fR, 44 | \fBlz4\fR, and 45 | \fBzstd\fR and the document formats 46 | \fB.pdf\fR, 47 | \fB.ps*\fR, and 48 | \fB.htm*\fR 49 | 50 | NOTE: Due to limitations in utilities for the format, zip files given as 51 | paths will have \fBall\fR members catenated, while only the \fBfirst\fR 52 | member is extracted from unseekable zip inputs. 53 | 54 | A leading "-d" option is ignored for compatibility with GNU tar -I. 55 | 56 | .SH IMPL NOTES 57 | 58 | The one-file, seekable-input case allows simple replacement of the 59 | \fBcatz\fR process with a decoder process. The \fBexec\fR(2) inherits 60 | the needed file descriptors. This avoids any unnecessary context 61 | switching or copying. 62 | 63 | The N-file, named-argument case requires a two-process at a time system 64 | in order to generate an ordered, integrated output stream. \fBcatz\fR 65 | uses a forked version of itself to read output from the read side of a 66 | pipe, copying it to the original stdout. All decoder programs send 67 | their output to the write-side. 
68 | 69 | If no path name is available (e.g., stdin) or if the pathname does not 70 | have a standard filename extension for compressed files, then a magic 71 | number \fImust\fR be read to identify the decoder to be used. A decoder 72 | itself will (typically) \fIalso\fR insist on this magic number being 73 | present. For seekable input streams, \fBlseek\fR(2) can restore the 74 | file pointer and the decoder process will be happy. For unseekable 75 | input streams, we must fork and exec a new process to put the header 76 | back into place for a translator. In short, \fBcat foo|catz\fR should 77 | work fine. This is unlikely true of any/many other auto-decoder. 78 | 79 | .SH AUTHOR 80 | 81 | C.Blake conceived & wrote \fBcatz\fR. 82 | 83 | .SH BUGS 84 | 85 | Please report them! 86 | 87 | .SH SEE ALSO 88 | 89 | .BR cat (1) ", zcat" (1) ", bzip" (1) ", bzip2" (1) ", zip" (1) ", compress" (1) ", grep"(1) 90 | -------------------------------------------------------------------------------- /doc/thermctl.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | OS kernels can down clock CPUs, but are often not aggressive enough to block 4 | thermal shutdown. This controller can sometimes do better. 5 | 6 | This approach is limited, but still useful for me (e.g. on old laptops with 7 | failing fans &| overclocked gamer rigs that only overheat with just the right L3 8 | cache loads, often from g++ compiles). 9 | 10 | # Operation 11 | 12 | At CPU temperature `T > temp.b`, `thermctl` sends SIGSTOP to all runnable PIDs 13 | & at `T <= temp.a`, it sends SIGCONT to all stopped PIDs. 14 | 15 | # Limitations 16 | 17 | Pausing can fail to block future work (loadAvg-targeting work ctl, permissions, 18 | rarely scheduled dispatchers, ..). Operation can also undesirably SIGCONT jobs 19 | stopped in shells with job control. 
20 | 21 | # Temperature query & Parameter tuning 22 | 23 | ``` 24 | thermctl [optional-params] 25 | OS kernels can down clock CPUs but may not be aggressive enough to block thermal 26 | shutdown. This controller can sometimes do better. At T > temp.b, it SIGSTOPs 27 | runnable PIDs & at T <= temp.a, it SIGCONTs PIDs it stopped. 28 | 29 | NOTE: Pausing can fail to block future work (loadAvg-targeting dispatch, perms, 30 | hot procs often put to sleep just before scheduling thermctl itself, etc.). 31 | So, this approach is limited, but maybe useful (e.g. on old laptops with failing 32 | fans &| overclocked gamer rigs). 33 | Options: 34 | -q=, --qry= string "auto" auto:Intel?turbostat -sCPU,CoreTmp:cpuTemp 35 | -d=, --delay= float 1.0 $1 param to qry (likely a delay) 36 | -m=, --match= string "." pattern selecting cpuTemp line 37 | -t=, --temp= Slice 80.0..90.0 > b => pause; < a => resume 38 | -l=, --log= string "" path to log control transitions to 39 | -i=, --incl= strings ffmpeg cmd names to always SIGSTOP if hot 40 | -e=, --excl= strings thermctl cmd names to never SIGSTOP 41 | ``` 42 | 43 | # A Few More Details 44 | 45 | The idea of `--incl` is to avoid a noticed problem where main heat generating 46 | processes are suspended very briefly just before switching to turbostat/thermctl 47 | itself. That means they are not marked Runnable (& so not sent SIGSTOP) for 48 | maybe several query cycles in a row. I saw as many as 4..6 sequentially on a 49 | 4-core. That allows temperature to rise too quickly. A good rule of thumb for 50 | this parameter might be processes that tend to be long-lived and CPU-heavy that 51 | you are sure it's ok to SIGSTOP & SIGCONT like ffmpeg or gcc. 52 | 53 | Note that `turbostat` is distributed with Linux kernel sources. 
So, if you 54 | build your own kernels you can usually get it with 55 | ``` 56 | make -C /usr/src/linux/tools turbostat_install WERROR=0 HOME=/usr/local 57 | ``` 58 | For AMD CPUs you will probably need some kind of wrapper program to post-process 59 | the output of `lmsensors` (e.g. `sensors k10temp-pci-00cb k10temp-pci-00c3`) run 60 | in a loop. 61 | 62 | Physics-minded folk might worry that turbostat itself adds to CPU load pseudo- 63 | Heisenberg-style which is true, but also a small effect. I see 0.02% usage by 64 | turbostat with 1 second delays on a laptop with a 12 year old CPU. (The small 65 | effect can, however, become a large battery drain effect if temperature polling 66 | activity prevents a hard sleep mode.) 67 | 68 | Anyway, I usually launch `thermctl -l/var/log/therm` at system boot. 69 | -------------------------------------------------------------------------------- /doc/eve.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | One often wants to extrapolate from a finite sample to the true max|min. When 4 | benchmarking, one might want to [filter out system noise](doc/tim.md) { which 5 | has some unknown distribution, but is even worse non-stationary/not IID :-( }. 6 | Another example is in density estimation such as the "clip" or "cut off" values 7 | for a simple histogram or KDE. 8 | 9 | Solving foundational problems like "What background activity competes on time 10 | sharing systems, how stationary is it, etc.?", is hard. However, it is not so 11 | hard to estimate true max|min's (& errors of said estimates) better than sample 12 | extremes of ginormous samples (if one views this as a performance optimization). 13 | Also, one cannot always sample more data - sometimes that is crazy expensive & 14 | limited by "dollars or years per sample" effects. 
15 | 16 | Approach 17 | ======== 18 | The paper initially inspiring this utility is openly available at 19 | https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1433242 . That block maxima 20 | idea (& implementation) has been superseded by the *far* more reliable 21 | peaks-over-threshold (POT) school of Portuguese Extremists: 22 | https://arxiv.org/abs/1412.3972 23 | 24 | Standard errors for the estimate of the true population extreme are estimated by 25 | a bootstrap which should make them ok but, full disclosure, I am still working on 26 | this aspect. 27 | 28 | Usage 29 | ===== 30 | ``` 31 | eve [optional-params] 1-D / univariate data ... 32 | 33 | Extreme Value Estimate by FragaAlves&Neves2017 Estimator for Right Endpoint 34 | method with bootstrapped standard error. E.g.: eve -l $(repeat 99 tmIt). This 35 | only assumes IID samples (which can FAIL for sequential timings!) and checks 36 | that spacings are not consistent with an infinite tail. 37 | 38 | -l, --low bool false flip input to estimate Left Endpoint 39 | -b=, --boot= int 32 number of bootstrap replications 40 | -B=, --BLimit= int 5 re-tries per replication to get not-long 41 | -e=, --emit= set(Emit) bound tail - verbose long-tail test 42 | bound - bound when short-tailed 43 | -a=, --aFinite= float 0.05 tail index > 0 acceptance significance 44 | -k=, --k= float -0.5 2k=num of order statistics; <0 => = n^|k| 45 | -K=, --KMax= int 50 biggest k; FA,N2017 suggests ~50..100 46 | -s=, --shift= float 0.0 shift MAX by this many sigma (finite bias) 47 | ``` 48 | 49 | Some Subtleties 50 | =============== 51 | The idea here does not make sense if extreme data spacings suggest an infinite 52 | rather than finite tail. So, we are careful to rule this out at alpha level 53 | `aFinite` in both the main estimator and the bootstrap re-sampling. 54 | 55 | The bootstrap preserves the sample-max to aid clustering of new estimates around
It also re-samples only the data that contributes to the 57 | estimate - and also only from that portion of the tail. This seems to me the 58 | most coherent approach. 59 | 60 | POT methods require that k/n->0 as n grows. But we want a good estimate. So, 61 | we want k big. However, for the estimator formula, k cannot be > n/2. So, 62 | internally, `eve` uses `k = min(n/2 - 1, n^kPow)`. This should discard most 63 | data above (below for `-l`) the median or much more if you use a lower `kPow`. 64 | -------------------------------------------------------------------------------- /dirt.nim: -------------------------------------------------------------------------------- 1 | # NOTE: Needs `devel` / >= 1.4.0 for `HeapQueue[T].find`. 2 | when not declared(File): import std/syncio 3 | include cligen/unsafeAddr 4 | import std/[heapqueue,sets,posix,strformat], cligen,cligen/[dents,posixUt,statx] 5 | 6 | proc setMTime*(dfd: cint; path: string; m0, m1: StatxTs; 7 | verb: File=nil, err=stderr, dryRun=false): int = 8 | ## Set the file m1)odification time of ``(dfd,path[nmAt..^1])`` only if not 9 | ## equal to the original ``m0`` times with typical command utility ``verb``, 10 | ## ``err``, ``dryRun`` parameters, returning if the call must/did occur. 11 | if m0 == m1: return 0 12 | result = 1 13 | let omit = Timespec(tv_sec: 0.Time, tv_nsec: UTIME_OMIT) 14 | var ftms = [ omit, toTimespec(m1)] 15 | verb.log &"futimens({dfd}({path}), [OMIT, {$ftms[1]}])\n" 16 | if not dryRun and futimens(dfd, ftms) != 0: 17 | err.log &"futimens({dfd}({path}): {strerror(errno)}\n" 18 | 19 | proc dirt*(roots: seq[string], verbose=false, quiet=false, dryRun=false, 20 | prune: seq[string] = @[], xdev=false): int = 21 | ## Set mtimes of dirs under ``roots`` to mtime of its newest kid. This makes 22 | ## directory mtimes "represent" content age at the expense of erasing evidence 23 | ## of change which can be nice for time-sorted ls in some archival file areas. 
24 | if roots.len == 0: # For safety, do nothing if user specifies empty `paths` 25 | return 26 | let prune = toHashSet(prune) 27 | let verb = if dryRun or verbose: stdout else: nil 28 | let err = if quiet: nil else: stderr 29 | var n = 0 30 | for root in roots: 31 | var dirs = @[initHeapQueue[int64]()] # HeapQueue.pop is *MINIMUM* 32 | forPath(root, 0, lstats=true, false, xdev, false, err, 33 | depth, path, nmAt, ino, dt, lSt, dfd, dst, did): 34 | if dt != DT_LNK: # Always: 35 | dirs[^1].push -toInt64(lSt.stx_mtime) # Track max age 36 | do: # Pre-recurse: 37 | if path[nmAt..^1] in prune: 38 | verb.log &"pruning at: {path}\n" 39 | discard dirs[^1].pop 40 | continue 41 | dirs.add initHeapQueue[int64]() # Add new queue for kid 42 | let dmt = lSt.stx_mtime # Save old mtime 43 | do: # Post-recurse: 44 | if dirs.len > 0: 45 | if dirs[^1].len > 0: # Deepest queue non-empty 46 | let kidTm = dirs[^1].pop # Get & use max kid time stamp 47 | n += setMTime(dfd, path, dmt, toStatxTs(-kidTm), verb, err, dryRun) 48 | if dirs.len > 1: # ASSUME setMTime SUCCEEDS 49 | dirs[^2].del dirs[^2].find(-toInt64(dmt)) #XXX BST/BTreeQ 4big dirs 50 | dirs[^2].push kidTm # reflect dmt -> kidTm in parent 51 | discard dirs.pop # discard kid queue 52 | do: recFailDefault("dirt", path) # Cannot recurse 53 | return min(127, n) 54 | 55 | when isMainModule: 56 | include cligen/mergeCfgEnv; dispatch(dirt, short={"dry-run": 'n'}, help={ 57 | "verbose": "print `utimes` calls as they happen", 58 | "quiet" : "suppress most OS error messages", 59 | "dry-run": "only print what system calls are needed", 60 | "prune" : "prune exactly matching paths from recursion", 61 | "xdev" : "block recursion across device boundaries" }) 62 | -------------------------------------------------------------------------------- /doc/pid2.md: -------------------------------------------------------------------------------- 1 | # Basics 2 | 3 | Usage: (***NOT*** a cligen utility) 4 | ``` 5 | pid2 [integer(300)] 6 | ``` 7 | The [] 
notation here indicates optionality and default is in (). 8 | 9 | This program just does `vfork()` as fast as possible to wrap a Linux process 10 | table until the target Process ID integer is reached. 11 | 12 | # Motivation 13 | 14 | PID-wrapping was made famous more as a hacking tool for programs which foolishly 15 | assume the next PID is neither predictable nor re-used (e.g. a shell /tmp/foo.$$ 16 | construct). 17 | 18 | I am publishing it here mostly as an example of a big effect that OS scheduling 19 | affinity for a particular CPU can make. It can also sometimes be nice to 20 | "position" within the process table if you often do PID-sorted process table 21 | listings..(e.g. to group all your xterms or shells together). 22 | 23 | # Speed-up 24 | 25 | For even greater speed, you can do this in parallel with each pid2 pinned 26 | to different CPUs, such as a wrapper script (called, say, `2pid`): 27 | 28 | ```sh 29 | : "${j:=$(nproc)}" 30 | for k in `seq 0 "$((j-1))"` 31 | do pid2 "$@" "$k" & done 32 | wait 33 | ``` 34 | Even with `2^22` pids (default lately), this can take under 8 sec on my laptop. 35 | Most people I know are unfamiliar with how fast the PID counter can advance 36 | under heavy fork load. 37 | 38 | # Regrets, I have a few.. 39 | 40 | In 1979 when Berkeley introduced `vfork` on the VAX 11/780, they should have 41 | made PIDs 32-bits. At the time, it was about 3..8ms to `vfork` meaning 32768 42 | wraparounds could take just a few minutes.[^1] Meanwhile, 32-bit would have 43 | been 6..18 months doing almost nothing but `vfork` -- likely easily noticed / 44 | trapped activity right up until about the 64-bit moves in the late 90s. PIDs 45 | then could have been "unique ids" from the dawn of Unix & very likely moved to 46 | 64-bit ids by the late 90s which (in 2025) would still be fine unique IDs for 47 | the foreseeable future. Oh well! 
import cligen, cligen/[mfile, mslice, osUt], adix/topk
from std/strutils as su import nil
when not declared(stderr): import std/syncio

# Python-style signed, 0-origin column indexing over an already-split row.
proc pyIx(x: openArray[MSlice], i: int): MSlice = x[if i < 0: i + x.len else: i]

proc pyIx(x: openArray[MSlice], s: Slice[int]): MSlice =
  ## Column-slice as ONE contiguous `MSlice`: columns of a split line share the
  ## same underlying buffer, so the span from the start of col `a` to the end
  ## of col `b` is addressable directly.  Returns a nil/empty slice when the
  ## (sign-adjusted) bounds are empty or out of range.
  let a = if s.a < 0: s.a + x.len else: s.a
  let b = min(x.len - 1, if s.b < 0: s.b + x.len else: s.b) # b < a | OutOfB: ""
  if b < a or a + 1 > x.len or b + 1 > x.len: result.mem = nil; result.len = 0
  else: result.mem = x[a].mem; result.len = x[b].mem +! x[b].len -! x[a].mem

# NOTE(review): the `<n>`-style placeholders in the doc comment below appear to
# have been eaten by an HTML scrape of this file (`<...>` read as tags);
# reconstructed here -- confirm exact wording against the upstream repo.
proc topn*(input="/dev/stdin", delim=" ", mxCol=0, n=0, order=Cheap,
           partn=Partn.last, specs: seq[string]) =
  ## Write spec'd cols of topN-rows-by-various-other-cols to outFile's. A spec
  ## is `<n>[,<keyCol>(0)[,outCol(same)[,outFile(stdout)]]]`. ColNos are Py-like
  ## 0-origin,signed. *outCol* can be an A:B exclusive or A..B slice. Algo is
  ## fast one-pass over (mmap|stream) input. Simple & Fancy E.g.s:
  ##   ``find . -type f -printf '%C@ %p\\n' | topn -m1 5``  # newest 5 by ctime
  ##   ``topn 9,1,-1,x``  # writes last col of top 9-by-col-1 rows to file x.
  ## If `n!=0` then `<n>` can end in '%' to instead mean *100\*pct/n* rows.
  let m = specs.len                     # Handle all `m` sort orders in one pass
  if m < 1: stderr.write "No specs requested. -h for help.\n"; return
  var keyC = newSeq[int](m)             # Per-spec: key column number
  var nTop = newSeq[int](m)             # Per-spec: how many rows to keep
  var oCol = newSeq[Slice[int]](m)      # Per-spec: output column slice
  var oFil = newSeq[File](m)            # Per-spec: output destination
  for i, spec in specs:                 # Parse key-output specifiers
    let params = su.split(spec, ',')
    if params.len < 1:
      stderr.write "too few sub-params in spec ", spec, "\n"; continue
    let p0 = params[0]
    # Trailing '%' means a percentage of `n` rows rather than a fixed count.
    nTop[i] = if su.endsWith(p0, '%'): su.parseInt(p0[0..^2]) * n div 100
              else: su.parseInt(p0)
    nTop[i] = max(1, nTop[i])           # Keep at least one row
    keyC[i] = if params.len > 1: su.parseInt(params[1]) else: 0
    # Default output columns: just the key column itself.
    oCol[i]=if params.len>2:parseHSlice[int,int](params[2])else:keyC[i]..keyC[i]
    oFil[i] = if params.len > 3: open(params[3], fmWrite) else: stdout
  let sep = initSep(delim)              # Init into-seq[MSlice] splitter
  var row: seq[MSlice] = @[]
  let mf = mopen(input)                 # nil .mem => fall back to stdio path

  # `.dirty` so `rec`, `row`, `tops` bind in the instantiation context; `T` is
  # `string` (owned copies, stdio path) or `MSlice` (zero-copy, mmap path).
  template sweep(mf, T, i, outsVal) {.dirty.} =
    type Rec = tuple[val: float32; outs: T]
    var tops: seq[TopK[Rec]]
    for i in 0 ..< m: tops.add initTopK[Rec](nTop[i], partn)
    var rec: Rec
    for line in mSlices(mf, eat='\0'):  # RO mmap | slices from stdio
      sep.split(line, row, mxCol)       # split into columns
      for i in 0 ..< m:                 # update our 1-to-several `TopK`
        rec.val = parseFloat(pyIx(row, keyC[i])).float32 # tuned4 float rarity
        rec.outs = outsVal
        tops[i].push rec.move           # `$pyIx(..)` in outsVal maybe made cpy
    for i in 0 ..< m:                   # Emit like sort -gk|tail -n|tac
      for e in tops[i].maybeOrdered(order): oFil[i].urite e.outs, "\n"
      if not oFil[i].isNil and oFil[i] != stdout: oFil[i].close

  # rec.outs become either GC'd `string` or no-need-to-GC `MSlice`
  if mf.mem.isNil: sweep(input, string, i, $pyIx(row, oCol[i]))
  else           : sweep(mf   , MSlice, i, pyIx(row, oCol[i]))

when isMainModule: include cligen/mergeCfgEnv; dispatch topn, help={
  "input": "input data path",
  "delim": "delimiting (repeats=>any num; \"white\")",
  "mxCol": "max columns in input to parse",
  "n"    : "scale for '%' amounts",
  "partn": "partition: last, ran",
  "order": "order: Cheap, Ascending, Descending"}
The only limit is your imagination. :) 22 | 23 | There may be other interesting setups with other event classes. 24 | 25 | Usage 26 | ----- 27 | ``` 28 | dirq [optional-params] [cmdPrefix: string...] 29 | 30 | chdir(dir) & wait for events to occur on it. For each delivered event, run 31 | cmdPrefix NAME where NAME is the filename (NOT full path) delivered. 32 | 33 | Handleable events are: 34 | access attrib modify open closeWrite closeNoWrite 35 | movedFrom movedTo moveSelf create delete deleteSelf 36 | 37 | Default events closeWrite (any writable fd-close) | movedTo (renamed into dir) 38 | usually signal that NAME is ready as an input file. 39 | 40 | dirq can monitor & dispatch for many dirs at once with repeated --dir=A cmdPfx 41 | for A --dir=B cmdPfx for A patterns; events & wait are global. 42 | 43 | Options: 44 | -e=, --events= set(Event) closeWrite,movedTo inotify event types to use 45 | -w, --wait bool false wait4(kid) until re-launch 46 | -d=, --dir= string "." directory to watch 47 | ``` 48 | 49 | History/Cultural 50 | ---------------- 51 | Circa 2006, Linux added a `man 7 inotify` system that obsoleted an inefficient 52 | & limited (*must* rename into) approach for this. So, I did a C program (had to 53 | use `syscall(__NR_inotify_add_watch, ..)` since it took glibc a while to wrap). 54 | `dirq` is a Nim port of this C program. (I pronounce `dirq` like "Dirk" myself, 55 | but you can do as you like.) 56 | 57 | Future Work 58 | ----------- 59 | I don't use BSD these days, but KQueue and similar facilities could allow this 60 | program to be a kind of portable command entry point for this limited subset of 61 | functionality. Maybe something like it already exists? I believe kqueue file 62 | monitoring pre-dates Linux inotify. Similarly, a few events like `movedTo` can 63 | be handled portably with a stat-loop, re-scanning directories upon mtime update. 
64 | 65 | Related Work 66 | ------------ 67 | `inotifywait` of [inotify-tools](https://github.com/inotify-tools/inotify-tools) 68 | does allow this, but a "command wrapper" use concept makes working with general 69 | filenames easier. Specifically, `dirq` simply populates the last `argv[]` slot 70 | with the filename received from the kernel & runs your program. This eliminates 71 | both quoting & parsing concerns. With `inotifywait` you would have to format 72 | things in a reliably parsable way which is yet another convention to fret about. 73 | 74 | Bell Labs Plan 9 has a not dissimilar concept called "plumb"/"plumbers" but I 75 | believe these require a bit more cooperation. 76 | -------------------------------------------------------------------------------- /doc/cbtm.md: -------------------------------------------------------------------------------- 1 | WARNING 2 | ------- 3 | Use at your own risk as with any tool that uses `xfs_db` | `debugfs`. No 4 | warranty, express or implied. 5 | 6 | Motivation 7 | ---------- 8 | 9 | Hardware hosting filesystems can change. It can be nice to save & restore ctime 10 | & btime rather than always wiping file life cycle history. 11 | 12 | There is no OS/FS-portable way to do this. (settimeofday can do ctime, but with 13 | system-disruptive time storms.) This utility fills the gap for XFS/ext4 14 | on Linux. 15 | 16 | Basic usage for an XFS on DEV mounted at MNT 17 | -------------------------------------------- 18 | ``` 19 | cbtm save /MNT >MNT.stDat 20 | ``` 21 | This basically just saves all the statx data (which tends to compress very 22 | well if you want). 23 | 24 | Then, sometime later, on e.g. a brand new device: 25 | ``` 26 | cbtm filt -qr/MNT CMDS 27 | umount /MNT 28 | xfs_db -x DEV CMDS.log 2>&1 29 | ``` 30 | Here, `xfs_db` does the hard work. NOTE: Does not yet work for `ext4`. 
31 | 32 | WARNING AGAIN 33 | ------------- 34 | Note that until you become comfortable with this tool, you should look over 35 | generated `CMDS` and perhaps manually run just the first few against a (backed 36 | up!) FS. { While `xfs_db` does have a `path` command as well as `inode` and 37 | even has escape/quote-sensitive tokenization code, unfortunately it does not 38 | de-escape or de-quote things before internal use. So, one must use inode to get 39 | pathname generality. Well, or patch `xfsprogs`. But `stat(1)` does report 40 | inodes for you. } 41 | 42 | More details 43 | ------------ 44 | ``` 45 | Usage: 46 | cbtm {SUBCMD} [sub-command options & parameters] 47 | where {SUBCMD} is one of: 48 | help print comprehensive or per-cmd help 49 | save Save all statx metadata for all paths under roots to output. 50 | print Print metadata stored in input in a human-readable format. 51 | filter Remove input records if source & target differ|same [bc]time. 52 | restore Generate commands to restore [cb]time input 53 | 54 | save [optional-params] [roots: string...] 55 | Save all statx metadata for all paths under roots to output. 56 | 57 | Output format is back-to-back (statx, 2B-pathLen, NUL-term path) records. 58 | To be more selective than full recursion on roots, you can use the output of 59 | find -print[0] if you like (& file=/dev/stdin to avoid temp files). 60 | -f=, --file= string "" optional input ("-"|!tty=stdin) 61 | -d=, --delim= char '\n' input file record delimiter 62 | -o=, --output= string "/dev/stdout" output file 63 | -q, --quiet bool false suppress most OS error messages 64 | 65 | print [optional-params] 66 | Print metadata stored in input in a human-readable format. 67 | -i=, --input= string "/dev/stdin" metadata archive/backup path 68 | -d=, --delim= string "\t" set delim 69 | 70 | filter [optional-params] PCRE path patterns to *INCLUDE* 71 | Remove input records if source & target differ|same [bc]time. 
when not declared(stderr): import std/syncio
include cligen/unsafeAddr
import std/[posix, tables, strutils], cligen, cligen/[osUt, posixUt, dents]

type LncsLog* = enum osErr, summary   ## A micro logging system
type DevIno = tuple[dev: Dev; ino: uint64]  # (device, inode) identifies a file

proc lncs(paths: seq[string], file="", dlm='\n', recurse=1, chase=false, # in
          xdev=false, eof0=false, kinds={fkFile}, minSize=0, thresh=2,   # filt
          quiet=false, log={osErr},                                      # log
          nEcho= -1, noDot=false, outDlm="\t", endOut="\n"): int =       # out
  ## Print hard link clusters within paths of maybe-chasing, maybe-recursive
  ## closure of the UNION of ``paths`` and optional ``dlm``-delimited input
  ## ``file`` (stdin if "-"|if "" & stdin not a tty).  Exit code is min(127,
  ## num.clusters >= thresh).  Eg., ``find -print0|lncs -d\\0 -o '' -e ''``
  ## makes a report reliably splittable on double-NUL then single-NUL for
  ## fully general path names while ``lncs -ls -n0 -r0 /`` echoes a summary.
  let outDlm = if outDlm.len > 0: outDlm else: "\x00"  # ""=>NUL delimiters
  let endOut = if endOut.len > 0: endOut else: "\x00"
  var nPaths, nSet, nFile: int          #Track some statistics
  var tab = initTable[DevIno, seq[string]](512)  # (dev,ino) -> path cluster
  let err = if quiet: nil else: stderr
  let it = both(paths, fileStrings(file, dlm))   # CLI roots ++ file/stdin roots
  var roots: seq[string]
  for root in it(): (if root.len > 0: roots.add root)
  for rt in (if roots.len == 0 and paths.len == 0: @["."] else: roots.move):
    forPath(rt, recurse, true, chase, xdev, eof0, err,
            depth, path, nmAt, ino, dt, lst, dfd, dst, did):
      if dt != DT_UNKNOWN and lst.stx_mode.match(kinds): # unknown here =>gone
        let path = if noDot and path.startsWith("./"): path[2..^1] else: path
        nPaths.inc
        if lst.stx_size >= minSize.uint64:      # big enough
          let key: DevIno = (lst.st_dev, lst.stx_ino)
          tab.mgetOrPut(key, @[]).add(path)     # same key => hard link cluster
    do: discard                                 # Pre-recurse: nothing to do
    do: discard                                 # Post-recurse: nothing to do
    do: recFailDefault("lncs", path)            # Cannot recurse
  for ino, s in tab:
    if s.len >= thresh:                 # Only clusters of >= thresh links
      nSet.inc
      nFile.inc s.len
      if nEcho != 0:                    #Maybe emit report for set
        let lim = min(s.len, if nEcho > 0: nEcho else: s.len)
        stdout.write s[0 ..< lim].join(outDlm), endOut
  if summary in log:                    #Emit summary statistics
    stderr.write nSet," sets of ",nFile," hard links in ",nPaths," paths\n"
  return min(127, nSet)                 #Exit with appropriate status

when isMainModule:                      #Provide a useful CLI wrapper.
  include cligen/mergeCfgEnv
  dispatch lncs, help={ "paths"  : "filesystem roots",
                        "file"   : "optional input (\"-\"|!tty=stdin)",
                        "dlm"    : "input file delimiter (\\0->NUL)",
                        "recurse": "recurse n-levels on dirs; 0:unlimited",
                        "chase"  : "follow symlinks to dirs in recursion",
                        "xdev"   : "block recursion across device boundaries",
                        "eof0"   : "read dirents until 0 eof",
                        "kinds"  : "i-node type like find(1): [fdlbcps]",
                        "minSize": "minimum file size",
                        "thresh" : "smallest hard link cluster to count",
                        "quiet"  : "suppress file access errors",
                        "log"    : ">stderr{osErr, summary}",
                        "nEcho"  : "num to print; 0: none; -1: unlimited",
                        "noDot"  : "remove a leading . from names",
                        "outDlm" : "output internal delimiter",
                        "endOut" : "output record terminator" },
           short = {"xdev": 'X', "eof0": '0', "noDot": '.'}
start, rune, esc, csi, osc # Loop-to-loop Parse State 20 | const ST = '\\' 21 | proc putClipped(line: cstring; n, w: int) = 22 | var ps: ParseState; var r,con,ix: int # ParseState,Rendered width,rune(Con&Ix) 23 | var did=false; var uc=0i32 # Flag & unicode character 24 | var bs: array[4, char] 25 | for i in 0 ..< n: # Input byte Index 26 | if did: did = false # String Terminator Esc-\ needs a peek 27 | else: 28 | let b = line[i] # NOTE: State machines fitting on one screen read easier. 29 | case ps # Idea is to just stop cursor advances after w char cells. 30 | of start: # Special ASCII, then utf8, then emit 31 | if b == '\e' : ps = esc;put b # Enter Esc-Seq mode 32 | elif b == '\b' : dec r; put b # backspace rewinds 1 33 | elif b == '\r' : r = 0; put b # carriage-return rewinds all 34 | elif b == '\t' : r = ((r + 8) div 8)*8; (if r < w: put b) # ${3:-8}? 35 | elif ord(b)<32 : put b # For me \v only lineFeeds, not moving r 36 | elif ord(b)>127: ps = rune; con = contin(b, uc); ix = 0; bs[0] = b 37 | elif r < w: inc r; put b 38 | else: inc r # Advancing blocks combiners post r==w 39 | of rune: # Does not handle Double-Wide Unicode, 40 | inc ix #..or grapheme extensions or similar. 41 | if ix <= con: # Accumulate rune / unicode character 42 | bs[ix] = b; uc = (uc shl 6) or (b.int32 and 0b111111) 43 | if ix == con: 44 | ps = start # Unicode char assembled: maybe emit 45 | if r < w or (r == w and uc.isCombining): 46 | if con==1: put bs[0]; put bs[1] 47 | elif con==2: put bs[0]; put bs[1]; put bs[2] 48 | elif con==3: put bs[0]; put bs[1]; put bs[2]; put bs[3] 49 | if not uc.isCombining: inc r 50 | of esc: # Assume no other escSeq & 0 advance. 51 | if b == '[': ps = csi; put b #..This is inexact since several vtXXX 52 | elif b == ']': ps = osc; put b #..codes can reset/move cursors, BUT we 53 | else: ps = start; put b #..cannot be a full TEmulator *though* 54 | of csi: #..some TEms DO have "no wrap" modes. 
55 | if ord(b) in 0x40..0x7E: ps = start 56 | put b 57 | of osc: 58 | if b == '\a': ps = start 59 | elif b == '\e' and i= 1: parseInt(paramStr(1)) else: 1 67 | let w = m*(if paramCount() >= 2: parseInt(paramStr(2)) else: terminalWidth()) 68 | for (line, n) in stdin.getDelims: 69 | putClipped line, n - 1, w; put '\n' 70 | main() 71 | -------------------------------------------------------------------------------- /bu/rs.nim: -------------------------------------------------------------------------------- 1 | import std/random # For weighted versions of this, see work of Yves Tillé with 2 | type #..keywords "unequal probability sampling without replacement" 3 | Reservoir*[T] = object ## A Reservoir Random Subset Generator 4 | seen, size: int 5 | res*: seq[T] ## Accumulated fair subset/sample 6 | Dup[T] = proc(x: T): T 7 | Del[T] = proc(x: T) 8 | 9 | proc init*[T](r: var Reservoir[T], size=0) = r.size = size ## Initialize 10 | proc initReservoir*[T](size=0): Reservoir[T] = result.init size ## Factory 11 | 12 | proc add*[T](r: var Reservoir[T], item: T, dup: Dup[T]=nil, del: Del[T]=nil) = 13 | ## Add an item to reservoir `r` 14 | inc r.seen 15 | template nix(j: int) = (if not del.isNil: del r.res[j]) 16 | template set(j: int, it: T) = 17 | if not dup.isNil: r.res[j] = dup it 18 | else: r.res[j] = it 19 | if r.size > 0: # Subset mode (No Replacement) 20 | if r.res.len < r.size: # Just populating reservoir 21 | r.res.setLen r.res.len + 1 # `setLenUninit` needs {.nodestroy.}=>.. 22 | set r.res.len - 1, item #..need `=dup`=>No Faster for T=string. 23 | else: # Random replacement in reservoir 24 | if (let j = rand(0.. prefix.`ns`. 39 | ## If `n<0` sample w/replacement else do subsets. O(`Σns`) space. 
Examples: 40 | ## ``seq 1 100 | rs 10 .-5`` or (after maybe ``mkfifo f1 f2``) 41 | ## ``workOn 0: ext = ext[1..^1] 47 | var n = parseInt(ext.toMSlice, e) 48 | if (e==ext.len and ext.len!=0) and dir.len==0 and n!=0: # integral `ext` 49 | rs.add initReservoir[MSlice](n) 50 | let p = dir/name; os.add if name.len>0: open(p, fmWrite) else: stdout 51 | else: 52 | n = parseInt(name.toMSlice, e) 53 | if e!=name.len or name.len==0 or dir.len!=0: Help!!"Non-integral! $HELP" 54 | rs.add initReservoir[MSlice](n); os.add stdout 55 | for line in mSlices(input, mf=mf): 56 | proc dup(x: MSlice): MSlice = # Program does not know until here.. 57 | if mf.mem.isNil: #..if read-only memory map succeeded. 58 | result = MSlice(mem: alloc x.len, len: x.len) 59 | copyMem result.mem, x.mem, x.len # Need a copy only if it failed. 60 | else: result = x 61 | proc del(x: MSlice) = (if mf.mem.isNil: dealloc x.mem else: discard) 62 | for r in mitems rs: r.add line, dup, del # PROCESS INPUT 63 | var n = rs.len 64 | while n > 0: # Looping gives round-robin work to ||readers of os 65 | for j, r in mpairs rs: 66 | if r.res.len > 0: 67 | os[j].urite r.res[^1]; os[j].urite '\n' 68 | r.res.setLen r.res.len - 1 69 | if flush: flushFile os[j] 70 | if r.res.len == 0: dec n 71 | include cligen/mergeCfgEnv 72 | dispatch rs, help={"prefixNs": "[pfx.][-]`n`.. output paths; pfx\"\"=>stdout", 73 | "input": "\"\" => stdin", "flush": "write to outs immediately", 74 | "randomize": "randomize() for non-deterministic filtering"} 75 | --------------------------------------------------------------------------------