├── nim.cfg ├── catz.nim ├── doc ├── blur1.png ├── claw.png ├── pwise.png ├── simul.png ├── tubes.png ├── vipK.gif ├── vipO.gif ├── dfrDark.png ├── dfrLight.png ├── tFping.gif ├── consisCvg.png ├── wsz.md ├── rr.md ├── cfold.md ├── colSort.md ├── fkindc.md ├── emin.md ├── tslice.md ├── fpr.md ├── fage.md ├── tmpls.md ├── jointr.md ├── ww.md ├── ru.md ├── niom.md ├── fsids.md ├── nrel.md ├── dirt.md ├── flow.md ├── since.md ├── holes.md ├── unfold.md ├── adorn.md ├── okpaths.md ├── ndelta.md ├── tmath.md ├── memlat.md ├── chom.md ├── lncs.md ├── uce.md ├── notIn.md ├── sr.md ├── cstats.md ├── rs.md ├── newest.md ├── METAPKG.md ├── wgt.md ├── only.md ├── noc.md ├── keydowns.md ├── zeh.md ├── dups.md ├── tw.md ├── du.md ├── topn.md ├── cols.md ├── crp.md ├── ft.md ├── stripe.md ├── fread.md ├── thermctl.md ├── eve.md ├── pid2.md ├── noa.md ├── dirq.md └── cbtm.md ├── wsz.nim ├── tests ├── consisCvg.gpi ├── fage.sh ├── noa.sh └── tcatz.sh ├── ww.nim ├── bu ├── labFloats.nim ├── colSort.nim ├── esquo.nim ├── emin.nim ├── testf.nim └── rs.nim ├── keydowns.nim ├── pid2.nim ├── noc.nim ├── tslice.nim ├── LICENSE ├── widths.nim ├── rr.nim ├── flow.nim ├── fage.nim ├── uce.nim ├── tmpls.nim ├── tattr.nim ├── cfold.nim ├── noa.nim ├── notIn.nim ├── fsids.nim ├── holes.nim ├── fpr.nim ├── jointr.nim ├── sr.nim ├── fkindc.nim ├── cols.nim ├── niom.nim ├── fread.nim ├── newest.nim ├── oft.nim ├── okpaths.nim ├── ndelta.nim ├── unfold.nim ├── since.nim ├── adorn.nim ├── wits.nim ├── memlat.nim ├── man └── catz.1 ├── dirt.nim ├── topn.nim ├── lncs.nim └── tw.nim /nim.cfg: -------------------------------------------------------------------------------- 1 | --path="." 
2 | -------------------------------------------------------------------------------- /catz.nim: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/catz.nim -------------------------------------------------------------------------------- /doc/blur1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/blur1.png -------------------------------------------------------------------------------- /doc/claw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/claw.png -------------------------------------------------------------------------------- /doc/pwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/pwise.png -------------------------------------------------------------------------------- /doc/simul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/simul.png -------------------------------------------------------------------------------- /doc/tubes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/tubes.png -------------------------------------------------------------------------------- /doc/vipK.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/vipK.gif -------------------------------------------------------------------------------- /doc/vipO.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/vipO.gif 
-------------------------------------------------------------------------------- /doc/dfrDark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/dfrDark.png -------------------------------------------------------------------------------- /doc/dfrLight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/dfrLight.png -------------------------------------------------------------------------------- /doc/tFping.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/tFping.gif -------------------------------------------------------------------------------- /doc/consisCvg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-blake/bu/HEAD/doc/consisCvg.png -------------------------------------------------------------------------------- /doc/wsz.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Sometimes you want to know how big your terminal window is. 5 | 6 | Usage (***NOT*** a cligen utility) 7 | ----- 8 | ```sh 9 | $ wsz 10 | cells: 80 x 72 pixels: 1280 x 2160 charCell: 16 x 30 11 | ``` 12 | `wsz` takes no arguments. 
13 | -------------------------------------------------------------------------------- /wsz.nim: -------------------------------------------------------------------------------- 1 | import std/termios 2 | proc terminalSize(): IOctl_WinSize = 3 | for fd in [0, 1, 2]: 4 | if ioctl(fd.cint, TIOCGWINSZ, result.addr) != -1: return 5 | let t = terminalSize() 6 | echo "cells: " , t.ws_col , " x ", t.ws_row , 7 | " pixels: " , t.ws_xpixel, " x ", t.ws_ypixel, 8 | " charCell: ", t.ws_xpixel div max(1.cushort, t.ws_col), 9 | " x " , t.ws_ypixel div max(1.cushort, t.ws_row) 10 | -------------------------------------------------------------------------------- /doc/rr.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is really just `rm -rf` but able to use `cligen/dents.forPath` to 5 | maybe access faster OS interfaces for file tree traversal on Linux. 6 | 7 | Usage 8 | ----- 9 | ``` 10 | rr [optional-params] [roots: string...] 11 | 12 | Like rm -rf but a bit faster. Does nothing if no roots specified. 13 | 14 | -x, --xdev bool false block recursion across device boundaries 15 | -e, --eof0 bool false set eof0 16 | ``` 17 | -------------------------------------------------------------------------------- /tests/consisCvg.gpi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/gnuplot 2 | set term png size 1920,1080 font "Helvetica,20" 3 | set output "consisCvg.png" 4 | set title 'Consistency-Convergence Plot' 5 | set xrange [9:1003] 6 | set log x 7 | set xlab 'sample size (staggered only for plotting)' 8 | set ylab "1e-4 seconds of overhead ('')" 9 | # Manually hacked data files to be like n=10.1,10.2,.. 
10 | plot 1.88 t '60kRunSampleMin', \ 11 | 'consisCvg' u 1:2:4 w yerror t 'Low Tech', \ 12 | 'consisCvgB' u ($1+2):2:4 w yerror t 'EVT-Boot' 13 | -------------------------------------------------------------------------------- /tests/fage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | now=$(date +%s) # now as epoch seconds 4 | touch -t 01010101.01 -m p1 5 | touch -t 01010101.02 -a p1 6 | touch -t 01010101.03 -m p2 7 | touch -t 01010101.04 -a p2 8 | sec=$(stat -c%Y p1) # mtime as seconds 9 | echo $((now - sec)) $(fage -fm p1) should be within rounding 10 | echo $(fage -fb p1) should be \< 1 but ultimately FS dependent 11 | echo $(fage -R/S -ra -fm p1) should be 1.0 12 | echo $(fage -Rp2 -rm -fm p1) should be 2.0 13 | echo $(fage -v2 -Rp2 -ra -fm p1) Basis for 3.0 14 | rm -f p1 p2 15 | -------------------------------------------------------------------------------- /ww.nim: -------------------------------------------------------------------------------- 1 | import strutils, cligen, cligen/textUt 2 | when not declared(stdin): import std/syncio 3 | 4 | proc ww(maxWidth=0, power=3) = 5 | ## Multi-paragraph with indent=>pre-formatted optimal line wrapping using 6 | ## badness metric *sum excess space^power*. 7 | let maxWidth = if maxWidth != 0: maxWidth else: ttyWidth 8 | stdout.write wrap(stdin.readAll, maxWidth, power) 9 | 10 | include cligen/mergeCfgEnv 11 | dispatch ww, help={"maxWidth": "maximum line width; *0* => tty width", 12 | "power" : "power of excess space for badness"} 13 | -------------------------------------------------------------------------------- /doc/cfold.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Rather than just word-wrapping at width boundaries/word boundaries/etc., it can 5 | sometimes be useful to wrap when a pattern is seen in the input. 
6 | 7 | Usage 8 | ----- 9 | ``` 10 | cfold [optional-params] [pattern: string...] 11 | 12 | cfold is to fold as csplit(1) is to split(1). pattern is an rx at which to 13 | segment input lines in file. 14 | 15 | -s, --suppress bool false exclude matched strings 16 | -i, --ignore bool false add ignore case to re flags 17 | -e, --extended bool false nim re 'extended' syntax 18 | -f=, --file= string "-" input file ("-" == stdin) 19 | ``` 20 | 21 | Related Work 22 | ------------ 23 | This can also be done with GNU sed, but ergonomics of getting \n into 24 | expressions are poor. 25 | -------------------------------------------------------------------------------- /bu/labFloats.nim: -------------------------------------------------------------------------------- 1 | import cligen/mslice, std/parseutils; export initSep 2 | when not declared(File): import std/syncio 3 | 4 | proc labFloats*(f: File, sep: Sep): (seq[string], seq[seq[float]]) = 5 | ## Read lines of a file separating into float|non-float and saving all floats 6 | ## but only the last textual context. 7 | var cols: seq[TextFrame] 8 | for line in lines(f): 9 | let ms = line.toMSlice 10 | let m = ms.frame(cols, sep) 11 | if (let dm = m - result[0].len; dm > 0): 12 | result[0].setLen m 13 | result[1].setLen m 14 | for j in 0..output lines, sorting columns [skip:] within each row. 
16 | 17 | -p=, --pi= string "" path to input ; "" => stdin 18 | --po= string "" path to output; "" => stdout 19 | -i=, --iDlm= string "\t" input delimiter; w => repeated whitespace 20 | -o=, --oDlm= char '\t' output delimiter byte 21 | -s=, --skip= int 0 initial columns to NOT sort within rows 22 | -------------------------------------------------------------------------------- /keydowns.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/syncio 2 | proc keydowns(shift="~!@#$%^&*()_+|}{:\"?><", v=false): int = 3 | ## Return min key downs needed to enter all lines on stdin, optimizing SHIFTs. 4 | proc initSetChar(s: string): set[char] = 5 | for c in {'A'..'Z'}: result.incl c 6 | for c in s: result.incl c 7 | let shift = shift.initSetChar 8 | for str in stdin.lines: 9 | var down = false # BETWEEN strs, SHIFT goes key up 10 | let r0 = result 11 | for c in str: 12 | if c in shift: # Need shift 13 | if not down: down = true; inc result # Cnt key down 14 | else: down = false 15 | inc result 16 | if v: stderr.write result - r0, " ", str, "\n" 17 | 18 | when isMainModule: 19 | import cligen; include cligen/mergeCfgEnv; dispatch keydowns, echoResult=true, 20 | help={"shift": "in addition to 'A'..'Z'", "v": "err log counts & strings"} 21 | -------------------------------------------------------------------------------- /doc/fkindc.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | People often want to know what kind of files are within some file tree. This 5 | produces a nice little histogram of file(1)/libmagic(3) file types. See the 6 | [only doc](only.md) for more background. 7 | 8 | Usage 9 | ----- 10 | ``` 11 | fkindc [optional-params] 12 | 13 | Use gen and dlr1 to generate paths and histogram by file(1) type. 14 | 15 | -g=, --gen= string "find $1 -print0" generator cmd with dlr1 -> $1 16 | -d=, --dlr1= string "." 
$1 for gen fmt; Eg. ". -type f" 17 | -x=, --excl= set(Excl) {} tests to exclude like file(1) 18 | -j=, --jobs= int 0 use this many kids (0=auto) 19 | ``` 20 | 21 | Related Work 22 | ------------ 23 | This could probably have just been a new flag to `only`, but the code to do 24 | just this is quite a bit simpler. New option or new program is often a tough 25 | judgement call. 26 | -------------------------------------------------------------------------------- /tests/noa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | : ${idx:="-1"} 3 | n=' 4 | ' 5 | T() { 6 | d="" # Only Zsh needs this to avoid appending to $d 7 | n=' 8 | ' # read -rd works in Bash|Zsh, but not POSIX. So, 9 | while IFS= read -r line #..loop which works for all text but input with no 10 | do d="$d${line}$n" #..final newline where we add one "erroneously". 11 | done; d=${d%?} # Chop extra newline 12 | echo "$d" # This adds \n back here for test output. 13 | } 14 | noa "$idx" -- cp -a F -f "/x/maybe/mis${n}sing" | T 15 | noa "$idx" cp -- -a F -f "/x/maybe/mis${n}sing" | T 16 | noa "$idx" cp -a -- F -f "/x/maybe/mis${n}sing" | T 17 | noa "$idx" cp -a F -- -f "/x/maybe/mis${n}sing" | T 18 | noa "$idx" cp -a F -f -- "/x/maybe/mis${n}sing" | T 19 | noa "$idx" cp -a F -f "/x/maybe/mis${n}sing" -- | T 20 | echo ---- 21 | noa "$idx" cp -a F -f "${n}/x/maybe/m${n}issing${n}${n}" -- | T 22 | -------------------------------------------------------------------------------- /pid2.nim: -------------------------------------------------------------------------------- 1 | import std/[os, posix, parseutils] 2 | 3 | var pid = getpid() 4 | var last = 0.Pid 5 | var t = 300 # Linux starts Process ID table @300 6 | var cpu = 1 7 | var xSt: cint 8 | if paramCount()>0 and parseInt(1.paramStr,t)!=1.paramStr.len: 9 | quit "Expecting integer to try to wrap PID table to", 1 10 | if paramCount()>1 and parseInt(2.paramStr,cpu)!=2.paramStr.len: 11 | quit "Expecting CPU to set 
affinity for", 2 12 | let tgt = t.Pid 13 | 14 | template nextPid = 15 | last = pid 16 | pid = vfork() 17 | case pid # -1 quit leaves whole program 18 | of -1: quit "pid2: %s " & $errno.strerror, 2 19 | of 0: quit 0 # kid => die 20 | else : discard waitpid(pid, xSt, 0) # parent=>wait 21 | 22 | when defined(linux): 23 | import cligen/osUt;setAffinity([cpu.cint]) # Alder 14X faster 24 | 25 | while pid > tgt and last < pid: nextPid() # Get (last>=pid) 26 | while pid < tgt: nextPid() # 1st free past tgt 27 | -------------------------------------------------------------------------------- /noc.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/syncio 2 | import cligen/[mfile, osUt, sysUt, textUt], std/terminal 3 | 4 | if stdin.isatty: quit ("Usage:\n noc < someInput\n" & 5 | "strips ANSI CSI/OSC/SGR color escape sequences"), 1 6 | 7 | if (let mf = mopen("/dev/stdin", err=nil); mf.mem != nil): 8 | discard c_setvbuf(stdout, nil, IOFBF, 32768) # Boost 9 | for c in toOa[char](mf.mem, 0, mf.len - 1).noCSI_OSC: 10 | putchar c 11 | else: 12 | var io = newSeq[char](32768) # (i)nput-(o)utput buffer 13 | var nc: NoCSI_OSC # call-to-call parser state 14 | while not stdin.eof: 15 | let nI = stdin.ureadBuffer(io[0].addr, io.len) 16 | var nO = 0 17 | for c in toOa[char](io[0].addr, 0, nI-1).noCSI_OSC(nc): 18 | io[nO] = c # seq[char] faster than string here 19 | nO.inc # Clobber input w/stripped output 20 | if nO > 0: # 0 => Neither progress nor clobber 21 | if stdout.uriteBuffer(io[0].addr, nO) < nO: 22 | quit "stdout write fail; out of space?", 1 23 | -------------------------------------------------------------------------------- /doc/emin.md: -------------------------------------------------------------------------------- 1 | # Motivation / Example / Usage 2 | 3 | Sometimes a program spends a non-negligible time doing set up before some inner 4 | phase which is what you want to time. 
For this, [`tim`](tim.md) is 5 | inappropriate since there is more "overhead" to subtract than shell overhead, 6 | yet [`eve`](eve.md) seems more general than desirable, since you might still be 7 | using [`tim`](tim.md) to drive the experiment. So, for the case when you really 8 | just have a list of numbers in "tim-compatible layout" (Re: `--warmup`, `--k`, `--n`, 9 | `--m`), it's nice to say something like this: 10 | 11 | ``` 12 | tim="-k2 -o14 -n14 -m14" 13 | tim $tim "$prog 2>>/tmp/dts" 14 | emin $tim `NUL) 19 | -e=, --emit= set(Emit) summary Stuff to emit: summary detail 20 | ``` 21 | 22 | Related Work 23 | ------------ 24 | util-linux has `fincore`, but often I just want summary information and this is 25 | easy to compute in-program than as a wrapper program. Also, this program should 26 | work fine on OS X or many BSDs where util-linux is probably not installed. 27 | -------------------------------------------------------------------------------- /tslice.nim: -------------------------------------------------------------------------------- 1 | when isMainModule: 2 | import std/[os, syncio, strutils], cligen/[osUt, textUt] 3 | let ac = paramCount() 4 | let av1 = if ac >= 1: paramStr(1) else: "" 5 | var colon = if ac >= 1: av1.find(':') else: -1 6 | if ac < 1 or colon == -1: 7 | quit "Usage:\n "¶mStr(0)&" [a]:[b]\n" & 8 | "does UTF8-SGR aware Py-like slices of terminal columns on stdin", 1 9 | let A = av1[0 ..< colon] 10 | let B = av1[colon+1..^1] 11 | let a = if A.len > 0: A.parseInt else: 0 12 | let b = if B.len > 0: B.parseInt else: int.high 13 | let nl = "\n" 14 | if a < 0 or b < 0: 15 | for line in stdin.lines: 16 | var tot = 0 17 | for (_, w) in printedChars(line): inc tot, w 18 | let a = if a < 0: tot + a else: a 19 | let b = if b < 0: tot + b else: b 20 | let n = b - a 21 | for (s, _) in printedChars(line, a, n): stdout.urite line, s 22 | discard stdout.uriteBuffer(nl[0].addr, 1) 23 | else: 24 | let n = b - a 25 | for line in stdin.lines: 26 | for (s, 
_) in printedChars(line, a, n): stdout.urite line, s 27 | discard stdout.uriteBuffer(nl[0].addr, 1) 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 c-blake 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /doc/fage.md: -------------------------------------------------------------------------------- 1 | This program will be unneeded if coreutils `stat` ever grows an option to make 2 | %[WXYZ] emit full precision and/or Bash/Dash grow floating point arithmetic. 3 | (I would not hold your breath about either.) 4 | 5 | Usage 6 | ----- 7 | ``` 8 | fage [optional-params] [paths: string...] 
9 | 10 | Print max resolution age (`fileTime(Ref|self,rT) - fileTime(path,fT)`) for 11 | paths. "now" =~ program start-up. Examples: 12 | 13 | `fage x y` v-age of *x* & *y* relative to "now" 14 | `fage -fb x` b-age of *x* relative to "now" 15 | `fage -Rlog logDir` v-age of *log* rel.to its *logDir* 16 | `fage -srm -fb x y` **mtime - btime** for both *x* & *y* 17 | `fage -ra -R/ ''` Like `stat -c%X /`, but high-res 18 | 19 | Last works since missing files are given time stamps of 0 (start of 1970). 20 | 21 | Options: 22 | -R=, --Ref= string "" path to ref file 23 | -r=, --refTm= char 'v' ref file stamp [bamcv] 24 | -f=, --fileTm= char 'v' file time stamp [bamcv] 25 | -s, --self bool false take ref time from file itself 26 | -v=, --verb= int 0 0: Deltas; 1: Also paths; 2: diff-ends (ns) 27 | ``` 28 | -------------------------------------------------------------------------------- /doc/tmpls.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | `tmpls` is largely similar to a sub-shell such as: 4 | ```sh 5 | while read a; do printf "i/%s.c\no/%s.o\n" "$s" "$s"; done 6 | ``` 7 | but it is much faster.[^1] 8 | 9 | Usage 10 | ===== 11 | ``` 12 | tmpls [optional-params] templates... 13 | 14 | Interpolate { %s)tring | %n)eed quoted | always %q)uoted | %e)scaped } into 15 | as many templates as given, writing back-to-back template-filled-in batches to 16 | stdout, with each individual template terminated by term. 17 | 18 | E.g.: 19 | find . -name '*.c' -print|sed 's/.c$//' | tmpls %s.c %s.o %n.c %e.o 20 | 21 | Options: 22 | -f=, --file= string "/dev/stdin" input file of name stubs 23 | -n=, --nl= char '\n' input string terminator 24 | -t=, --term= char '\n' output string terminator 25 | -m=, --meta= char '%' self-quoting meta for %sub 26 | ``` 27 | 28 | [^1]: I get 25X-75X improvements. As always, this depends on a lot, such as if 29 | /bin/sh is dash, bash, zsh, etc. as well as what the CPU is. 
/bin/sh variation 30 | is large enough, and the implementation of `tmpls.nim` simple enough that real 31 | benchmarking does not seem very pointful. 32 | 33 | 34 | -------------------------------------------------------------------------------- /tests/tcatz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ $# -ne 2 ]; then cat <o' 'catz $1|cat>o' \ 24 | 'catz<$1>o' 'catz<$1|cat>o' 'cat<$1|catz>o' 'cat<$1|catz|cat>o' 25 | # iFile,oFile iFile,oPipe iPipe,oFile iPipe,oPipe 26 | do test1 "$1" "$2" "$3" "$tst" 27 | done 28 | } 29 | ln -s "$i" noExt 30 | test1 noExt "$o" "NO-EXTEN" 'catz $1>o' 31 | test1 noExt "$o" "NO-EXTEN" 'catz $1|cat>o' 32 | 33 | test6 "$i" "$o" DECODED 34 | 35 | dd seek="$SEEK" if="$i" of=noMag 2>/dev/null 36 | test6 noMag noMag PASS-THROUGH 37 | 38 | printf '' > len0 39 | test6 len0 len0 PASS-THROUGH-0 40 | 41 | rm -rf "$t" 42 | echo "SUCCESS" 43 | -------------------------------------------------------------------------------- /widths.nim: -------------------------------------------------------------------------------- 1 | import std/terminal, cligen, cligen/[sysUt, mfile], nio 2 | 3 | proc widths(outKind='\0', distro=false, paths: seq[string]) = 4 | ## Emit width/line lengths in bytes of all lines in files `paths`. 5 | ## 6 | ## If `histo` emit an exact histogram of such widths. 7 | ## 8 | ## Emits text if `outKind==NUL`, else binary in that NIO format. 
9 | 10 | if outKind != '\0' and stdout.isatty: Help!!"stdout is a terminal; Full $HELP" 11 | let kout = try: kindOf(outKind) except CatchableError: 0.IOKind 12 | var obuf: array[16, char] 13 | var cnts: seq[int] 14 | var mf: MFile 15 | for path in paths: 16 | for ms in mSlices(path, mf=mf): 17 | if distro: 18 | if ms.len + 1 > cnts.len: cnts.setLen ms.len + 1 19 | inc cnts[ms.len] 20 | elif outKind == '\0': 21 | echo ms.len 22 | else: # Convert & then emit line len as `kout` 23 | var n = ms.len 24 | convert kout, lIk, obuf[0].addr, n.addr 25 | stdout.nurite kout, obuf[0].addr 26 | mf.close 27 | if distro: 28 | for i, c in cnts: 29 | if c != 0: echo i, " ", c 30 | 31 | when isMainModule: include cligen/mergeCfgEnv; dispatch widths, help={ 32 | "distro" : "emit a histogram, not individual widths", 33 | "outKind": "emit binary stream with this NIO format"} 34 | -------------------------------------------------------------------------------- /doc/jointr.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is utility to make it easier to read `strace -f` output. If you do 5 | something like 6 | ```sh 7 | strace --decode-fds -fvs8192 -oFoo multi-process-program 8 | ``` 9 | and there is significant clone/fork spawning then you are likely to see 10 | a great many system calls which are reported twice - once at initiation, 11 | are then suspended and then again at resumption. 12 | 13 | The problem with this is that there can be a lot of other intervening text 14 | between these two points and the parameters of the initial call are not repeated 15 | by `strace` upon resumption. 16 | 17 | So, what `jointr` does is act as a filter to either stitch the two halves 18 | together or at least repeat the call parameters at the continuation to help 19 | make sense of things. 20 | 21 | Never resumed calls are just printed in hash order at the bottom. 
22 | 23 | Usage 24 | ----- 25 | 26 | ``` 27 | Usage: 28 | jointr [optional-params] strace log path (or none for stdin) 29 | 30 | -c=, --cont= string " " line suffix saying it continues 31 | -b=, --boc= string "<... " beg of contin. indication to eat 32 | -e=, --eoc= string " resumed>" end of contin. indication to eat 33 | -a, --all bool false retain "unfinished ..." in-place 34 | ``` 35 | -------------------------------------------------------------------------------- /doc/ww.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | You may want more evenly sized right margins than delivered by a greedy word 5 | wrap algorithm (filling as much as possible before breaking). cligen/textUt 6 | has this built into it (for help message formatting). It minimizes a penalty 7 | formula that is the sum of the p-th power of right margin space sizes. So this 8 | program is a razor thin CLI wrapper that is easy to use from, e.g. just :!ww 9 | from a vim visual select as I did for this very paragraph. Higher powers will 10 | penalize non-uniformity more. 11 | 12 | Usage 13 | ----- 14 | ``` 15 | ww [optional-params] 16 | Multi-paragraph with indent=>pre-formatted optimal line wrapping using badness 17 | metric sum excess space^power. 18 | -h, --help print this cligen-erated help 19 | --help-syntax advanced: prepend,plurals,.. 20 | -m=, --maxWidth= int 0 maximum line width; 0 => tty width 21 | -p=, --power= int 3 power of excess space for badness 22 | ``` 23 | 24 | Related Work 25 | ------------ 26 | Donald Knuth did some impressive work in his TeX layout engine on the much 27 | harder problem that combines kerning adjustment of proportionally spaced fonts 28 | and word wrap. This program is the kind of high school version of that, but 29 | the concept of "badness" does still show up in tex/latex error messages and is 30 | in the same general dimension. 
31 | -------------------------------------------------------------------------------- /bu/colSort.nim: -------------------------------------------------------------------------------- 1 | import std/algorithm, cligen/[mslice, osUt] 2 | when not declared(stdout): import std/syncio 3 | 4 | proc colSort*(fi, fo: File; iDlm="\t", oDlm='\t', skip=0) = 5 | let sep = initSep(iDlm) 6 | var cols: seq[MSlice] 7 | for (cs, nP1) in fi.getDelims: 8 | sep.split(MSlice(mem: cs, len: nP1 - 1), cols) 9 | var wrote = false # flag saying we wrote & so need to delimit 10 | for i in 0 ..< min(skip, cols.len): 11 | if wrote: outu oDlm else: wrote = true 12 | outu cols[i] 13 | if cols.len > skip: 14 | var cols = cols[skip..^1] 15 | cols.sort 16 | for c in cols: 17 | if wrote: outu oDlm else: wrote = true 18 | outu c 19 | outu '\n' 20 | 21 | proc colSort*(pi="", po="", iDlm="\t", oDlm='\t', skip=0) = 22 | ## Copy input->output lines, sorting columns [skip:] within each row. 23 | colSort if pi.len == 0: stdin else: open(pi), 24 | if po.len == 0: stdout else: open(po, fmWrite), iDlm, oDlm, skip 25 | 26 | when isMainModule: 27 | import cligen; include cligen/mergeCfgEnv 28 | dispatch (proc(pi,po,iDlm:string; oDlm:char; skip:int))colSort, help={ 29 | "pi" : "path to input ; \"\" => stdin", 30 | "po" : "path to output; \"\" => stdout", 31 | "iDlm": "input delimiter; w* => repeated whitespace", 32 | "oDlm": "output delimiter byte", 33 | "skip": "initial columns to NOT sort within rows"} 34 | -------------------------------------------------------------------------------- /rr.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | include cligen/unsafeAddr 3 | import std/[strformat, posix], cligen, cligen/[dents, posixUt, statx] 4 | 5 | proc rr*(roots: seq[string], xdev=false, eof0=false): int = 6 | ## Like rm -rf but a bit faster. Does nothing if no ``roots`` specified. 
7 | if roots.len == 0: return 8 | var dfds: seq[cint] 9 | for root in roots: 10 | forPath(root, 0, false, false, xdev, eof0, stderr, 11 | depth, path, nmAt, ino, dt, lst, dfd, dst, did): 12 | if dt != DT_DIR: 13 | if unlinkat(dfd, path[nmAt..^1].cstring, 0) != 0: 14 | stderr.log &"rr({path}): {strerror(errno)}\n" 15 | elif dfds.len > 0 and dfds[^1] == dfd: discard 16 | else: dfds.add dfd 17 | do: discard # Pre-recurse 18 | do: # Post-recurse (dt == DT_DIR guaranteed) 19 | if unlinkat(dfds.pop, path[nmAt..^1].cstring, AT_REMOVEDIR) != 0: 20 | stderr.log &"rr({path}): {strerror(errno)}\n" 21 | # Future dir-unlinks are doomed to fail ENOTEMPTY except if ENOENT here 22 | # IF racing other unlinker(s). quit here forfeits any such races. 23 | quit(1) 24 | do: recFailDefault("rr", path) # Cannot recurse 25 | return 0 26 | 27 | when isMainModule: 28 | include cligen/mergeCfgEnv 29 | dispatch(rr, help = { "xdev" : "block recursion across device boundaries" }) 30 | -------------------------------------------------------------------------------- /doc/ru.md: -------------------------------------------------------------------------------- 1 | I wrote this because /usr/bin/time is very low time resolution (10 ms) with a 2 | very hard to read default format and for a very long time (early 90s?) various 3 | OSes have provided better. When faced with the question "What is CPU?", perhaps 4 | the getrusage/wait4 answer of `ru` can be a first step. 5 | 6 | ``` 7 | Usage: (***NOT*** a cligen utility) 8 | 9 | ru [-whatiscpu] [prog args...] 10 | 11 | No options => as if -hit; else selected subset. 12 | 13 | Flags all in arg 1 & mean: 14 | w w)rapped output without row labels (to get fields by row, e.g. grep) 15 | h h)uman readable formats with (h)our:minute:seconds, MiB, etc. 
units 16 | a a)ll of the below, in the same order 17 | t t)ime,mem (wall, user, system time, CPU utilization, max Resident) 18 | i i)o (inBlocks, outBlocks, swaps, majorFaults, minorFaults) 19 | s s)witch/stack/sharing (volCtxSw, involSw, stack, txtResShr, datResShr) 20 | c interprocess (c)ommunications (signals, IPC sent, IPC received) 21 | p p)lain output (no ANSI SGR color escapes) 22 | u u)nwrapped output with field labels (to get fields by column, e.g. awk) 23 | ``` 24 | 25 | `man getrusage` | `man time` give more details on the various stats this small 26 | Nim program can print. 27 | 28 | You can put options in the `RU` environment variable. Compared to time(1), this 29 | is higher precision with more modern and controlled units. 30 | -------------------------------------------------------------------------------- /doc/niom.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | This little 50-liner mostly only exists & lives here since I try to keep some 4 | core library packages like `cligen`, `adix, and `nio` hard-dependency-free.[^1] 5 | 6 | Usage 7 | ===== 8 | ``` 9 | niom [optional-params] [paths: 1|more paths to NIO files] 10 | Print selected statistics over all columns of all paths. 
11 | 12 | -f=, --fmt= string ".4g" Nim floating point output format 13 | -s=, --stats= set(MomKind) min,max n min max sum avg sdev skew kurt histo 14 | -q=, --qs= floats {} desired quantiles 15 | -a=, --a= float 1e-16 min absolute value histo-bin edge 16 | -b=, --b= float 1e+20 max absolute value histo-bin edge 17 | -n=, --n= int 8300 number of lg-spaced histo bins 18 | ``` 19 | 20 | An Example 21 | ========== 22 | 23 | ```sh 24 | $ zipf -n10_000_000 -fbg 1..3 | niom -s,= -sh .Nl 25 | .Nl:0 n: 8300 a: 1e-16 b: 1e+20 26 | aLn: -36.841361487904734 h: 0.00998831947798357 hInv: 100.11694181430796 27 | bins,cnts: 28 | [ -1e-16 , 1e-16 ): 6467866 29 | [ 0.9955705858181852 , 1.0055644909629682 ): 2287419 30 | [ 1.9832854562249305 , 2.003194407942553 ): 1244715 31 | totalCount: 10000000 nonZeroBins: 3 32 | ``` 33 | Note that (2287419/6467866)**-(2./3) = 1.999601833743278, thus also a spot check 34 | of [zipf](zipf.md) with a default alpha=3/2. 35 | 36 | [^1]: If someone is living life with a few git clone's per year they are still 37 | cool to try out my packages even if `nimble` fails them. 38 | -------------------------------------------------------------------------------- /doc/fsids.md: -------------------------------------------------------------------------------- 1 | Usage: 2 | ``` 3 | fsids [optional-params] [roots: string...] 4 | 5 | Print a histogram of uids and/or gids used by a file tree 6 | 7 | -k=, --kind= IdKind both kind of ids to report user, group, both 8 | -o=, --order= Order id sort order: up by id or down by count 9 | -r=, --recurse= int 0 recursion limit for dirs in roots; 0=unbounded 10 | -f, --follow bool false follow symbolic links to dirs in recursion 11 | -x, --xdev bool false block recursion from crossing devices 12 | -e, --eof0 bool false set eof0 13 | ``` 14 | 15 | This produces a very simple filesystem id histogram. 
E.g., you might run `pwck` 16 | and get a report about misconfigured users and then have the question: should 17 | these users just be garbage collected? Or you might otherwise be interested in 18 | diversity of file ownership under various sub-trees. 19 | 20 | For example, 21 | ``` 22 | fsids -r0 /etc 23 | ``` 24 | might produce 25 | 26 | ``` 27 | #Uid Nentry Name 28 | 0 1845 root 29 | 23 8 www 30 | 70 5 postgres 31 | 102 2 openvpn 32 | 250 77 portage 33 | 439 4 ldap 34 | 13615 3 MISSING 35 | #Gid Nentry Name 36 | 0 1833 root 37 | 7 8 lp 38 | 8 3 mem 39 | 23 8 www 40 | 70 5 postgres 41 | 110 5 fcron 42 | 250 91 portage 43 | 391 1 unbound 44 | 439 4 ldap 45 | ``` 46 | which indicates there are 3 files with archaic/obsolete UIDs (labeled 47 | "MISSING" here). 48 | -------------------------------------------------------------------------------- /flow.nim: -------------------------------------------------------------------------------- 1 | when not declared stdin: import std/syncio 2 | import std/[algorithm, sugar], cligen/[textUt, tab], cligen 3 | 4 | proc flow*(input="", output="", pfx="", width=0, gap=1, byLen=false, maxPad=99)= 5 | ## Read maybe utf8-colored lines from `input` & flow them into shortest height 6 | ## table of top-to-bottom, left-to-right columns & write to `output`. 
7 | let i = if input.len > 0: open(input) else: stdin 8 | var strs = collect(for line in i.lines: line) 9 | if byLen: strs.sort cmp=proc(a, b: string): int = a.printedLen - b.printedLen 10 | let wids = collect(for str in strs: -str.printedLen) # - here means left-align 11 | let o = if output.len > 0: open(output, fmWrite) else: stdout 12 | if gap < 0: (for x in strs: o.write x) 13 | else: 14 | let W = if width == 0: ttyWidth elif width < 0: ttyWidth + width else: width 15 | let w = W - pfx.len 16 | var nrow, ncol: int; let m = 1 17 | var colWs = layout(wids, w, gap, maxPad, m, nrow, ncol) 18 | colPad(colWs, w, maxPad, m) 19 | o.write(strs, wids, colWs, m, nrow, ncol, 0, pfx) 20 | 21 | dispatch flow, help={"input" : "use this input file; \"\"=>stdin", 22 | "output": "use this output file; \"\"=>stdout", 23 | "pfx" : "pre-line prefix (e.g. indent)", 24 | "width" : "rendered width; 0: auto; <0: auto+THAT", 25 | "gap" : "max inter-column gap; <0: 1-column", 26 | "byLen" : "sort by printed-length of row", 27 | "maxPad": "max per-column padding"} 28 | -------------------------------------------------------------------------------- /fage.nim: -------------------------------------------------------------------------------- 1 | import std/[strutils, times], cligen/[sysUt, statx], cligen 2 | when not declared(stdout): import std/syncio 3 | 4 | proc fage(Ref="", refTm='v', fileTm='v', self=false, verb=0, paths:seq[string])= 5 | ## Print max resolution age (`fileTime(Ref|self,rT) - fileTime(path,fT)`) 6 | ## for paths. "now" =~ program start-up. Examples: 7 | ## `fage x y` v-age of *x* & *y* relative to "now" 8 | ## `fage -fb x` b-age of *x* relative to "now" 9 | ## `fage -Rlog logDir` v-age of *log* rel.to its *logDir* 10 | ## `fage -srm -fb x y` **mtime - btime** for both *x* & *y* 11 | ## `fage -ra -R/ ''` Like `stat -c%X /`, but high-res 12 | ## Last works since missing files are given time stamps of 0 (start of 1970). 13 | if paths.len == 0: Help !! 
"Need >= 1 path; $HELP" 14 | let tR = if self: 0i64 # just to skip unneeded syscall(s) 15 | else: Ref.fileTime(refTm, int64(epochTime() * 1e9)) 16 | for path in paths: 17 | let tR = if self: path.fileTime(refTm) else: tR 18 | let tF = fileTime(path, fileTm) 19 | let age = float(tR - tF) * 1e-9 20 | stdout.write formatFloat(age, ffDecimal, 9) 21 | if verb > 1: 22 | stdout.write " ", tR, " ", tF 23 | if verb > 0: 24 | echo " ", path else: echo "" 25 | 26 | when isMainModule: include cligen/mergeCfgEnv; dispatch fage, help={ 27 | "Ref" : "path to ref file", 28 | "refTm" : "ref file stamp [bamcv]", 29 | "fileTm": "file time stamp [bamcv]", 30 | "self" : "take ref time from file itself", 31 | "verb" : "0: Deltas; 1: Also paths; 2: diff-ends (ns)"} 32 | -------------------------------------------------------------------------------- /bu/esquo.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | import std/strutils, cligen/[sysUt, strUt, mslice, osUt] 3 | 4 | type EsQuo* = enum eqNeed, eqAlways, eqEscape ## Quoting mode enum 5 | 6 | proc esQuoParse*(q: string): EsQuo = 7 | ## Parse a quoting mode string into its enum or raise `ValueError`. 8 | case (if q.len > 0: q[0].toLowerAscii else: 'X') 9 | of 'n': result = eqNeed 10 | of 'q': result = eqAlways 11 | of 'e': result = eqEscape 12 | else: Value !! "Unknown quote mode: \"" & q & "\"." 13 | 14 | const needQuo* = {'\t', '\n', ' ', '!', '"', '#', '$', '&' , '\'', '(', ')', 15 | '*', ';', '<', '=', '>', '?', '?', '[', '`' , '{', '|', '~'} 16 | 17 | # Can save empty string ('') catenation if you can *know* starts|ends with ' 18 | var quoHunks: seq[MSlice] 19 | proc sQuote*(f: File, s: SomeString; hunks: var seq[MSlice] = quoHunks) = 20 | ## Shell Single-Quoter. `hunks` is just for MT-safety if you need that. 
21 | f.urite '\'' 22 | discard s.msplit(hunks, '\'', 0) 23 | for i, hunk in hunks: 24 | f.urite hunk 25 | if i != 0: f.urite "'\\''" 26 | f.urite '\'' 27 | 28 | proc escape*(f: File, s: SomeString, esc='\\', need={'\0'..'\x7F'}) = 29 | ## Escape every byte with `esc`. Not very unicode-friendly. 30 | for c in s: 31 | if c in need: f.urite esc 32 | f.urite c 33 | 34 | proc emit*(f: File, s: SomeString, qmode=eqNeed, esc='\\') = 35 | ## Emit `s` to `f`, quoting or escaping as specified. 36 | case qmode 37 | of eqNeed: (if needQuo in s: stdout.sQuote s else: stdout.urite s) 38 | of eqAlways: stdout.sQuote s 39 | of eqEscape: stdout.escape s 40 | -------------------------------------------------------------------------------- /doc/nrel.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | The Nim package manager nimble identifies versions by the most recent git tag. 4 | This must match in the .nimble file and the git repository. It is pretty easy 5 | to forget changing it one place or the other when making new releases. 6 | 7 | Usage 8 | ----- 9 | ``` 10 | nrel [NEED,optional-params] 11 | Bump version in .nimble, commit, tag & push using just nim, this prog & git. 12 | Final optional stage uses github-cli's gh release creation. 
13 | 14 | -v=, --vsn= string "" New version; "": auto bump 15 | -b=, --bump= VSlot patch Version slot to bump: Major, minor, patch 16 | -m=, --msg= string "" .nimble commit; "": Bump versions pre-release 17 | -s=, --stage= Stage push nimble, commit, tag, push, release 18 | -t=, --title= string "" Release title 19 | -n=, --notes= string "" Path to release notes markdown 20 | ``` 21 | 22 | Examples 23 | -------- 24 | ```sh 25 | cd myRepo 26 | nrel 27 | # Now go to github and draft a release 28 | ``` 29 | or if you have `gh` installed from github-cli 30 | ``` 31 | cd myRepo 32 | edit /tmp/RELNOTE # add release notes 33 | nrel -sr -t 'This is my new release title' -n /tmp/RELNOTE 34 | ``` 35 | 36 | Future Work 37 | ----------- 38 | It would be nice to also update all dependency versions in `requires` in 39 | the nimble file to whatever their latest versions are since this is the most 40 | likely testing case by far. That is a bit more work, though. 41 | 42 | Related Work 43 | ------------ 44 | I feel just assuming & using a command-line `git` program is a simpler approach 45 | than done in https://github.com/disruptek/bump 46 | -------------------------------------------------------------------------------- /doc/dirt.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | File times on directories are funny things. On the one hand, it can be nice 5 | to see when you last renamed something inside or deleted an entry. On the 6 | other hand, you may prefer after several to many such edits, that the hierarchy 7 | of directories "represent" what it contains and maybe only the ctime reflects 8 | the last edit. This latter conceptual mode is what motivates `dirt`. 9 | 10 | Maybe a simpler way to describe it is operationally: it makes `ls -lt` show 11 | things in the order of what is "most recently modified below", recursively. 12 | 13 | This can be a divisive transformation. 
Some will decry it as ruining the 14 | utility of `ls -lt`. Others will praise it as making it much more useful. 15 | The right response varies with use case-specific, but without this tool it's 16 | not easy to even have a choice. Only "never useful" hardliners can truly 17 | object to its existence. 18 | 19 | Usage 20 | ------ 21 | ``` 22 | dirt [optional-params] [roots: string...] 23 | 24 | Set mtimes of dirs under roots to mtime of its newest kid. 25 | 26 | This makes directory mtimes "represent" content age at the expense of erasing 27 | evidence of change which can be nice for time-sorted ls in some archival file 28 | areas. 29 | 30 | -v, --verbose bool false print utimes calls as they happen 31 | -q, --quiet bool false suppress most OS error messages 32 | -n, --dry-run bool false only print what system calls are needed 33 | -p=, --prune= strings {} prune exactly matching paths from recursion 34 | -x, --xdev bool false block recursion across device boundaries 35 | ``` 36 | -------------------------------------------------------------------------------- /doc/flow.md: -------------------------------------------------------------------------------- 1 | ## Motivation 2 | 3 | This is a height-optimizing "tabulator" program that can reduce output terminal 4 | scrolling by several dozen times. (E.g., 40..80 per row for 1 byte columns.) 5 | 6 | ## Example With 28X Improvement 7 | 8 | ```sh 9 | $ seq 1 84|flow # Run on an 80-column terminal 10 | 1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 49 52 55 58 61 64 67 70 73 76 79 82 11 | 2 5 8 11 14 17 20 23 26 29 32 35 38 41 44 47 50 53 56 59 62 65 68 71 74 77 80 83 12 | 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 48 51 54 57 60 63 66 69 72 75 78 81 84 13 | ``` 14 | 15 | 3 rows instead of 84. 84/3 = 28.0. 
16 | 17 | Another example might be with ls[^1] on a wide terminal: 18 | ``` 19 | ls -lt /var/log|flow 20 | ``` 21 | 22 | ## Usage 23 | ``` 24 | flow [optional-params] 25 | 26 | Read maybe utf8-colored lines from input & flow them into shortest height table 27 | of top-to-bottom, left-to-right columns & write to output. 28 | 29 | Options: 30 | -i=, --input= string "" use this input file; ""=>stdin 31 | -o=, --output= string "" use this output file; ""=>stdout 32 | -p=, --pfx= string "" pre-line prefix (e.g. indent) 33 | -w=, --width= int 0 rendered width; 0: auto; <0: auto+THAT 34 | -g=, --gap= int 1 max inter-column gap; <0: 1-column 35 | -b, --byLen bool false sort by printed-length of row 36 | -m=, --maxPad= int 99 max per-column padding 37 | ``` 38 | ## Related Work 39 | 40 | GNU/BSD `column` does something similar but does not support a concept of 41 | printed/rendered length (i.e. utf8/ANSI SGR color escape sequences). 42 | 43 | [^1]: Though [lc](https://github.com/c-blake/lc) is nicer in many ways. 44 | -------------------------------------------------------------------------------- /uce.nim: -------------------------------------------------------------------------------- 1 | when not declared(addFloat): import std/formatfloat 2 | import cligen/[mfile, mslice, strUt], adix/uniqce 3 | 4 | proc uce*(input="/dev/stdin", k=1024, re=0..5, fmt1="$val0 +- $err0", 5 | expF="($valMan +- $errV)$valExp") = 6 | ## Emit Unique Count Estimate of `input` lines to stdout. Algo is fast, low 7 | ## space 1-pass KMV over mmap | stream input. (For exact, see `lfreq`.) 8 | var uce = initUniqCe[float](k) 9 | for line in mSlices(input, eat='\0'): # RO mmap | slices from stdio 10 | when defined(cHash): 11 | let h = float(cast[uint64](hash(line)))*(1.0/1.8446744073709551615e19) 12 | else: # std/hashes(data) sadly only 32-bits! 
13 | let h = float(cast[uint32](hash(line)))*(1.0/4294967295.0) 14 | uce.push h 15 | if fmt1.len == 0: # The 2 estimates to full float prec 16 | echo uce.nUnique, " ", uce.nUniqueErr 17 | else: # Near-exact to fmt as "15.00"; Err is technically hash-collision rate 18 | echo fmtUncertain(uce.nUnique, max(uce.nUniqueErr, 0.1), fmt1, expF, re) 19 | 20 | when isMainModule: 21 | import cligen # Wide defaults => drop 22 | clCfg.hTabCols = @[clOptKeys, clDflVal, clDescrip] #..the data type column 23 | include cligen/mergeCfgEnv # Allow cfg files for +- 24 | dispatch uce, help={"input": "input data path", 25 | "k" : "size of the sketch in float64 elts", 26 | "re" : "range of 10expon defining 'near 1'", 27 | "fmt1" : "fmt for uncertain num near 1", 28 | "expF" : "fmt for uncertain num beyond `re`"} 29 | -------------------------------------------------------------------------------- /doc/since.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | This is (mostly) a convenience program for something I often want to know or 4 | do in scripts. 5 | 6 | Usage 7 | ----- 8 | ``` 9 | since [NEED,optional-params] [paths: string...] 10 | 11 | Print files whose time is since|before refTime of refPath. 12 | 13 | Files examined = UNION of paths + optional delim-delimited input file (stdin if 14 | "-"|if "" & stdin is not a terminal), maybe recursed as roots. 15 | 16 | To print regular files m-older than LAST under CWD: 17 | since -t-m -pLAST -r0 . 
18 | 19 | Options: 20 | -p=, --refPath= string NEED path to ref file 21 | -T=, --refTime= string "" stamp of ref file to use (if different) 22 | -t=, --time= string "m" stamp to compare ({-}[bamcv]*) 23 | -r=, --recurse= int 1 recurse n-levels on dirs; 0:unlimited 24 | -c, --chase bool false chase symlinks to dirs in recursion 25 | -D, --Deref bool false dereference symlinks for file times 26 | -k=, --kinds= set(FileKind) file i-node type like find(1): [fdlbcps] 27 | -q, --quiet bool false suppress file access errors 28 | -x, --xdev bool false block recursion across device boundaries 29 | -f=, --file= string "" optional input ("-"|!tty=stdin) 30 | -d=, --delim= char '\n' input file record delimiter 31 | -e, --eof0 bool false read dirents until 0 eof 32 | -n, --noDot bool false remove a leading . from names 33 | -u, --unique bool false only print a string once 34 | ``` 35 | Related Work 36 | ------------ 37 | GNU `find -*newer` does not support the new-ish Linux b-time and is also slow. 38 | -------------------------------------------------------------------------------- /doc/holes.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | 4 | Virtual machine or ISO disc images or other "file systems within a file" or 5 | sometimes object / database files can have intentionally large holes / sparsity. 6 | Corrupted torrent downloads may, meanwhile, have very large holes by accident. 7 | 8 | While these files can be easily identified with `ls -ls` or `stat` comparing the 9 | allocated blocks and seek-addressable file size, I could find no standard Unix 10 | command-line tool to count/list holes. (A quick web search will show numerous C 11 | programming examples to use the Unix API to list holes, though.) So here is 12 | (probably another) one. 13 | 14 | Usage 15 | ===== 16 | 17 | ``` 18 | holes [optional-params] [files: string...] 
19 | 20 | Show hole & data segments for files 21 | 22 | -f=, --format= string "" emit format interpolating: 23 | $count : number of data|hole segments 24 | $path : path name of REGULAR FILE from $* 25 | $map : map of all data&hole segments 26 | $nul : a NUL byte 27 | "" => "$count\t$path\n$holes\n" 28 | ``` 29 | 30 | Example 31 | ======= 32 | 33 | ```sh 34 | truncate -s 5000 x; printf hi >>x; holes x 35 | ``` 36 | 37 | prints on a file system with 4096-byte blocks: 38 | 39 | ``` 40 | 2 x 41 | hole 4096 42 | data 906 43 | ``` 44 | 45 | Related 46 | ======= 47 | 48 | `filefrag` is a similar but distinct utility which uses a less portable Linux 49 | FIEMAP `ioctl`. Distinctness-wise, for example, I get "4 extents" from a 50 | `filefrag foo.xfs`, while `holes` reports 42814 hole|data segments[^1]. 51 | 52 | [^1]: This is admittedly after running `xfs_fsr`. 53 | -------------------------------------------------------------------------------- /doc/unfold.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Programs sometimes have multi-row/multi-line outputs with decent regularity. 5 | You may want to "table-ify" such outputs for further processing, e.g. to do some 6 | quick arithmetic work across columns with an `awk` or [rp](rp.md) at the end of 7 | a shell pipeline. 8 | 9 | This can also be useful in extract-transform-load (ETL) contexts where you want 10 | to re-shape inputs to a table loading pipeline. 11 | 12 | Sometimes it seems more natural to create multi-row outputs and then pipe them 13 | to `unfold`. For example: 14 | ```sh 15 | cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency \ 16 | /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq | 17 | unfold -n2 | awk '{print $1/$2}' # Float arithmetic here 18 | ``` 19 | 20 | Usage 21 | ----- 22 | ``` 23 | unfold [optional-params] 24 | Join blocks of stdin lines into one line sent to stdout. 
25 | -h, --help print this cligen-erated help 26 | --help-syntax advanced: prepend,plurals,.. 27 | -s=, --sep= string "\t" separates the old lines within the new 28 | -n=, --n= int 0 Join |n| lines into 1 29 | -b=, --before= string "" join blocks beginning with a matching line 30 | -a=, --after= string "" join blocks ending with a matching line 31 | -i, --ignore bool false regex are case-insensitive 32 | -e, --extended bool false regexes are nim re 'extended' syntax 33 | ``` 34 | 35 | Related Work 36 | ------------ 37 | There are ways to do this with `awk`|etc. directly, but require either state 38 | machine-think that distracts in the heat of the analysis moment or else some 39 | devoted `awk`|etc. scripts. You could think of this program as a replacement 40 | for some such scripts (that probably runs faster than them). 41 | -------------------------------------------------------------------------------- /tmpls.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | import std/sugar, cligen, cligen/[sysUt, strUt, mslice, mfile, osUt], bu/esquo 3 | 4 | proc interPrint(f: File; tmpl: string, prs: seq[MacroCall]; str: SomeString) = 5 | for (id, arg, call) in prs: 6 | if id.idIsLiteral: f.urite tmpl, arg 7 | elif tmpl[id.a] == 's': f.urite str 8 | elif tmpl[id.a] == 'n': (if needQuo in str: f.sQuote str else: f.urite str) 9 | elif tmpl[id.a] == 'q': f.sQuote str 10 | elif tmpl[id.a] == 'e': f.escape str, need=needQuo 11 | else: f.urite tmpl, call 12 | 13 | proc tmpls(inp="/dev/stdin", nl='\n', outp="/dev/stdout", term='\n', meta='%', 14 | templates: seq[string]): int = 15 | ## Interpolate { %s)tring | %n)eed quoted | always %q)uoted | %e)scaped } into 16 | ## as many templates as given, writing back-to-back template-filled-in batches 17 | ## to stdout, with each individual template terminated by `term`. E.g.: 18 | ## ``find . 
-name '\*.c' -print|sed 's/.c$//' | tmpls %s.c %s.o %n.c %e.o`` 19 | if templates.len < 1: Help !! "Need some template; Full $HELP" 20 | let prs = collect(for t in templates: t.tmplParsed(meta)) 21 | let f = try: (if outp == "/dev/stdout": stdout else: open(outp, fmWrite)) 22 | except Ce: quit "could not open output: " & outp, 1 23 | for ms in mSlices(inp, sep=nl, eat='\0'): 24 | for i in 0 ..< templates.len: 25 | f.interPrint templates[i], prs[i], ms 26 | f.urite term 27 | 28 | when isMainModule: 29 | include cligen/mergeCfgEnv; dispatch tmpls, help={"templates": "templates...", 30 | "inp" : "input file of name 'stubs'", 31 | "nl" : "input string terminator", 32 | "outp" : "output file of expansions", 33 | "term" : "output string terminator", 34 | "meta" : "self-quoting meta for %sub"} 35 | -------------------------------------------------------------------------------- /tattr.nim: -------------------------------------------------------------------------------- 1 | import std/os, cligen/[sysUt, humanUt], cligen 2 | when not declared(stdout): import std/syncio 3 | 4 | proc tattr(attrs: seq[string]) = 5 | ## Emit to stdout an escape string activating text colors/styles, honoring 6 | ## $NO_COLOR & also reading ~/.config/cligen for $LC_THEME-based aliases. 7 | ## 8 | ## Non-color styles; Prefix with '-' to turn off. 9 | ## bold, faint, italic, inverse, hid, struck, blink (slow), BLINK (fast), 10 | ## under{line double dot dash curl}, over. 11 | ## 12 | ## Regular color keywords are in lower case; Bright bank in UPPER CASE: 13 | ## black, red, green, yellow, blue, purple, cyan, white 14 | ## BLACK, RED, GREEN, YELLOW, BLUE, PURPLE, CYAN, WHITE 15 | ## Colors are foreground by default. Pre-pend "on_" for Background. 
16 | ## 17 | ## 256-color or true color terminals like xterm|st|kitty also support: 18 | ## {fbu}[0..23] for F)ORE/B)ACKgrnd U)NDER grey scale 19 | ## {fbu}RGB where R, G, B are in 0..5 20 | ## {fbu}RRGGBB with RR, GG, BB are in hexadecimal (true color) 21 | ## 22 | ## An element of color scale NAME {viridis hue wLen gray pm3d} can be chosen 23 | ## via: 24 | ## {fbu}sNAME<0.-1>[,..] 25 | ## where only `hue` and `wLen` take [,sat,val] optionally. "wLen" is for 26 | ## "waveLength" - (yes, I know RGB light is a mixture; terms are just to imply 27 | ## *rough* "spectral order" or hot..cold / cold..hot / "heat" map). 28 | ## 29 | ## off, none, NONE turn off all special graphics renditions while -fg, -bg 30 | ## turn off just ForeGround, BackGround embellishment. 31 | ## 32 | ## *NOTE* May Need "--" for -bg, -bold, etc. 33 | if attrs.len == 0: Value !! "\n Need >= 1 attrs. See tattr --help" 34 | stdout.write textAttrOn(attrs, plain=existsEnv("NO_COLOR")) 35 | 36 | dispatch tattr 37 | -------------------------------------------------------------------------------- /cfold.nim: -------------------------------------------------------------------------------- 1 | import std/re, cligen, cligen/sysUt 2 | when not declared(lines): import std/syncio 3 | 4 | iterator csplit(s: string, pat: Regex): tuple[body: string, sep: string] = 5 | ## Iterate over segments of a string split by a pattern, yielding (body,sep) 6 | ## tuples. This correctly handles cases where the string does or does not end 7 | ## in a sep and where all bodies are empty strings. 8 | var a, b: int 9 | var beg = 0 10 | while true: 11 | (a, b) = findBounds(s, pat, start = beg) #s[a..b] if found else (-1,0) 12 | if a == -1: 13 | break 14 | yield (s[beg .. a-1], s[a .. b]) 15 | beg = b + 1 16 | if beg < s.len: 17 | yield (s[beg..^1], "") 18 | 19 | proc cfold(suppress=false, ignore=false, extended=false, file="-", 20 | pattern: seq[string]) = 21 | ## `cfold` is to `fold` as `csplit`(1) is to `split`(1). 
``pattern`` is an rx 22 | ## at which to segment input lines in `file`. This can also be done with GNU 23 | ## sed, but ergonomics of getting \\n into expressions are poor. 24 | var flags = {reStudy} 25 | if ignore: flags.incl reIgnoreCase 26 | if extended: flags.incl reExtended 27 | if pattern.len != 1: 28 | Help !! "Need exactly one pattern; Full $HELP" 29 | let pat = re(pattern[0], flags) 30 | for line in lines(if file != "-": open(file) else: stdin): 31 | for segment in csplit(line, pat): 32 | stdout.write(segment.body) 33 | if not suppress: 34 | stdout.write(segment.sep) 35 | stdout.write("\n") 36 | 37 | include cligen/mergeCfgEnv 38 | dispatch cfold, help={"file" : "input file (\"-\" == stdin)", 39 | "ignore" : "add ignore case to re flags", 40 | "extended": "nim re 'extended' syntax", 41 | "suppress": "exclude matched strings"} 42 | -------------------------------------------------------------------------------- /doc/adorn.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | A useful semi-frequent transformation of a text table is something like: 4 | 5 | ```sh 6 | awk '{$col = prefix $col suffix; print $0}' 7 | ``` 8 | BUT this normalizes whitespace between columns, messing up terminal alignment 9 | if any was present. `adorn` seeks to be less disruptive. 10 | 11 | There are, of course, Perl/Python solutions, but the body of the Nim code is 12 | only 28 lines and it runs much faster (>7X in informal timings). 13 | 14 | # Usage 15 | ``` 16 | adorn [optional-params] colNums (origin-origin column numbers) 17 | 18 | input-output filter to adorn fields by adding prefix &| suffix to delim-ited 19 | colNums, preserving ambient text. colNums, prefix, suffix SHARE INDEXING (so 20 | you may need to pad with ""). 
E.g.: 21 | 22 | paste <(seq 1 3) <(seq 4 6) <(seq 7 9) | adorn -pA -sB 1 -pC 3 23 | 24 | Options: 25 | --origin= int 1 origin for colNums; 0 => signed indexing 26 | -O, --O0 bool false shorthand for --origin=0 27 | -p=, --prefix= strings {} strings to prepend to listed columns 28 | -s=, --suffix= strings {} strings to append to listed columns 29 | -i=, --input= string "" path to mmap|read as input; "" => stdin 30 | -r=, --rowDlm= char '\n' input row delimiter character 31 | -d=, --delim= string "w" input field dlm chars; len>0=>fold;w=white 32 | -o=, --output= string "" path to write output file; "" => stdout 33 | ``` 34 | 35 | # Examples 36 | 37 | Add an explicit field label somewhere (possibly for additional post-processing): 38 | ```sh 39 | seq 1 9 | adorn -p 'label: ' 1 40 | ``` 41 | 42 | Make `$argv[0]` inverse { using [`tattr`](tattr.md) } in a cb0 `--style=basic` 43 | [`procs display`](https://github.com/c-blake/procs) listing: 44 | ```sh 45 | pd -sb | adorn -p$(tattr inverse) -s$(tattr -- -inverse) 8 46 | ``` 47 | -------------------------------------------------------------------------------- /doc/okpaths.md: -------------------------------------------------------------------------------- 1 | Basics 2 | ------ 3 | Usage: 4 | ``` 5 | okpaths ENVAR [DELIM(:) [ITYPE{bcdpfls}(d) [PERMS{rwx}(x) [DEDUP{FL*}(F)]]]] 6 | ``` 7 | The [] notation here indicates optionality and defaults are in (). 8 | 9 | This program echoes re-assembled value for `$ENVAR` delimited by ASCII character 10 | `DELIM`. Each retained element is i-node type `ITYPE` with permissions `PERMS`. 11 | 12 | & optional de-duplication. 13 | 14 | Eg., PATH=`okpaths PATH` keeps only existing (d)irs executable(x) by an invoking 15 | user. DEDUP starting with 'F' means keep F)irst use, while 'L' keeps L)ast use 16 | & other means no de-dup (this is case-insensitive). So, eval `okpaths PATH` is 17 | nice in rc/init scripts for Unix shells.
18 | 19 | Blocks of the 5 params can repeat (since fork&exec add to shell init time). 20 | 21 | The i-node type abbreviation is the somewhat standard (`ls -l` | `find`): 22 | * b (B)lock device 23 | * c (C)haracter device 24 | * d (D)irectory 25 | * p named (P)ipe/FIFO 26 | * f Regular (F)ile 27 | * l Symbolic (L)ink 28 | * s Unix domain (S)ocket 29 | 30 | Motivation 31 | ---------- 32 | `eval $(okpaths PATH : d rx u)` is useful in shell start-up scripts (like 33 | `~/.profile`) where you might assemble a search path or man path or et cetera 34 | from a variety of *possible* locations, but then want to trim the value down to 35 | locations valid at shell init. 36 | 37 | This trimming makes `echo $ENVAR` less noisy and may prevent annoying extra, 38 | unneeded work during start-up of dependent programs. Sometimes this extra work 39 | can be quite a lot, (e.g. with a slow NFS automounter), although just running 40 | `okpaths` will have to do it at least once. 41 | 42 | Note that login shells can be very long-lived and FS availability dynamic. So, 43 | validity at `okpaths`/shell start-up-time is not a perfect solution. 44 | -------------------------------------------------------------------------------- /doc/ndelta.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Often one runs a program twice - one way and the other way - to see how some 4 | parameter/input/mode/.. changes things. One then wants to compare the output of 5 | something reported - maybe resource consumption like time/space or success 6 | amounts or other accuracy parameters or other numeric outputs. So, the output 7 | is "largely" identical (or can be made so with sorting) except for "numbers" in 8 | different areas of the report. This situation is what `ndelta` is for. 9 | 10 | Usage 11 | ----- 12 | ``` 13 | ndelta [optional-params] [paths: string...] 
14 | 15 | Replace numbers in token-compatible spots of paths[0] & paths[1] with (absolute 16 | | ratio | relative | perCent) deltas. To trap out-of-order data, differences in 17 | context are highlighted unless sloppy is true. 18 | 19 | -k=, --kind= DKind ratio DiffKind: absolute, ratio, relative, perCent 20 | -d=, --delims= string "white" repeatable delim chars 21 | -n=, --n= int 3 FP digits to keep 22 | -s, --sloppy bool false allow non-numerical context to vary silently 23 | ``` 24 | A relative difference here is the `ratio - 1.0` while `perCent` is that 25 | multiplied by 100. 26 | 27 | Presently, `ndelta` has some sanity checks (total token count equality) to 28 | help the report be meaningful in the way intended and a sloppy mode to let 29 | context/delimiters vary but be reported in the output. 30 | 31 | Related Work 32 | ------------ 33 | There is, of course, the ever-present `diff` possibly combined with my `hldiff` 34 | to highlight sections, but this only presents textual differences while one 35 | often wants numeric (one of the 4 kinds currently supported by `ndelta`). 36 | `ndelta` is a very simple program. Variants of it have surely been done many 37 | times. If me not mentioning one here bugs you, bug me and I'll mention it. :) 38 | -------------------------------------------------------------------------------- /noa.nim: -------------------------------------------------------------------------------- 1 | import std/cmdline {.all.} # cmdCount, cmdLine 2 | from std/strutils import strip 3 | from std/parseutils import parseInt 4 | from std/sugar import collect 5 | 6 | # Wrap a command to emit last non-option arg 7 | const use = "(n)on-(o)ption (a)rgument usage:\n\n" & 8 | " noa {index} options-and-args\n\n" & 9 | "E.g.: noa -1 cp -a foo -f -- /exists/maybe/missing\n" & 10 | "emits \"/exists/maybe/missing\" no matter where \"--\" is.\n" & 11 | "Can be nice in scripts to e.g. ensure must-haves exist." 
12 | 13 | iterator nonOpts(): int = # Any alternative to Unix -- convention? 14 | var optsDone = false 15 | for i in 2 ..< cmdCount: # skip $0 = noa and $1 = idx .. BUT 16 | if optsDone: yield i #..yield unadjusted `cmdLine` indices. 17 | else: 18 | let a = cmdLine[i] #NOTE: All OSes terminate with \0 19 | if a[0] == '-': # Some kind of option | end of options 20 | if a[1] == '-' and a[2] == '\0':# "--": end of options 21 | optsDone = true 22 | else: yield i 23 | 24 | if cmdCount < 3: quit use, 1 # Use cmdCount,Line not paramCount,Str.. 25 | let dlr1 = $cmdLine[1] #..to avoid string creation w/giant argv 26 | if dlr1 in ["-h", "--help"]: echo use; quit 0 27 | 28 | var ix: int 29 | let bare = dlr1.strip 30 | if bare.len == 0 or parseInt(bare, ix) != bare.len: 31 | quit "\"" & bare & "\"" & " is not an integer.\n\n" & use, 2 32 | 33 | if ix < 0: 34 | let ixes = collect(for i in nonOpts(): i) 35 | let ix = ixes.len + ix 36 | if ix >= 0 and ix < ixes.len: echo cmdLine[ixes[ix]] 37 | else: quit "noa index " & bare & " out of bounds\n\n" & use, 3 38 | else: 39 | var ixCt = ix 40 | for i in nonOpts(): 41 | if ixCt == 0: echo cmdLine[i]; quit 0 42 | dec ixCt 43 | quit "noa index " & bare & " out of bounds\n\n" & use, 3 44 | -------------------------------------------------------------------------------- /notIn.nim: -------------------------------------------------------------------------------- 1 | import std/[sets, os, strutils], cligen/[sysUt, osUt] 2 | when not declared(stdin): import std/syncio 3 | 4 | proc doNotIn*(file="", delim='\0', term='\0', pattern="$1", invert=false, 5 | roots: seq[string]) = 6 | ## Find files under `roots` NOT matching `pattern` applied to any `file` entry. 7 | ## E.g.: 8 | ## `(cd D1; find . -print0) | notIn D2 D3 | xargs -0 echo` 9 | ## echoes every entry under *D2* or *D3* not also under *D1*. Input paths are 10 | ## normalized to nix empty path components (e.g. 1st & 3rd in "./foo/./bar"). 
11 | ## `find -path A -o -path B ..` can do this, but is hard for many paths. 12 | if "$1" notin pattern: 13 | Value !! "`pattern` must contain \"$1\" somewhere" 14 | var pats = initHashSet[string]() # Build up a big HashSet[string] 15 | let file = if file.len == 0: stdin else: open(file) 16 | for pat in getDelim(file, delim): 17 | if (let pat = pat.normalizedPath; pat.len > 0): 18 | pats.incl pattern % [ pat ] 19 | let filter = {pcFile, pcLinkToFile, pcDir, pcLinkToDir} 20 | for root in roots: # Now walk roots listing (mis)matches 21 | let root = if root.endsWith("/"): root[0..^2] else: root 22 | try: 23 | for path in walkDirRec(root, filter): 24 | let pat = path[root.len+1..^1] 25 | if (invert and pat in pats) or pat notin pats: 26 | stdout.write path, term 27 | except Ce: 28 | erru "could not recurse into ",root,"\n" 29 | 30 | when isMainModule: 31 | import cligen; include cligen/mergeCfgEnv 32 | dispatch doNotIn, cmdName="notIn", short={"invert": 'v'}, help={ 33 | "file" : "delimited input ( `\"\"` => ``stdin`` )", 34 | "delim" : "input path delimiter", 35 | "term" : "output path terminator", 36 | "pattern": "a \\$1-containing under `roots` pattern", 37 | "invert" : "find files that *do* match a `file` entry"} 38 | -------------------------------------------------------------------------------- /doc/tmath.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | Sometimes rather than converting to/from epoch seconds you prefer to embrace the 4 | International Date Line. ;) E.g., often the fastest way to do "date subtraction" 5 | is converting to [rata die](https://en.wikipedia.org/wiki/Rata_Die). Or a media 6 | player needs input in [[H:][M:]S and you prefer `$(tmath hms 2345)="39:05"` or 7 | worse there are start time/end time/length calcs to toil through. 8 | 9 | Since this may be useful as a lib not just as a CLI, the module is under bu/. 
10 | This is amongst the least novel code here likely with *many* copies of the core 11 | ideas out in the world, but it's simple enough and I've used it enough over the 12 | last few years that it seemed worth including. 13 | 14 | Examples 15 | ======== 16 | ```sh 17 | $ echo $(($(tmath r 2017-06-06)-$(tmath r 2001-01-01))) 18 | 6000 19 | $ tmath + 10:20:30 \ -4:5:6 20 | 6:15:24 21 | ``` 22 | 23 | Usage 24 | ===== 25 | While `tmath h` will dump it all, these subcommands do not take real options, 26 | just lists of what they say they take. Y4-M-D refers to a date formatted like 27 | 2000-1-31 or 1996-07-04. 28 | ``` 29 | Various calendar & time-of-day math routines that operate directly on broken 30 | down representations with a convenient CLI. 31 | 32 | tmath {SUBCMD} [sub-command options & parameters] 33 | where {SUBCMD} is one of: 34 | help print comprehensive or per-cmd help 35 | julians Julian Days for given Y4-M-D Gregorian dates 36 | dates Get Gregorian date for a given Julian Day in 8 integer divides 37 | rataDies Days since Gregorian 1/1/1 for given Y4-M-D dates (1div, 1cacheLn) 38 | gregorys Gregorian dates given days since 1/1/1 (in 4 int divs). 39 | toHMS Get all elements of seconds as H:M:S 40 | seconds Get all elements of hmses as seconds 41 | addHMS H:M:S sum of H:M:S args[0] and H:M:S args[1] (quote "space-") 42 | + alias for addHMS 43 | ``` 44 | -------------------------------------------------------------------------------- /doc/memlat.md: -------------------------------------------------------------------------------- 1 | A little utility to measure latency at various levels of the memory hierarchy. 2 | Only READ/load latency right now, though. 
3 | 4 | This was basically inspired by the discussion here (forum.nim-lang.org seems 5 | to no longer render many historical posts - sorry - that is one reason why 6 | I am re-posting this code here): 7 | 8 | https://forum.nim-lang.org/t/5734#35832 9 | 10 | as well as some lame arguments about cache locality in the context of very hard 11 | to measure cold-cache (& cold branch predictor) hash tables where vanilla linear 12 | probing basically always wins (sometimes by a lot) in spite of the fact that hot 13 | everything L1 micro-benchmarks can make it seem like "pseudorandom probing" can 14 | have a (small, probably not CPU-portable) edge. 15 | 16 | The `--kind=ranElt` and `=truRan` tests here basically emulate hash lookups 17 | while the `--kind=shuff` emulates cold cache memory loads (but branch predictors 18 | are still hot cache) or a load pattern more like hopping a long linked list or 19 | a very deep tree. (`truRan` only works on Linux right now.) 20 | 21 | This utility (in shuffle mode) is actually not so bad a way to measure memory 22 | systems against each other at various data scales. I see a great deal of 23 | variation in main memory/DIMM latencies which are not (often) covered in 24 | marketing speak like "DDR-N", but often very impactful on performance. 25 | 26 | ``` 27 | Usage: 28 | lat [optional-params] 29 | Time latency three ways. shuffle measures real latency. 30 | -k=, --kind= Algo shuff shuff: chase ran perm 31 | ranElt: access ran elt 32 | truRan: pre-read getrandom 33 | -s=, --sizeKiB= int 1048576 set sizeKiB 34 | -n=, --nAcc= int 1000000 set nAcc 35 | -a=, --avgN= int 4 set avgN 36 | -m=, --minN= int 4 set minN 37 | --seed= int 0 0=>random, else set 38 | ``` 39 | -------------------------------------------------------------------------------- /doc/chom.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | On multi-user systems and servers filesystem permissions can matter. 
Often one 5 | wants a restrictive umask (e.g. 077) for file/directory creation as a "safe 6 | default". Then sometimes you want to "open up" perms on some entire file 7 | sub-tree..say to collaborate with some other user. 8 | 9 | While you can do `chown -R` there is no "only directories" filter or "handle 10 | user-executable files differently" option. One can do wrapper scripts with 11 | `find`, but using `chom` is more efficient for both users and the system. 12 | One can also do fancy Zsh recursive globbing like `**(.x)`, but at least to 13 | me the ergonomics of `chom` are better than either of these options. 14 | 15 | Usage 16 | ----- 17 | ``` 18 | chom [optional-params] [paths: string...] 19 | 20 | This enforces {owner, group owner, permissions} for {dirs, non-executable other 21 | files, and user-executable files}. This only makes chown/chmod syscalls when 22 | needed, both for speed & not to touch ctime unnecessarily. It does not handle 23 | ACLs, network FS defined access, etc. Return zero if no calls are needed. 
24 | 25 | Options: 26 | -v, --verbose bool false print chown and chmod calls as they happen 27 | -q, --quiet bool false suppress most OS error messages 28 | -n, --dry-run bool false only print what system calls are needed 29 | -r=, --recurse= int 0 max recursion depth for any dir in paths 30 | -c, --chase bool false follow symbolic links to dirs in recursion 31 | -x, --xdev bool false block recursion across device boundaries 32 | -o=, --owner= string "" owner to set; may need root; defl=self 33 | -g=, --group= string "" group owner to set; defl=primaryGid(self) 34 | -d=, --dirPerm= Perm 2755 permission mask for dirs 35 | -f=, --filePerm= Perm 664 permission mask for files 36 | -e=, --execPerm= Perm 775 permission mask for u=x files 37 | -------------------------------------------------------------------------------- /doc/lncs.md: -------------------------------------------------------------------------------- 1 | `lncs` (pronounced links) is much like `cligen/examples/dups` but for clusters 2 | of hard-links, not elsewise duplicate files. 3 | 4 | `lncs` searches within paths of maybe-chasing, maybe-recursive closure of the 5 | UNION of roots and optional dlm-delimited input file (stdin if "-"|if "" & stdin 6 | not a tty). 7 | 8 | Exit code is min(255, num.clusters >= thresh). 9 | 10 | Eg., 11 | ``` 12 | find -print0|lncs -d\0 -o\0 -e\0 13 | ``` 14 | makes a report reliably splittable on double-NUL then single-NUL for fully 15 | general path names while `lncs -ls -n0 -r0 /` echoes a summary. 16 | 17 | There are a few knobs to filter out some common cases like small files or 18 | only include regular files, etc., but `find` can of course do all this and 19 | much more as an input generator. 
20 | 21 | ``` 22 | Usage: 23 | lncs [optional-params] filesystem roots 24 | 25 | -f=, --file= string "" optional input ("-"|!tty=stdin) 26 | -d=, --dlm= char '\n' input file delimiter (0->NUL) 27 | -r=, --recurse= int 1 recurse n-levels on dirs; 0:unlimited 28 | -c, --chase bool false follow symlinks to dirs in recursion 29 | -X, --xdev bool false block recursion across device boundaries 30 | -0, --eof0 bool false read dirents until 0 eof 31 | -k=, --kinds= set(FileKind) file i-node type like find(1): [fdlbcps] 32 | -m=, --minSize= int 0 minimum file size 33 | -t=, --thresh= int 2 smallest hard link cluster to count 34 | -q, --quiet bool false suppress file access errors 35 | -l=, --log= set(LncsLog) osErr >stderr{osErr, summary} 36 | -n=, --nEcho= int -1 num to print; 0: none; -1: unlimited 37 | -., --noDot bool false remove a leading . from names 38 | -o=, --outDlm= string "\t" output internal delimiter 39 | -e=, --endOut= string "\n" output record terminator 40 | ``` 41 | -------------------------------------------------------------------------------- /doc/uce.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | It can sometimes help to have an estimate of the number of unique/distinct items 4 | from a large, possibly compressed-on-storage input stream. E.g., you may want to 5 | spend 1-pass over the data to know if you could fit a hash table containing all 6 | keys in available space (& even pre-size such a table to avoid growth costs). 7 | If you cannot then you may need some other estimation approach. 8 | 9 | KMV Sketch Method 10 | ================= 11 | For a low price of just `k` max value entries[^1] you can get a pretty good 12 | estimate with error ~ 1/sqrt(k). 
13 | [adix/uniqce.nim](https://github.com/c-blake/adix/blob/master/adix/uniqce.nim) 14 | has more details.[^2] 15 | 16 | Usage[^3] 17 | ========= 18 | ``` 19 | uce [optional-params] 20 | 21 | Emit Unique Count Estimate of input lines to stdout. Algo is fast, low space 22 | 1-pass KMV over mmap | stream input. (For exact, see lfreq.) 23 | 24 | -i=, --input= "/dev/stdin" input data path 25 | -k=, --k= 1024 size of the sketch in float64 elts 26 | -r=, --re= 0..5 range of 10expon defining 'near 1' 27 | -f=, --fmt1= "$val0 +- $err0" fmt for uncertain num near 1 28 | -e=, --expF= "($valMan +- $errV)$valExp" fmt for uncertain num beyond `re` 29 | ``` 30 | Empty string for `fmt1` produces two columns of float at full precision. 31 | 32 | Examples 33 | ======== 34 | ```sh 35 | $ (seq 1 50; seq 1 50) | uce 36 | 50.00 +- 0.10 37 | 38 | $ (seq 1 5000000; seq 1 5000000) | uce 39 | (5.09 +- 0.16)e+06 40 | ``` 41 | 42 | [^1]: A `k` fitting in an L1 data cache yields O(1%) estimates. 43 | 44 | [^2]: This is by far the simplest sketch family along these lines - 45 | conceptually, code, etc. 46 | 47 | [^3]: BTW, in my head I pronounce "uce" like the tail of "Bruce". { And yes I am 48 | aware that UCE also stands for Unsolicited Commercial Email aka "spam". I hope 49 | you like this tool better than that, at least. 
;-) } 50 | -------------------------------------------------------------------------------- /fsids.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | include cligen/unsafeAddr 3 | import std/[tables, algorithm, posix], cligen/[dents, osUt, posixUt, statx] 4 | type IdKind = enum user, group, all="both" 5 | type Order = enum id, count 6 | 7 | proc print*[Id](hdr: string, ids: Table[Id, int], nm: Table[Id, string], 8 | order=id) = 9 | var pairs, sorted: seq[tuple[id: Id, count: int]] 10 | for id, cnt in ids: pairs.add (id, cnt) 11 | case order 12 | of id : sorted = pairs.sortedByIt( it[0]) 13 | of count: sorted = pairs.sortedByIt(-it[1]) 14 | echo hdr, "\tNentry\tName" 15 | for tup in sorted: 16 | echo tup[0], "\t", tup[1], "\t", nm.getOrDefault(tup[0], "MISSING") 17 | 18 | proc fsids*(roots: seq[string], kind=all, order=id, 19 | recurse=0, follow=false, xdev=false, eof0=false) = 20 | ## Print a histogram of uids and/or gids used by a file tree 21 | var uids: Table[Uid, int] 22 | var gids: Table[Gid, int] 23 | let doU = kind in { user , all } 24 | let doG = kind in { group, all } 25 | for root in (if roots.len > 0: roots else: @[ "." 
]): 26 | forPath(root, recurse, true, follow, xdev, eof0, stderr, 27 | depth, path, nmAt, ino, dt, lst, dfd, dst, did): 28 | if doU: uids.mgetOrPut(lst.st_uid, 0).inc 29 | if doG: gids.mgetOrPut(lst.st_gid, 0).inc 30 | do: discard # No pre-recurse 31 | do: discard # No post-recurse 32 | do: recFailDefault("fsids", path) # cannot recurse 33 | if doU: print "#Uid", uids, users() , order 34 | if doG: print "#Gid", gids, groups(), order 35 | 36 | when isMainModule:import cligen;include cligen/mergeCfgEnv;dispatch fsids,help={ 37 | "kind" : "kind of ids to report user, group, both", 38 | "order" : "sort order: up by id or down by count", 39 | "recurse": "recursion limit for dirs in `roots`; 0=unbounded", 40 | "follow" : "follow symbolic links to dirs in recursion", 41 | "xdev" : "block recursion from crossing devices" } 42 | -------------------------------------------------------------------------------- /holes.nim: -------------------------------------------------------------------------------- 1 | import std/[posix, strutils] 2 | when not declared(File): import std/syncio 3 | 4 | iterator holes*(fd: cint): (bool, int) = 5 | const SEEK_DATA = cint(3) 6 | const SEEK_HOLE = cint(4) 7 | const what = [SEEK_HOLE, SEEK_DATA] 8 | let eof = lseek(fd, 0, SEEK_END) 9 | var pos = lseek(fd, 0, SEEK_HOLE) 10 | var hole = pos == 0 11 | if pos > 0: 12 | yield (hole, pos) 13 | errno = 0.cint # Clear any earlier ENXIO's 14 | while pos < eof and errno != ENXIO: 15 | if (let new = lseek(fd, pos, what[hole.int]); new != -1): 16 | if new - pos > 0: 17 | yield (hole, new - pos) 18 | pos = new 19 | hole = not hole 20 | if eof - pos > 0: 21 | yield (hole, eof - pos) 22 | 23 | proc sholes(format="", files: seq[string]) = 24 | ## Show hole & data segments for `files` 25 | const name = ["data", "hole"] 26 | let format = if format.len != 0: format else: "$count $path\n$map" 27 | let needMap = "$map" in format or "${map}" in format 28 | let userTerm = "$zero" in format or "${zero}" in format 29 | 
var m: string 30 | for file in files: 31 | if (let fd = open(file.cstring, O_RDONLY); fd >= 0): 32 | m.setLen 0 33 | var n = 0 34 | for (hole, size) in fd.holes: 35 | inc n 36 | if needMap: 37 | m.add '\t'; m.add name[hole.int] 38 | m.add '\t'; m.add $size 39 | m.add '\n' 40 | discard fd.close # Can fail on netFSes; No recovery really possible 41 | stdout.write format % ["count",$n, "path",file, "map",m, "nul","\0"] 42 | if not userTerm and not needMap: 43 | stdout.write '\n' 44 | 45 | when isMainModule: 46 | import cligen;include cligen/mergeCfgEnv;dispatch sholes,cmdName="holes",help={ 47 | "format": """emit format interpolating (braces ok for flush-text): 48 | $count : number of data|hole segments 49 | $path : path name of REGULAR FILE from $\* 50 | $map : map of all data&hole segments 51 | $zero : a NUL byte 52 | \"\" => \"$count\\t$path\\n$holes\\n\""""} 53 | -------------------------------------------------------------------------------- /bu/emin.nim: -------------------------------------------------------------------------------- 1 | import std/[stats, algorithm], bu/eve 2 | 3 | type MinEst* = tuple[est, err: float] ## An uncertain estimate of a minimum 4 | 5 | template eMin*(k=2, n=7, m=3, get1): untyped = 6 | ## This template takes as its final parameter any Nim code block giving one 7 | ## `float` (probably a delta time) and gives a `MinEst` by a best k/n m-times 8 | ## approach. `doc/tim.md` has details; `bu/tim.nim` is a CLI utility example. 9 | #IDEA: Check m-sampling same via Anderson-Darling(minTail-weighted/clipped). 10 | var xall: seq[float] 11 | var sest: RunningStat 12 | let a = k.a_ik 13 | for outer in 1..m: 14 | var samp: seq[float] 15 | for inner in 1..n: samp.add (block: get1) 16 | samp.sort 17 | sest.push samp.eLE(a) 18 | xall.add samp 19 | (est: xall.eLE(a_ik(2*k)), err: sest.standardDeviation) #/sqrt(m.float)4big m? 
20 | 21 | when isMainModule: 22 | import cligen 23 | when defined test: 24 | when not declared(addFloat): import std/formatFloat 25 | proc minE(k: int, x: seq[float]) = 26 | var x = x; x.sort 27 | echo eLE(x, k.a_ik) 28 | x.reverse; echo "flipped method, just basic estimate" 29 | let off = 2*x[0] + - x[^1] 30 | echo "off: ", off 31 | for e in mitems x: e = off - e 32 | echo off - x.ere(k.a_ik) # , off # for debugging 33 | dispatch minE, help={"k":"2k=num of order stats", "x":"1-D / univar data.."} 34 | else: 35 | import cligen/strUt; include cligen/mergeCfgEnv 36 | proc minE(warmup=1, k=2, n=7, m=3, ohead=0, x: seq[float]) = 37 | ## Emit a minimum estimator of `x` with its uncertainty 38 | if x.len != warmup + n*m: 39 | quit "warmup, n, m mismatch given x[]; Run with --help for more.", 1 40 | var i = warmup - 1 41 | let (est, err) = eMin(k, n, m, (inc i; x[i])) 42 | echo fmtUncertain(est, err, e0= -2..5) 43 | dispatch minE,cmdName="emin", help={"x":"x1 x2..", "warmup":"initial skip", 44 | "k":"k for eLE", "n":"n for eLE", "m":"outer reps", "ohead":"ignored"} 45 | -------------------------------------------------------------------------------- /doc/notIn.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | https://github.com/c-blake/ndup has a POSIX shell script, sh/ndup which keeps 5 | a mirrored set of files related to some source files. A natural course of file 6 | management (especially with duplicate/near duplicate removal in play) is the 7 | user creating new files or directories, renaming old ones, etc. With any sort 8 | of mirrored hierarchy of derived files, this induces a need to clean up stale 9 | derivations. This is what `notIn` helps do. 
10 | 11 | The salient line in the above-mentioned script is: 12 | ```sh 13 | notIn -f$w/f0 $w/digs $w/sets | xargs -0 rm -fv 14 | ``` 15 | This will remove any files under digs/ or sets/ that are *not* in the path list 16 | file `f0` (hence the program name `notIn`). 17 | 18 | I have not used it for this personally, but another example use case might be a 19 | parallel hierarchy used for `lc -X,--extra` extra parameter values for 20 | [lc](https://github.com/c-blake/lc) but only where a user has permission to 21 | write. In this case, due to the nature of `lc`, you probably only care about 22 | stale directories. 23 | 24 | In general, parallel file trees can be an interesting tool both conceptually 25 | and practically and `notIn` can help to maintain them/query disparities/etc. 26 | 27 | Usage 28 | ----- 29 | ``` 30 | notIn [optional-params] [roots: string...] 31 | 32 | Find files under roots NOT matching pattern applied to any file entry. E.g.: 33 | (cd D1; find . -print0) | notIn D2 D3 | xargs -0 echo 34 | echoes every entry under D2 or D3 not also under D1. 35 | 36 | Input paths are normalized to nix empty parts (e.g. 1st&3rd in "./foo/./bar"). 37 | 38 | find -path A -o -path B .. can do this, but is hard for many paths. 
39 | 40 | -f=, --file= string "" delimited input ( "" => stdin ) 41 | -d=, --delim= char '\x00' input path delimiter 42 | -t=, --term= char '\x00' output path terminator 43 | -p=, --pattern= string "$1" a $1-containing under roots pattern 44 | -v, --invert bool false find files that do match a file entry 45 | ``` 46 | -------------------------------------------------------------------------------- /fpr.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | import cligen, cligen/[mfile, osUt], std/strutils 3 | 4 | iterator fprs(paths: (iterator(): string)): 5 | tuple[pages: tuple[resident, total: int], path: string, err: int] = 6 | var empty: tuple[resident, total: int] 7 | for path in paths(): 8 | when defined(windows): 9 | yield (pages: empty, path: path, err: 1) 10 | else: 11 | if (let mf = mopen(path); mf != nil): 12 | yield (pages: mf.inCore, path: path, err: 0) 13 | mf.close 14 | else: 15 | yield (pages: empty, path: path, err: 1) 16 | 17 | type Emit = enum summary, detail, errors 18 | 19 | proc fpr(file="", delim='\n', emit={summary}, paths: seq[string]): int = 20 | ## File Pages Resident. Examine UNION of `paths` & optional `delim`-delimited 21 | ## input `file` (stdin if "-"|"" & stdin not a tty). Eg., `find -print0 | fpr 22 | ## -d\\0`. Like util-linux `fincore`, but more Unix-portable & summarizing. 23 | var nErr, r, t, nFile: int # Track numErr, resid, total pages 24 | for y in fprs(both(paths, fileStrings(file, delim))): 25 | nFile.inc # Update number of files & stats for 26 | r.inc y.pages.resident # y)ielded tuples 27 | t.inc y.pages.total 28 | nErr.inc y.err 29 | if errors in emit and y.err != 0: # Ignore errors from zero length files? 
30 | stderr.write "fpr: error: \"", y, "\" (zero length/special file?)\n" 31 | if detail in emit: 32 | echo y.pages.resident," of ",y.pages.total," pages resident in ",y.path 33 | if summary in emit and nFile > 0: 34 | echo r," of ",t," pages ", formatFloat(r.float/t.float*100.0, ffDecimal, 2), 35 | "% resident in ",nFile," files ",nErr," errors" 36 | min(nErr, 127) # Exit with appropriate status 37 | 38 | include cligen/mergeCfgEnv 39 | dispatch fpr, help={"file" : "optional input (\"-\"|!tty=stdin)", 40 | "delim": "input file delimiter (\\0->NUL)", 41 | "emit" : "Stuff to emit: *summary* *detail*"} 42 | -------------------------------------------------------------------------------- /bu/testf.nim: -------------------------------------------------------------------------------- 1 | ##[ `vip` caches these answers for UI logic simplicity & efficiency. Stale info 2 | can be displayed if FS changes fast relative to interactive pick sessions. ]## 3 | 4 | import std/posix # .so source for use in `dirs|vip -k libtestf.so:cdable` 5 | 6 | var cpath = "" # Reused path buffer to be NUL/\0-term 7 | proc cdable(path: pointer, nPath: clong): cint {.noconv, exportc, dynlib.} = 8 | if nPath == 0 or path.isNil: 9 | return 0 10 | cpath.setLen nPath # `vip` does not open any files post `parseIn()` 11 | copyMem cpath[0].addr, path, nPath 12 | cint(chdir(cast[cstring](cpath[0].addr)) == 0) 13 | #NOTE: Above assumes strings come as rooted paths ("/a/b/leafDir"). Lacking a 14 | # leading "/" makes it a relative path which can succeed relative to the 15 | # (newly, per all our chdirs) current working directory, but which would 16 | # fail relative to the original parent process. 17 | 18 | #[ To cursor down, `vip` must test one at a time until a success. To get more 19 | async/scalable needs a batch interface with forked kids which an ok idea since a 20 | hanging NFS mount can hang a kid process & we might want to kill it. 
Laziness 21 | of outer validation may mean only 0..3 timeouts in any given UI interaction. 22 | So, they could be made 50..100ms. There may be a way to build a critbit tree, 23 | monitor /proc/mounts, and only time out once per mount prefix or etc. Of 24 | course, a file system could also come back online at any moment as well. 25 | 26 | On systems with nice enough terminal interaction for `vip` to make sense but 27 | poor dynamically loadable lib support (are there any??), `vip` *could* add a 28 | `--validation-coprocess=foo` to make a kid process to delegate requests to, 29 | mostly blocked on IPC read, but ready to read a path, do whatever user-program 30 | tests & write 1-byte. While 2 syscalls per request (in both parent & kid + 31 | whatever validation work), they are at least fast-ish pipe-IO calls. 32 | 33 | This module could grow a large family of `bu/ft`-like tests. PRs welcome if 34 | this would help your specific application setting. ]# 35 | -------------------------------------------------------------------------------- /jointr.nim: -------------------------------------------------------------------------------- 1 | import std/[tables, strutils], cligen/[sysUt, osUt, mfile, mslice], cligen 2 | 3 | proc jointr*(cont=" ", boc="<... ", eoc=" resumed>", all=false, 4 | path: seq[string]) = 5 | ## Multi-process programs are often usefully debugged via something like 6 | ## `strace --decode-fds -fvs8192 -oFoo multi-process-program` 7 | ## but this breaks up a system call execution suspension into top & bottom 8 | ## halves with "..." indicators like: 9 | ## PID (.\*)" \\\n .. samePID <... CALL resumed> 10 | ## where top-half parameters are elided in bottom half resumption. This 11 | ## program joins these lines for easier reading, optionally retaining the 12 | ## "unfinished" to aid temporal reasoning. In said retention mode, 13 | ## never-resumed calls print in hash order at the end. 
14 | var top: Table[MSlice, MSlice] 15 | let sep = initSep "white" 16 | var cols: seq[MSlice] 17 | for line in mSlices(if path.len<1: "/dev/stdin" else: path[0], keep=true): 18 | sep.split line, cols, 2 19 | if cols.len != 2: continue 20 | let (pid, rest) = (cols[0], cols[1]) 21 | if rest.startsWith boc: # Skip to 1st eoc & output top,bottom 22 | let ix = line.find eoc 23 | if ix == -1: IO !! "missing \"" & eoc & "\"" 24 | outu alignLeft($pid, 5), " ", top[pid], line[ix+eoc.len..^1], "\n" 25 | top.del pid 26 | elif rest.endsWith cont: # Save for bottom half 27 | top[pid] = rest[0 ..^ (cont.len + 1)] 28 | if all: outu line, '\n' 29 | else: outu line, '\n' 30 | if not all: 31 | for pid, rest in top: # Would be nicer to emit in orig order, 32 | outu alignLeft($pid, 5), rest #..but that needs more subtle buffering 33 | outu cont, '\n' #..& never-resumed calls must be rare. 34 | 35 | include cligen/mergeCfgEnv; dispatch jointr, help={ 36 | "path": "strace log path (or none for stdin)", 37 | "cont": "line suffix saying it continues", 38 | "boc" : "beg of contin. indication to eat", 39 | "eoc" : "end of contin. indication to eat", 40 | "all" : "retain \"unfinished ...\" in-place"} 41 | -------------------------------------------------------------------------------- /doc/sr.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Linux can misbehave, especially on more exotic hardware or with untested 5 | software. So, the kernel provides a way to interpret special keys on the 6 | keyboard and a more remote-friendly "/proc/sysrq-trigger" interpretation. 7 | This program wraps all of that to make it easy to just run a command to 8 | get the intended results. 9 | 10 | For example, if programs and libc are all breaking but you have a statically 11 | linked `sr` and can still get a few pages off the disk you may be able to run 12 | `sr u; sr s; sr b` with some success. 
13 | 14 | Usage (***NOT*** a cligen utility) 15 | ----- 16 | Usage (as root!): 17 | sr 18 | where CODE is: 19 | b immediately reboot without syncing or unmounting 20 | c crash system by NULL pointer deref, leave crashdump if configured 21 | d shows locks that are held 22 | e send SIGTERM to all processes, except for init 23 | f call OOM killer to kill memory hogs; No panic if nothing can be killed 24 | g used by kgdb (kernel debugger) 25 | i send SIGKILL to all processes, except for init 26 | j forcibly "Just thaw it" - filesystems frozen by FIFREEZE ioctl 27 | k secure Access Key (SAK); Kill programs on current virtual console 28 | l shows stack backtrace for active CPUs 29 | m dump current memory info to your console 30 | n used to make RT tasks nice-able 31 | o shut your system off (if configured & supported) 32 | p dump current registers & flags to your console 33 | q dump armed hrtimers (NOT regular timer_list timers) & clockevent dev info 34 | r turns off keyboard raw mode & sets it to XLATE 35 | s attempt to sync mounted filesystems 36 | t dump current tasks & their information to your console 37 | u attempt to remount mounted filesystems read-only 38 | v forcefully restores framebuffer console; causes ETM buffer dump on ARM 39 | w dumps tasks that are in uninterruptable (blocked) state 40 | x used by xmon on PPC; Show global PMU Regs on sparc64; Dump TLBs on MIPS 41 | y show global CPU Registers [SPARC-64 specific] 42 | z dump FTRACE buffer 43 | 0-9 set console log level; 0=emergency messages (PANICs|OOPSes) only 44 | -------------------------------------------------------------------------------- /sr.nim: -------------------------------------------------------------------------------- 1 | import std/[os, posix, strutils] 2 | const SRQ = "/proc/sysrq-trigger" 3 | const use = """Usage (as root!): sr [..] 
[DELAY(ms)] where CODE is: 4 | b immediately reboot without syncing or unmounting 5 | c crash system by NULL pointer deref, leave crashdump if configured 6 | e send SIGTERM to all processes, except for init 7 | f call OOM killer to kill memory hogs; No panic if nothing can be killed 8 | i send SIGKILL to all processes, except for init 9 | j forcibly \"Just thaw it\" - filesystems frozen by FIFREEZE ioctl 10 | k secure Access Key (SAK); Kill programs on current virtual console 11 | l shows stack backtrace for active CPUs 12 | m dump current memory info to your console 13 | n used to make RT tasks nice-able 14 | o shut your system off (if configured & supported) 15 | p dump current registers & flags to your console 16 | q dump armed hrtimers (NOT regular timer_list timers)&clockevent dev info 17 | r turns off keyboard raw mode & sets it to XLATE 18 | s attempt to sync mounted filesystems 19 | t dump current tasks & their information to your console 20 | u attempt to remount mounted filesystems read-only 21 | w dumps tasks that are in uninterruptable (blocked) state 22 | x used by xmon on PPC; Show global PMU Regs on sparc64; Dump TLBs on MIPS 23 | y show global CPU Registers [SPARC-64 specific] 24 | z dump FTRACE buffer 25 | 0-9 set console log level; 0=emergency messages (PANICs|OOPSes) only""" 26 | 27 | if paramCount() < 1 or paramStr(1).len < 1 or paramStr(1) == "h": 28 | quit use, 0 29 | if geteuid() != 0: 30 | quit "only root can use "&SRQ&"\n", 2 31 | let delay = if paramCount() > 1: parseInt(paramStr(2)) else: 250 32 | for i, c in paramStr(1): 33 | if c in {'b','c', 'e','f', 'i'..'u', 'w'..'z', '0'..'9'}: 34 | if (let fd = open(SRQ, O_WRONLY); fd >= 0): 35 | var buf = [c, '\n'] 36 | if write(fd, buf[0].addr, 2) != 2: 37 | quit "write()!=2: " & $errno.strerror, 4 38 | discard close(fd) # Nim bug: No discard => err@wrong lineNo 39 | else: 40 | quit "open "&SRQ&": " & $errno.strerror, 3 41 | else: 42 | quit use, 1 43 | if i + 1 < paramStr(1).len: 44 | sleep delay 
import std/[posix, strutils, tables], cligen/[osUt, mslice, magic, procpool]
when not declared(stderr): import std/syncio

when haveMagic:
  type Excl = enum compress,tar,soft,apptype,elf,text,cdf,tokens,encoding,ascii
  const e2Flag = { # CSV & json missing; Maybe cligen/magic needs updating?
    apptype : MAGIC_NO_CHECK_APPTYPE , ascii   : MAGIC_NO_CHECK_ASCII   ,
    encoding: MAGIC_NO_CHECK_ENCODING, tokens  : MAGIC_NO_CHECK_TOKENS  ,
    cdf     : MAGIC_NO_CHECK_CDF     , compress: MAGIC_NO_CHECK_COMPRESS,
    elf     : MAGIC_NO_CHECK_ELF     , soft    : MAGIC_NO_CHECK_SOFT    ,
    tar     : MAGIC_NO_CHECK_TAR     , text    : MAGIC_NO_CHECK_TEXT }.toTable

  var gFlags = 0.cint   # libmagic check-exclusion flags; set before kids fork
  proc count(histo: var CountTable[string], s: MSlice) = histo.inc $s

  proc classify(r, w: cint) = # Child: read '\0'-term paths on r; reply types on w
    var m = magic_open(gFlags)
    if m == nil or magic_load(m, nil) != 0:
      # FIX: old code wrote a stray C-style "%s\n\t" literally (Nim `write`
      # takes varargs, not a printf format); just write the parts in order.
      stderr.write "cannot load magic DB: ", m.magic_error, "\n"
      quit 1
    for path in r.open.getDelim('\0'):
      let fileType = $m.magic_file(path.cstring)
      discard wrLenBuf(w, fileType)     # length-prefixed frame back to parent

  proc fkindc*(gen="find $1 -print0", dlr1=".", excl: set[Excl]={}, jobs=0) =
    ## Use ``gen`` and ``dlr1`` to generate paths and histogram by `file(1)` type.
    var histo: CountTable[string]
    for e in excl: gFlags = gFlags or cint(e2Flag[e]) # Set up gFlags for libmagic
    let inp = popen(cstring(gen % dlr1), "r".cstring) # Fire input path generator
    var pp = initProcPool(classify, framesLenPfx, jobs) # Start & drive kids
    pp.eval0term(inp.getDelim('\0'), histo.count) # Replies=0-term file types
    discard inp.pclose
    histo.sort                                    # most frequent types first
    for k, ct in histo: echo ct, '\t', k

  when isMainModule:
    import cligen; include cligen/mergeCfgEnv
    dispatch fkindc, short={"excl": 'x'}, help={
      "gen" : "generator cmd with dlr1 -> $1",
      "dlr1": "$1 for gen fmt; Eg. *\". -type f\"*",
      "excl": "tests to exclude like `file(1)`",
      "jobs": "use this many kids (0=auto)" }
else: quit "libmagic from file was not found when this program was built.", 1
9 | let origin = if O0: 0 else: origin 10 | var outFile = open(output, fmWrite) 11 | var colSet = initHashSet[int](colRanges.len) 12 | if cut: 13 | for r in colRanges: 14 | for c in r: colSet.incl c 15 | let sep = initSep delim 16 | var cols: seq[MSlice] = @[ ] 17 | for line in mSlices(input, sep=rowDlm, eat='\0'): # RO mmap | 1-use slices 18 | var wrote = false # wrote something &so need sepOut|\n 19 | sep.split line, cols 20 | if cut: 21 | for j, f in cols: 22 | if (origin + j) in colSet or (origin + j - cols.len) in colSet: 23 | continue 24 | if wrote: outFile.urite sepOut 25 | outFile.urite f 26 | wrote = true 27 | else: 28 | for r in colRanges: 29 | for i in r: 30 | let j = if i < 0: i + cols.len else: i - origin 31 | if j < 0 or j >= cols.len: 32 | continue 33 | if wrote: outFile.urite sepOut 34 | outFile.urite cols[j] 35 | wrote = true 36 | if wrote or blanksOk: outFile.urite term 37 | 38 | when isMainModule: include cligen/mergeCfgEnv; dispatch cols, help={ 39 | "colRanges": "colNums or A..B | X:Y (in|ex)clusive ranges thereof", 40 | "input" : "path to mmap|read as input", 41 | "rowDlm" : "inp *row* delimiter character", 42 | "delim" : "inp *field* dlm chars; len>0 => fold", 43 | "output" : "path to write output file", 44 | "sepOut" : "output field separator", 45 | "blanksOk" : "allow blank output rows", 46 | "cut" : "cut/censor specified columns, not keep", 47 | "origin" : "origin for colNums; 0=>signed indexing", 48 | "O0" : "shorthand for `--origin=0`", 49 | "term" : "set output row terminator (e.g. \\\\0)"}, short={"O0": '0'} 50 | -------------------------------------------------------------------------------- /doc/cstats.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Programs will often dump out numbers with varying amounts of context in rows. 4 | This might be resource usage information, like [ru](ru.md) or various results 5 | or really a great many things. 
It is very simple/natural to go from one report 6 | to many for various parameters/input files and so on in a shell loop, but with 7 | such data either collected or at the start of a pipeline, at other stages of 8 | inquiry it can be nice to have summary statistics. 9 | 10 | Usage 11 | ----- 12 | ``` 13 | cstats [optional-params] [stats: string...] 14 | 15 | This consumes any stdin looking like regular intercalary text with embedded 16 | floats & prints a summary with the LAST such text & requested stats for any 17 | varying float column. If table!="", context is joined via hsep into headers 18 | for associated reduced numbers, with columns separated by table (eg. ','). 19 | Available stats (ms if none given).. 20 | 21 | mn: mean sd: sdev se: stderr(mean) (i.e. sdev/n.sqrt) 22 | sk: skewness kt: kurtosis ms: mn +- se via pm exp nd unity sci params 23 | iq: interQuartileRange sq: semi-interQuartileRange n: len(nums) 24 | qP: General Parzen interpolated quantile P (0<=P<=1 float; 0=min; 1=max) 25 | 26 | ..print as separate rows in the table mode or else joined by join. 
27 | 28 | -d=, --delim= string "white" inp delims; Repeats=>fold 29 | -t=, --table= string "" labels -> header of a 30 | table-separated table 31 | --hsep= string "strip" header sep|strip if=strip 32 | -p=, --pm= string " +- " plus|minus string 33 | -e=, --exp= Slice -2..4 pow10 range for 'unity' 34 | -n=, --nd= int 2 n)um sig d)igits of sigma 35 | -u=, --unity= string "$val0${pm}$err0" near unity format 36 | -s=, --sci= string "($valMan $pm $errV)$valExp" scientific format 37 | -j=, --join= string "," intern st-delim for 1-row 38 | -m=, --min= int 0 use min-most numbers 39 | -M=, --max= int 0 use max-most numbers 40 | ``` 41 | -------------------------------------------------------------------------------- /doc/rs.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Since data sets can be large, random fair subsets/re-samplings can be useful. 4 | 5 | https://en.wikipedia.org/wiki/Reservoir_sampling has more details. Note that 6 | the cost comparison there between Algorithm R & L neglects IO costs which are 7 | around 50+% dominant for this little utility even on a RAM filesystem (just from 8 | `memchr` for line splitting even on very short lines). So, rather than the big 9 | O(n/k)-ish asymptotic speed-up factor, Algorithm L is likely only <~ 2X faster, 10 | at least with the `.add` API of `bu/rs.nim`. 11 | 12 | The random sampling with replacement algorithm is quite slow and should be 13 | replaced, but to detail its logic here, all slots in the reservoir table evolve 14 | identically & independently, and the evolution of the first slot looks like: 15 | ``` 16 | data 1 2 3 4 5 ... N 17 | slot0 1 -> 1 -> 1 -> 1 -> 1 ... N, p=1/2*2/3*3/4*4/5*...=1/N 18 | slot0 2 -> 2 -> 2 -> 2 ... N, p= 1/3*3/4*4/5*...=1/N 19 | slot0 3 -> 3 -> 3 ... N, p= 1/4*4/5*...=1/N 20 | ``` 21 | So, each slot has a similar 1/N independent chance of surviving until the end. 
22 | 23 | Some care was put into the command-line API here, in particular the ability to 24 | `--flush` the outputs to give immediate reads to possible FIFO workers. Also, 25 | you can create as many random subsets/samples of whatever various sizes as you 26 | like in various files rather easily by just listing them. 27 | 28 | Usage 29 | ----- 30 | ``` 31 | rs [optional-params] [pfx.][-]n.. output paths; pfx""=>stdout 32 | 33 | Write ranSubsets|Samples of rows of input -> prefix.ns. If n>0 do random 34 | subsets else sample with replacement. O(Σns) space. Examples: 35 | 36 | seq 1 100 | rs 10 .-5 or (after maybe mkfifo f1 f2) 37 | workOn f1 & workOn f2 & seq 1 1000 | rs -f f1.10 f2.-20 38 | 39 | Options: 40 | -i=, --input= string "" "" => stdin 41 | -f, --flush bool write to outs immediately 42 | -r, --randomize bool randomize() for non-deterministic filtering 43 | ``` 44 | 45 | Examples 46 | -------- 47 | Input: 48 | ```sh 49 | seq 1 1000 | rs foo1.9 foo2.9 foo3.9 foo4.9 50 | for f in foo*; do cstats q.5 < $f; done 51 | ``` 52 | Output: 53 | ``` 54 | 281.0 55 | 442.0 56 | 370.0 57 | 591.0 58 | ``` 59 | -------------------------------------------------------------------------------- /doc/newest.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | This is (mostly) a convenience program for something I often want to know. 4 | 5 | Usage 6 | ----- 7 | ``` 8 | newest [optional-params] [paths: string...] 9 | 10 | Echo ended by outEnd the <= n newest files in file time order {-}[bamcv] for 11 | Birth, Access, Mod, Ctime, Version=max(MC); { - | CAPITAL means oldest }. 12 | 13 | Examined files = UNION of paths + optional delim-delimited input file (stdin 14 | if "-"|if "" & stdin is not a terminal), maybe recursed as roots. 15 | 16 | E.g. to echo the 3 oldest regular files by m-time under the CWD: 17 | newest -t-m -n3 -r0 . 
Common cases should not suffer from pathologies. Default open fd limits that hail from 1970s memory costs are already pretty dumb.
5 | 6 | Duplicative 7 | ----------- 8 | 9 | To people who say "Tool XYZ is duplicative", I say "Not for me when I wrote it, 10 | but yes, truly exhaustive related research is often harder than writing code. 11 | Anyway, enjoy|not and I am happy to add refs to analogues to per-tool docs." 12 | 13 | Layout 14 | ------ 15 | 16 | To people not liking the file tree layout, I agree, but this seemed the easiest 17 | when some code like "bu/eve" is both Nim import-able lib and nimble-installable 18 | binary. I think nimble micro-manages these things to its detriment. 19 | 20 | Over-bundling/Too Many/Mah Head Asplode 21 | --------------------------------------- 22 | 23 | To people who say "this package is over-bundled", my rejoinder is: 24 | 25 | - Packages package; Pros & cons 26 | 27 | - I did not want to overly bias the nimbleverse toward even more packages 28 | needing [cligen](https://github.com/c-blake/cligen) (maybe only because I 29 | wrote both). 30 | 31 | - It's not such a nose-bleed percentile in the context of healthy Unix package 32 | ecosystems. Using `qtl` from [fitl](https://github.com/c-blake/fitl), I get: 33 | ```sh 34 | (for p in `q list -Iv`;do echo `q files $p|grep /bin/|wc -l` $p;done)| 35 | awk '{if($1>0)print $1}'|qtl .08 .5 .92 36 | ``` 37 | gives on one of my Gentoo's: 38 | ``` 39 | 1.0 2.008333333333333 14.72363636363637 40 | ``` 41 | (at the start, anyway, `bu` had 15 bins..so, about 92nd percentile). Anyway, 42 | util-linux has 73, coreutils has 127 and we will (probably) never get near 200. 43 | 44 | [`cligen/examples`](https://github.com/c-blake/cligen/tree/master/examples) will 45 | mostly move here in the near-term because >1 person has complained about that 46 | having too much. This will almost double the size of the collection. I should 47 | probably port a dozen or two more from C, but I still consider this all quite 48 | restrained. I have 1100 scripts & programs in `~/bin` | `/usr/local/bin`. Most 49 | are not in Nim. 
# This is like `nio moments`, but has `adix` as a hard dep for histo & qs.
when not declared(addFloat): import std/formatFloat
import std/math, nio, adix/[mvstat, lghisto], cligen/osUt

type MomKind = enum mkN="n", mkMin="min", mkMax="max", mkSum="sum", mkAvg="avg",
                    mkSdev="sdev", mkSkew="skew", mkKurt="kurt", mkHisto="histo"

proc fmtStat(ms: MovingStat, mk: MomKind, fmt: string): string =
  ## Render the single moment `mk` of accumulator `ms` with float format `fmt`.
  ## Returns "" for `mkHisto`, which the caller prints via `lgHisto` instead.
  case mk
  of mkN: ms.n.float64 .formatFloat(fmt)
  of mkMin: ms.min .formatFloat(fmt)
  of mkMax: ms.max .formatFloat(fmt)
  of mkSum: ms.sum .formatFloat(fmt)
  of mkAvg: ms.mean .formatFloat(fmt)
  of mkSdev: ms.standardDeviation.formatFloat(fmt)
  of mkSkew: ms.skewness .formatFloat(fmt)
  of mkKurt: ms.kurtosis .formatFloat(fmt)
  else: ""

proc niom(fmt=".4g", stats={mkMin, mkMax}, qs: seq[float] = @[],
          a=1e-16, b=1e20, n=8300, paths: Strings): int =
  ## Print selected statistics over all columns of all `paths`.
  # Order statistics (histogram/quantiles) need extra space in each
  # accumulator; only request them when the user asked for them.
  let opt = if mkHisto in stats or qs.len > 0: {OrderStats} else: {}
  for path in paths:
    var inp = nOpen(path)                     # NOTE(review): NIO binary file
    var sts: seq[MovingStat[float64,uint32]]  # one accumulator per column
    for c in inp.rowFmt.cols:
      sts.add initMovingStat[float64,uint32](a, b, n, opt)
    var num: float
    block fileLoop:                           # rows stream in column order;
      while true:                             # EOF mid-row also ends input
        for j in 0 ..< sts.len:
          if not inp.read(num): break fileLoop
          if not num.isNaN: sts[j].push num   # NaN => missing datum; skip
    for j in 0 ..< sts.len:                   # emit "path:col stat: val ..."
      outu path, ":", j
      for mk in [mkN, mkMin, mkMax, mkSum, mkAvg, mkSdev, mkSkew, mkKurt]:
        if mk in stats: outu " ", $mk, ": ", fmtStat(sts[j], mk, fmt)
      for i, q in qs: outu (if i>0: " " else: ""), sts[j].quantile(q)
      if mkHisto in stats: outu " ", $sts[j].lgHisto
      outu "\n"
    inp.close

when isMainModule:
  import cligen; include cligen/mergeCfgEnv; dispatch niom, help={
    "paths": "[paths: 1|more paths to NIO files]",
    "fmt" : "Nim floating point output format",
    "stats": "*n* *min* *max* *sum* *avg* *sdev* *skew* *kurt* *histo*",
    "a" : "min absolute value histo-bin edge",
    "b" : "max absolute value histo-bin edge",
    "n" : "number of lg-spaced histo bins",
    "qs" : "desired quantiles"}
This is all rather complex to drive. So, you also may want to report & explain
54 | 55 | Related Work 56 | ------------ 57 | [rs](rs.md) is a flat-weighted sampler. 58 | -------------------------------------------------------------------------------- /doc/only.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | People often want to know what kind of file something is. For a long time, 4 | Apple had some whole resource fork in its FS for this metadata. Maybe it still 5 | does. On Unix the tradition is a magic number and something like `file(1)` or 6 | `libmagic(3)` that instead opens & partially parses files. This procedure is, 7 | however, very slow and often CPU bound (depending upon what the OS has cached). 8 | 9 | Cost is relative, of course. For one file it does not take long in human terms, 10 | but you can have a *lot* of files. Modern CPUs have many cores to deploy work 11 | to along these lines, but at least the Linux libmagic is very MT-UNSAFE. So, 12 | forked kids are the best way to go multi-core and cligen/procpool is an easy 13 | way to do that. This program was basically the original motivation for procpool 14 | in Nim and its original demo program. 15 | 16 | One example usage might be `rm $(only ELF)` as a kind of ghetto "make clean", 17 | assuming you have a way to rebuild any ELF object files that is. 18 | 19 | Usage 20 | ----- 21 | ``` 22 | only [optional-params] [patterns: string...] 23 | 24 | Use gen and dlr1 to generate paths, maybe skip trim and then emit any path 25 | (followed by eor) whose file(1) type matches any listed pattern. 26 | 27 | all & no can combine to mean not all patterns match. 28 | 29 | -g=, --gen= string "find $1 -print0" generator cmd with dlr1 -> $1 30 | -d=, --dlr1= string "." $1 for gen fmt; Eg. ". -type f" 31 | -t=, --trim= string "./" output pfx to trim (when present) 32 | -e=, --eor= char '\n' end of record delim; Eg.'\0' 33 | -a, --all bool false all patterns match (vs. any) 34 | -n, --no bool false no patterns match (vs. 
any) 35 | -i, --insens bool false regexes are case-insensitive 36 | -x=, --excl= set(Excl) {} tests to exclude like file(1) 37 | -j=, --jobs= int 0 use this many kids (0=auto) 38 | ``` 39 | 40 | Related Work 41 | ------------ 42 | `find|xargs -PN stdout -oL file -F:Xx:|grep ":Xx: .$@"|sed -e 's/:Xx: .$//'` is 43 | slower & needs some ":Xx:" delimiter guaranteed to be neither in paths nor types 44 | and does not have all the boolean combiner gadgets. There is probably a way to 45 | make it work with some xargs helper program, though it is debatable if that is 46 | simpler than the Nim code. 47 | -------------------------------------------------------------------------------- /fread.nim: -------------------------------------------------------------------------------- 1 | when not declared(addFloat): import std/formatFloat 2 | import std/times, cligen/[mfile, mslice] 3 | when defined(windows): 4 | import std/winlean 5 | let sin = getStdHandle(STD_INPUT_HANDLE) 6 | proc read(fd: Handle, buf: pointer, len: int): int = 7 | let len = min(int32.high.int, len).int32 8 | var nRd: cint 9 | if readFile(fd, buf, len, nRd.addr, nil) == 0: -1 else: int(nRd) 10 | else: import std/posix 11 | 12 | proc fread*(bsz=65536, lim=0i64, nPass=1, off=64, verb=false,paths:seq[string])= 13 | ## This is like `cat`, but just discards data. Empty `paths` => just read 14 | ## from stdin. That can be useful to ensure data is in an OS buffer cache 15 | ## or try to evict other data (more portably than /proc/sys/vm/drop_caches) 16 | ## for cold-cache runs, measure drive/pipe or device throughput, etc. Eg. in 17 | ## Zsh you can say: `fread \*\*` or `fread -l $((1<<30)) < /dev/urandom`. 18 | ## 19 | ## Users may pass paths to FIFOs/named pipes/other block-on-open special files 20 | ## which are skipped. Anything named is only used if mmap-able & only 1 byte 21 | ## (really 1 cache line) per 4096 is used by the process. 
Can use multiple 22 | ## passes to measure DIMM bandwidth through a CPU prefetching lens. 23 | var buf = newString(bsz) 24 | var n = 0i64 25 | let mx = if lim != 0: lim else: int64.high 26 | let t0 = if verb: epochTime() else: 0 27 | var s = 0 28 | if paths.len == 0: 29 | when defined(windows): 30 | while n < mx and (let k = read(sin, buf[0].addr, bsz); k > 0): inc n, k 31 | else: 32 | while n < mx and (let k = read(0, buf[0].addr, bsz); k > 0): inc n, k 33 | else: 34 | for path in paths: 35 | if (let mf = mopen path; mf.mem != nil): 36 | for pass in 0.. mx: break 40 | inc n, mf.mslc.len # Pass1: OS do VM page; Pass2+: CPU do 1 cache line 41 | mf.close # BUT above^can vary; Can measure via Lin.Regress. 42 | if verb: 43 | let dt = epochTime() - t0 44 | echo "fread ",n," bytes in ",dt," s: ",n.float/dt/1e9," GB/s par",s and 1 45 | 46 | when isMainModule:import cligen;include cligen/mergeCfgEnv;dispatch fread,help={ 47 | "bsz": "buffer size for stdin IO", "lim": "max bytes to read; 0=>unlimited", 48 | "nPass": "passes per file", "off": "total [off*0-origin-pass within pages]", 49 | "verb": "print bytes read", "paths": "paths: paths to read in"} 50 | -------------------------------------------------------------------------------- /doc/noc.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | ANSI CSI/OSC/SGR color escape sequences are divisive. Many enjoy the extra, 4 | categorical emphasis colors can yield. Others dislike their interference with 5 | tools oriented around unembellished text. One compromise is `$NOCOLOR`, as 6 | advocated by https://nocolor.org/. Another idea is an easy tool to wedge into a 7 | pipeline to sanitize input for a next stage &| to test "uncolored readability" 8 | (e.g. for the color blind). 
In the latter case, a simple `sed 's/\x1b\[[^m]*m//g'` filter (where `\x1b` is the literal escape byte) "mostly" does the job, *but* corner cases of [CSI/OSC syntax](https://en.wikipedia.org/wiki/ANSI_escape_code) exist that are not handled by the above.
when not declared(stderr): import std/syncio
include cligen/unsafeAddr
import std/posix, cligen, cligen/[osUt, posixUt, dents, statx], adix/topk

# (timestamp, path) pair ordered by tm inside the top-k heap below.
type TimePath = tuple[tm: int64, path: string]

proc newest*(n=1, time="m", recurse=1, chase=false, Deref=false, kinds={fkFile},
             quiet=false, xdev=false, outEnd="\n", file="", delim='\n',
             eof0=false, paths: seq[string]) =
  ##[ Echo ended by *outEnd* <= *n* newest files in file *time* order
  `{-}[bamcv]` for Birth, Access, Mod, Ctime, Version=max(MC); { `-` | CAPITAL
  means ***oldest*** }. Examined files = UNION of *paths* + optional
  *delim*-delimited input *file* ( ``stdin`` if `"-"`|if `""` & ``stdin`` is
  not a terminal ), **maybe recursed** as roots. E.g. to echo the 3 oldest
  regular files by m-time under the CWD: ``newest -n3 -t-m -r0 .``. ]##
  let err = if quiet: nil else: stderr  # nil => dents suppresses access errors
  let tO = fileTimeParse(time) #- or CAPITAL=oldest
  let it = both(paths, fileStrings(file, delim)) # CLI roots UNION file/stdin
  var t = initTopK[TimePath](n) # topk accumulator; only n entries kept live
  for root in it():
    if root.len == 0: continue # skip any improper inputs
    forPath(root, recurse, false, chase, xdev, eof0, err,
            depth, path, nmAt, ino, dt, lst, dfd, dst, did):
      if dt != DT_UNKNOWN: # unknown here => disappeared
        # stat lazily: only symlinks w/Deref or entries lacking cached stat
        if (dt==DT_LNK and Deref and doStat(dfd,path,nmAt,lst,Deref,quiet)) or
           lst.stx_nlink != 0 or doStat(dfd,path,nmAt,lst,Deref,quiet):
          if lst.stx_mode.match(kinds):
            t.push (fileTime(lst, tO.tim, tO.dir), path)
    do: discard                    # pre-recurse hook: nothing to do
    do: discard                    # post-recurse hook: nothing to do
    do: recFailDefault("newest", path) # recursion failure reporter
  for tp in t.ascending: stdout.write tp.path, outEnd # Emit in given tmOrd

when isMainModule: # Exercise this with an actually useful CLI wrapper.
  include cligen/mergeCfgEnv; dispatch newest, help={
    "n" : "number of 'newest' files",
    "time" : "timestamp to compare ({-}[bamcv]\\*)",
    "recurse": "recurse n-levels on dirs; 0:unlimited",
    "chase" : "chase symlinks to dirs in recursion",
    "xdev" : "block recursion across device boundaries",
    "Deref" : "dereference symlinks for file times",
    "kinds" : "i-node type like find(1): [fdlbcps]",
    "quiet" : "suppress file access errors",
    "outEnd" : "output record terminator",
    "file" : "optional input (\"-\"|!tty=stdin)",
    "delim" : "input file record delimiter" }
The byte-length measurement makes it seem like the Nim is "4 bytes easier"
An ambitious person can 52 | adjust their keyboard mappings to their input text families to minimize this 53 | metric (at some perhaps significant re-learning costs) making it useless. 54 | I don't do that. So, I don't find it useless. :-) 55 | 56 | Presently `keydowns` does not optimize for keys which are traditionally 57 | unmodified by SHIFT. The headliner example of this is the space bar where you 58 | can continue holding SHIFT across the space in "{ }". My own typing rarely 59 | optimizes for this, but since it is possible, the program should grow an option 60 | to measure both ways. 61 | -------------------------------------------------------------------------------- /doc/zeh.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | The [Zsh](https://zsh.org/) `setopt EXTENDEDHISTORY` feature is nice. It adds 3 | to the basic multi-line command format the starting epoch time & command 4 | duration to the history file. 5 | 6 | However, if you have various user accounts and different computers you may want 7 | to merge history files. The history file may just get too big for the shell to 8 | load efficiently. `zeh` goes at about 1..1.5 GB/s for me and yet still takes 9 | 10s of milliseconds for a 600k/26MB file and seems faster than Zsh itself. 10 | Enough 10s and you have noticeable delay. 11 | 12 | So, you may want to perform various manipulations to thin your history. E.g., 13 | discarding very short commands, less than say 5 bytes. Trailing newlines are 14 | easy to enter by mistake (either with \ or by pasting newlines) but do not 15 | add any value (at least to my command histories). And so on. The format has 16 | been the same for like 30 years and it's easy enough to whip up tools to do 17 | basic things. This is one. 18 | 19 | # Usage 20 | ``` 21 | zeh [optional-params] [paths: string...] 
22 | 23 | Check|Merge, de-duplicate&clean short cmds/trailing \n Zsh EXTENDEDHISTORY 24 | (format ": {t0%d}:{dur%d};CMD-LINES[\]"); Eg.: zeh -tm3 h1 h2 >H. Zsh saves 25 | start & duration @FINISH TIME => with >1 shells in play, only brief cmds match 26 | the order of timestamps in the file => provide 3 more modes on top of --check: 27 | --endT, --sort, --begT. 28 | 29 | -m=, --min= int 0 Minimum length of a command to keep 30 | -t, --trim bool false Trim trailing whitespace 31 | -c, --check bool false Only check validity of each of paths 32 | -s, --sort bool false sort exactly 1 path by startTm,duration 33 | -b, --begT bool false add dur to take startTm,dur -> endTm,dur 34 | -e, --endT bool false sub dur to take endTm,dur -> startTm,dur 35 | ``` 36 | 37 | # Testing 38 | This is new code using a new `adix/ways.kWayMerge` iterator. So, it's very possible 39 | there are bugs, but this works anyway, and maybe constitutes an example: 40 | ```sh 41 | seq2zh() { sed 's/^\(.*\)$/: 1000000\1:0;cmd\1/' ;} 42 | zh2cmd() { sed 's/.*;//' ;} 43 | seq -w 1 3 100|seq2zh>by3 44 | seq -w 2 2 100|seq2zh>by2 45 | seq -w 4 4 100|seq2zh>by4 46 | cat by[234]|zh2cmd|sort>costly 47 | zeh by*|zh2cmd>cheap 48 | cmp cheap costly 49 | ``` 50 | 51 | # Examples 52 | Check a file { or do a parsing benchmark :-) } : 53 | 54 | `zeh -c $ZDOTDIR/history` 55 | 56 | XXX should really add a bunch of vignettes here. 57 | 58 | # Future work 59 | An idea for near-term extensions might be adding a fancier filter language than 60 | just "length >= min", such as the first whitespace delimited command is not in 61 | some set (e.g. `ps`).
62 | -------------------------------------------------------------------------------- /oft.nim: -------------------------------------------------------------------------------- 1 | import cligen, cligen/[mfile, mslice, osUt], adix/amoft, std/math 2 | from std/strutils as su import nil 3 | when not declared(stderr): import std/syncio 4 | 5 | proc pyIx[T](vs: openArray[T], i: int): T = vs[if i < 0: i + vs.len else: i] 6 | 7 | proc oft*(input="/dev/stdin", delim=" ", mxCol=0, errate=0.005, cover=0.98, 8 | salts: seq[int] = @[], specs: seq[string]) = 9 | ## Write most often seen N keys in various columns to outFile's. Specs are 10 | ## `[,(0)[,outFile(stdout)]]`. ColNos are Py-like 0-origin,signed. 11 | ## Algorithm is approximate fast one-pass over mmap|stream input. E.g., to 12 | ## write most frequent final column to stdout do: ``oft 10,-1``. (For exact, 13 | ## see `lfreq`, possibly with column splitting to FIFOs). 14 | let k = specs.len # Handle all `k` keys in one pass 15 | if k < 1: stderr.write "No specs requested. -h for help.\n"; return 16 | var keyC = newSeq[int](k) 17 | var oFil = newSeq[File](k) 18 | var amos = newSeq[AMOft[string, uint32]](k) 19 | let w = ceil(exp(1.0)/errate).int # Qs: Make per key-col? Snap to pow2? 
20 | let nTab = ceil(-ln(1.0 - cover)).int 21 | for i, spec in specs: # Parse key-output specifiers 22 | let params = su.split(spec, ',') 23 | if params.len < 1: 24 | stderr.write "too few sub-params in spec ", spec, "\n"; continue 25 | amos[i] = initAMOft[string, uint32](su.parseInt(params[0]), w, nTab, salts) 26 | keyC[i] = if params.len > 1: su.parseInt(params[1]) else: 0 27 | oFil[i] = if params.len > 2: open(params[2], fmWrite) else: stdout 28 | let sep = initSep(delim) # Init into-seq[MSlice] splitter 29 | var row: seq[MSlice] = @[] 30 | let mf = mopen(input) 31 | 32 | template sweep(mf, T) {.dirty.} = 33 | for line in mSlices(mf, eat='\0'): # RO mmap | slices from stdio 34 | sep.split(line, row, mxCol) # Split into columns 35 | for i in 0 ..< k: # Update our 1-to-several AMOfts 36 | amos[i].inc $pyIx(row, keyC[i]) 37 | for i in 0 ..< k: # Pops out like sort -gk|tail -n 38 | for (k, c) in amos[i].mostCommon: oFil[i].urite c, " ", k, "\n" 39 | if not oFil[i].isNil and oFil[i] != stdout: oFil[i].close 40 | 41 | if mf.mem.isNil: sweep(input, string) else: sweep(mf, MSlice) 42 | 43 | when isMainModule: include cligen/mergeCfgEnv; dispatch oft, help={ 44 | "input" : "input data path", 45 | "delim" : "delimiting (repeats=>any num; \"white\")", 46 | "mxCol" : "max columns in input to parse", 47 | "errate": "size tables to make err `nSamp\\*this`", 48 | "cover" : "enough tables to make coverage this", 49 | "salts" : "override random salts"} 50 | -------------------------------------------------------------------------------- /okpaths.nim: -------------------------------------------------------------------------------- 1 | import std/[os, posix, strutils, sets] 2 | 3 | if paramCount() < 1 or paramCount() mod 5 != 0: quit """Usage: 4 | okpaths ENVAR [DELIM(:) [ITYPE{bcdpfls}(d) [PERMS{rwx}(x) [DEDUP{FL*}(F)]]]] 5 | 6 | echos re-assembled value for $ENVAR delimited by char DELIM where each element 7 | kept is i-node type ITYPE with permissions PERMS & optional 
de-duplication. 8 | 9 | Eg., PATH=`okpaths PATH` keeps only existing (d)irs executable(x) by an invoking 10 | user. DEPDUP starting with 'F' means keep F)irst use, while 'L' keeps L)ast use 11 | & other means no de-dup (this is case-insensitive). So, eval `okpaths PATH` is 12 | nice in rc/init scripts for Unix shells. 13 | 14 | Blocks of the 5 params can repeat (since fork&exec add to shell init time).""",0 15 | 16 | for shf in countup(0, paramCount()-1, 5): 17 | let delim = if paramCount()>1+shf: paramStr(2+shf)[0] else: ':' 18 | let kinds = if paramCount()>2+shf: paramStr(3+shf) else: "d" 19 | let perms = if paramCount()>3+shf: paramStr(4+shf) else: "rx" 20 | let dedup = if paramCount()>4+shf: paramStr(5+shf).toUpper[0] else: 'F' 21 | 22 | func kind(mode: Mode): char = 23 | if mode.S_ISBLK : 'b' 24 | elif mode.S_ISCHR : 'c' 25 | elif mode.S_ISDIR : 'd' 26 | elif mode.S_ISFIFO: 'p' 27 | elif mode.S_ISREG : 'f' 28 | elif (when not defined(windows): mode.S_ISLNK else: false): 'l' 29 | elif (when not defined(windows): mode.S_ISSOCK else: false): 's' 30 | else: '.' 
31 | 32 | proc perm(perms: string): cint = 33 | if 'r' in perms: result = result or R_OK 34 | if 'w' in perms: result = result or W_OK 35 | if 'x' in perms: result = result or X_OK 36 | 37 | let prms = perm(perms) 38 | var res: seq[string] # Result to output (re-joined with delim) 39 | var ids: seq[Ino] # i-node identity; [i] tracks res[i] 40 | var st: Stat 41 | var did: HashSet[Ino] 42 | for e in paramStr(1+shf).getEnv.split(delim): 43 | let ec = e.cstring 44 | if stat(ec, st) == 0 and st.st_mode.kind in kinds and access(ec, prms) == 0: 45 | if dedup == 'F': # F)irst retention 46 | if st.st_ino notin did: # Only add if have not already 47 | res.add e 48 | did.incl st.st_ino 49 | elif dedup == 'L': # L)ast retention 50 | if st.st_ino in did: # Already added; First delete [old] 51 | let ino = ids.find(st.st_ino) 52 | res.delete ino 53 | ids.delete ino 54 | did.incl st.st_ino # Add it 55 | ids.add st.st_ino 56 | res.add e 57 | else: # Not de-duplicating 58 | res.add e 59 | 60 | echo paramStr(1+shf),"=",join(res, $delim) 61 | -------------------------------------------------------------------------------- /ndelta.nim: -------------------------------------------------------------------------------- 1 | import cligen/[sysUt, mfile, mslice], cligen 2 | import std/parseutils; import std/strutils except parseFloat 3 | when not declared(stderr): import std/syncio 4 | 5 | type DKind = enum absolute, ratio, relative, perCent 6 | 7 | proc load(path: string): MSlice = 8 | if (let m = mopen(path); m != nil): m.toMSlice 9 | else: path.readFile.toMSlice(keep=true) 10 | 11 | proc delta(num0, num1: float, kind: DKind, n: int): string = 12 | template ffDec(x: float): untyped = formatFloat(x, ffDecimal, n) 13 | case kind: 14 | of absolute: ffDec(num1 - num0) 15 | of ratio : (if num0 != 0.0: ffDec(num1/num0) else: "INF") 16 | of relative: (if num0 != 0.0: ffDec(num1/num0 - 1) else: "INF") 17 | of perCent : (if num0 != 0.0: ffDec(100.0*num1/num0 - 1) else: "INF") 18 | 19 | proc 
ndelta(paths: seq[string], kind=ratio, delims="white", n=3, sloppy=false) = 20 | ## Replace numbers in token-compatible spots of `paths[0]` & `paths[1]` with 21 | ## (absolute | ratio | relative | perCent) deltas. To trap out-of-order data, 22 | ## differences in context are highlighted unless `sloppy` is true. 23 | if paths.len != 2: Help !! "Need 2 paths; Full $HELP" 24 | let sep = initSep(delims) 25 | let tok0 = paths[0].load.frame(sep) # Fully split both files into 2.. 26 | let tok1 = paths[1].load.frame(sep) #.. seq[TextFrame]s of tokens. 27 | if tok0.len != tok1.len: # Check compatibility 28 | stderr.write "WARNING: files have different token structure\n" 29 | for i in 0 ..< tok0.len: # Now loop: identify & compare floats 30 | if tok0[i].ms.len == 0: continue # Empty data frame (if not repeat) 31 | if tok0[i].isSep: 32 | stdout.write tok0[i].ms 33 | else: # Both tokens are non-separator text 34 | var num0, num1: float 35 | let s0 = $tok0[i].ms # An interesting but tricky extension.. 36 | let s1 = $tok1[i].ms #.. would be optional parse of x +- dx 37 | if s0.parseFloat(num0) == s0.len and s1.parseFloat(num1) == s1.len: 38 | stdout.write delta(num0, num1, kind, n) 39 | if kind == perCent: stdout.write '%' 40 | elif not sloppy and tok0[i].ms != tok1[i].ms: # Differing Context 41 | stdout.write "\e[1m", tok0[i].ms, "\e[22m<>\e[3m", tok1[i].ms, "\e[23m" 42 | else: # Same context/labels/etc. 43 | stdout.write tok0[i].ms 44 | 45 | include cligen/mergeCfgEnv 46 | dispatch ndelta,help={"kind" : "DiffKind: absolute, ratio, relative, perCent", 47 | "delims": "repeatable delim chars", 48 | "n" : "FP digits to keep", 49 | "sloppy": "allow non-numerical context to vary silently"} 50 | -------------------------------------------------------------------------------- /doc/dups.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Many processes in a system can create duplicate files. 
These (usually) waste 5 | space, but at the very least one often wants to "map out" such duplication. 6 | 7 | `lncs` lets you map out varying names for the same i-node. This utility lets 8 | you map out clusters of i-nodes with exactly duplicate data (under the name of 9 | the first found hard link for a file). 10 | 11 | This was the original `cligen/examples` utility program. There are many open 12 | source tools like this out on the internet. One tool author even popped up 13 | on a [cligen issue thread](https://github.com/c-blake/cligen/issues/99). 14 | This one is pretty efficient. I continue to benchmark it as ~2X faster than 15 | jdupes in a fully RAM-cached test case, but for uncached use cases it is of 16 | course very dominated by IO speed/organization. 17 | 18 | A related case is *near* duplicate data, but that deserves [its own github 19 | repo](https://github.com/c-blake/ndup). 20 | 21 | Usage 22 | ----- 23 | 24 | ``` 25 | dups [optional-params] [paths: string...] 26 | 27 | Print sets of files with duplicate contents. Examined files are UNION of paths & 28 | optional delim-delimited input file ( stdin if "-"|if ""& stdin not a tty ). 29 | 30 | E.g.: 31 | find -type f -print0 | dups -d\\0. 32 | Exits non-0 if a dup exists. 33 | 34 | Trusting hashes can give false positives, but sorting can be slow w/many large 35 | files of the same size|hash. slice can reduce IO, but can also give false pos. 36 | {False negatives not possible. 0 exit => surely no dups.}. 37 | 38 | Within-set sort is by st_blocks if summ is logged, then by requested file time 39 | {v=max(m,c)} & finally by st_ino. 
40 | 41 | Options: 42 | -f=, --file= string "" optional input ( "-" | !tty = stdin ) 43 | -d=, --delim= char '\n' input file delimiter; \0 -> NUL 44 | -r=, --recurse= int 1 recurse n-levels on dirs; 0: unlimited 45 | -F, --follow bool false follow symlinks to dirs in recursion 46 | -x, --xdev bool false block cross-device recursion 47 | -D, --Deref bool false dereference symlinks 48 | -m=, --minLen= int 1 minimum file size to consider 49 | -s=, --slice= string "" file slice (float|%:frac; <0:tailRel) 50 | -H=, --Hash= Digest wy hash function [size|wy|nim|SHA] 51 | -c, --cmp bool false compare; do not trust hash 52 | -j=, --jobs= int 1 Use this much parallelism 53 | -l=, --log= set(Lg) osErr >stderr{ osErr, summ } 54 | -b, --brief bool false do NOT print sets of dups 55 | -t=, --time= string "" sort each set by file time: {-}[bamcv].* 56 | -o=, --outDlm= string "\t" output internal delimiter 57 | -e=, --endOut= string "\n" output record terminator 58 | ``` 59 | -------------------------------------------------------------------------------- /doc/tw.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Sometimes you have maybe-colorized, maybe-utf8 output which has "tabular shape" 4 | but a trailing final column prone to line wrap which then makes the table hard 5 | to read. 6 | 7 | For example, C compiles often have very long command lines due to the lack of 8 | any other standard source of compiler options. So, some `ps ww` invocation may 9 | create a "table" with a dozen terminal rows. 10 | 11 | Having an easy way to "clip" or "crop" lines to fit in your terminal can thus be 12 | nice. 
13 | 14 | Usage (***NOT*** a `cligen` utility) 15 | ----- 16 | 17 | With no argument, this roughly reproduces what many VTXXX compatible terminals 18 | can do with `printf '\033[?7l'; command; printf '\033[?7h'`: 19 | ```sh 20 | $ input-generator|tw 21 | ``` 22 | 23 | Unlike the VTXXX approach, though, with `tw` you can optionally pass a first 24 | argument which is an integer number of rows to limit wrapping to. For the 25 | motivating compiler example, this can be useful:[^1] 26 | ```sh 27 | $ pd -w|tw 2 28 | ``` 29 | 30 | Finally, with a second argument you can override the terminal width detected 31 | by $COLUMNS and ioctls, as in 32 | 33 | ```sh 34 | $ pd -w|tw 2 40 35 | ``` 36 | One application of this last mode might be useful to "re-format" a table given 37 | easily split leading & trailing text per row for re-assembly fitting in bounds. 38 | 39 | Related Work 40 | ------------ 41 | While I did look, I did not find any one really doing this anywhere, but it is 42 | hard to make such searches truly exhaustive. The core of this is just a 30-line 43 | state machine. The idea is pretty obvious though - basically the "width-wise" 44 | version of `head` or `tail`. In fact, in combination they let you crop to your 45 | viewable terminal via e.g. `tail -n $LINES|tw`.[^2] 46 | 47 | If you know the input has neither ANSI SGR Color escape sequences nor multi-byte 48 | utf8 characters then you can, of course, just `cut -c "1-${COLUMNS:-80}"`. 49 | 50 | If you are willing to depend upon regex and terminal libraries as well as do 51 | terminal manipulation (like alternate screen buffers etc.) and you never want 52 | bounded-but-multiple rows then you can do `less -RES --redraw-on-quit 53 | --rscroll=-`. That's a lot of IFs, though.[^3] `tw` is also several times 54 | faster due to its more limited scope. 
55 | 56 | Future Work 57 | ----------- 58 | The current impl does handle Unicode combining characters (including as the 59 | final non-clipped character) but not double wide or grapheme extension type 60 | renders. 61 | 62 | [^1]: `pd` here is `procs display` as per https://github.com/c-blake/procs 63 | 64 | [^2]: For me this is just `|t|tw` which may become `|ttw` or `|crop` someday. 65 | 66 | [^3]: [noc](noc.md) lets you enforce no escape sequence part of the IFs. 67 | -------------------------------------------------------------------------------- /doc/du.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | This is a very small program, mostly recapitulating functionality from GNU `du` 4 | that began just as a simple cligen/examples program, but might conceivably have 5 | broader use/popularity. 6 | 7 | Some of the value add differences are adding some missing short form flags for 8 | commonly desirable giga/tera/peta `--block-size=`s, de-conflating a few baggage 9 | of history things like shell patterns vs. regexes & --bytes => apparent-size. 10 | 11 | (Also, it does not try to do anything with file times. That seems like weird 12 | mission creep in the GNU `du`.) 13 | 14 | Usage 15 | ----- 16 | ``` 17 | du [optional-params] [roots: string...] 18 | 19 | Mostly compatible replacement for GNU du using my 1.4-2x faster file tree walk 20 | that totals st_blocks*512 with more/better short options. Notable differences: 21 | drops weakly motivated options {time, [aDHt], max-depth, separate-dirs} 22 | outEnd replaces null|-0; patterns are all PCRE not shell and need ".*" 23 | bytes does not imply apparent-size 24 | dereference does not imply chase. 
25 | 26 | Options: 27 | -?, --help print this cligen-erated help 28 | -f=, --file= string "" optional input ("-"|!tty=stdin) 29 | -d=, --delim= char '\n' input file record delimiter 30 | -x, --one-file-system bool false block recursion across devices 31 | --chase bool false chase symlinks in recursion 32 | -L, --dereference bool false dereference symlinks for size 33 | -a, --apparent-size bool false instead total st_bytes 34 | -i, --inodes bool false instead total inode count 35 | -l, --count-links bool false count hard links multiple times 36 | -X=, --exclude-from= string "" exclude all pattern(s) in named file 37 | -e=, --exclude= strings {} exclude paths matching pattern(s) 38 | -b, --bytes bool false like --block-size=1 39 | -k, --kilo bool false like --block-size=1[Kk] (DEFAULT) 40 | -m, --mega bool false like --block-size=1[Mm] 41 | -g, --giga bool false like --block-size=1[Gg] 42 | -t, --tera bool false like --block-size=1[Tt] 43 | -p, --peta bool false like --block-size=1[Pp] 44 | -B=, --block-size= string "" units; CAPITAL sfx=metric else binary 45 | -s, --summarize bool false echo only total for each argument 46 | --si bool false -[kmgt] mean powers of 1000 not 1024 47 | -h, --human-readable bool false print sizes in human readable format 48 | -c, --total bool false display a grand total 49 | -o=, --outEnd= string "\n" output record terminator 50 | -q, --quiet bool false suppress most OS error messages 51 | ``` 52 | -------------------------------------------------------------------------------- /doc/topn.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | Sometimes you have a pipeline emitting various numbers and you want to get (in 4 | one pass since input is a pipeline, but also for memory bandwidth efficiency) 5 | reports of the top-N (N biggest) according to various columns of the input. 6 | This is what `topn` is for. 
Internally, it is a very thin wrapper around 7 | [adix/topk](https://github.com/c-blake/adix/blob/master/adix/topk.nim). 8 | 9 | Usage 10 | ----- 11 | ``` 12 | topn [optional-params] [specs: string...] 13 | 14 | Write spec'd cols of topN-rows-by-various-other-cols to outFile's. 15 | 16 | A spec is [,(0)[,outCol(same)[,outFile(stdout)]]]. 17 | 18 | ColNos are Py-like 0-origin,signed. 19 | 20 | outCol can be an A:B exclusive or A..B slice. 21 | 22 | Algo is fast one-pass over (mmap|stream) input. 23 | 24 | Simple & Fancy E.g.s: 25 | find . -type f -printf '%C@ %p\n' | topn -m1 5 # newest 5 by ctime 26 | topn 9,1,-1,x # writes last col of top 9-by-col-1 rows to file x. 27 | 28 | If n!=0 then can end in '%' to instead mean 100*pct/n rows. 29 | 30 | Options: 31 | -i=, --input= string "/dev/stdin" input data path 32 | -d=, --delim= string " " delimiting (repeats=>any num; "white") 33 | -m=, --mxCol= int 0 max columns in input to parse 34 | -n=, --n= int 0 scale for '%' amounts 35 | -o=, --order= TopKOrder Cheap order: Cheap, Ascending, Descending 36 | -p=, --partn= Partn last partition: last, ran 37 | ``` 38 | 39 | Very Simple Example 40 | ------------------- 41 | ```sh 42 | $ paste <(seq 1 100) <(seq 1 10 1000) | topn 5 43 | 96 951 44 | 97 961 45 | 98 971 46 | 99 981 47 | 100 991 48 | ``` 49 | 50 | Fancier Example 51 | --------------- 52 | This will recurse in `.` emitting c-time, m-time, and path names to a pipeline. 53 | ```sh 54 | find . -printf '%Cs %Ts %P\n' | 55 | topn 3,0,2 4,1,:,/dev/stderr 56 | ``` 57 | The `topn` part collects the top-3 paths (2) by 0-origin column 0 (ctime) and 58 | *whole rows* of the top-4 by 0-origin column 1 (mtime), emitting the first to 59 | stdout and the second to stderr. (Yes, [`newest`](newest.md) handles this 60 | *exact* example and mismatched 3/4 are weird, but it's just an *example*). 
61 | 62 | Any Python-like `[a]:[b]` exclusive slice or Nim `[a]..[b]` inclusive slice is 63 | ok, but non-numeric|missing a/b become 0 and out of bounds refs map to `""`. 64 | 65 | If you want a top fraction like 10% (instead of an absolute number like "3") 66 | then you can also get that ***IF*** you provide the scale via `-n` and also 67 | tell `topn` to use it via, e.g., `topn -n4321 10%,0,2`. (Yes, this is mostly 68 | just a convenience to multiply 0.1 by 4321 - if you do not know `n` ahead of 69 | time, a one-pass, tiny memory algo is not possible.) 70 | -------------------------------------------------------------------------------- /unfold.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/syncio 2 | include cligen/unsafeAddr 3 | import cligen/[sysUt, osUt], std/re, cligen # cligen is early for `HelpError` 4 | 5 | proc unfold(sep="\t", n=0, before="", after="", ignore=false, extended=false) = 6 | ## Join blocks of stdin lines into one line sent to stdout. 
7 | var eolBuf = "\n"; let eol = eolBuf[0].addr 8 | let nS = sep.len; let sep = sep[0].unsafeAddr 9 | var i = 0 10 | var need = false 11 | var str: string 12 | var flags = {reStudy} 13 | if ignore: flags.incl reIgnoreCase 14 | if extended: flags.incl reExtended 15 | template wrLine = 16 | if stdout.uriteBuffer(ln, nLn-1) != nLn-1: return 17 | template wrEOL = 18 | if stdout.uriteBuffer(eol, 1) != 1: return else: need = false 19 | template wrSep = 20 | if stdout.uriteBuffer(sep,nS) != nS: return else: need = true 21 | if n > 0 and before.len == 0 and after.len == 0: 22 | for (ln, nLn) in stdin.getDelims: # :( My ancient rec_rdln is ~1.3x faster 23 | inc i 24 | wrLine() # Always output the input 25 | if i == n: wrEOL(); i = 0 # but EOL only after n cycles 26 | else : wrSep() # otherwise just sep 27 | elif after.len != 0 and n == 0 and before.len == 0: 28 | let rx = re(after, flags) 29 | for (ln, nLn) in stdin.getDelims: 30 | inc i 31 | wrLine() # Always output the input 32 | str.setLen nLn-1; copyMem str[0].addr, ln, nLn-1 33 | if rx in str: wrEOL() # but EOL only only if line matches 34 | else : wrSep() # otherwise just sep 35 | elif before.len != 0 and n == 0 and after.len == 0: 36 | let rx = re(before, flags) # A somewhat different state machine 37 | for (ln, nLn) in stdin.getDelims: 38 | inc i 39 | if i == 1: 40 | wrLine(); need = true # Write 1st line unconditionally 41 | else: # Copy `ln` to `str` for pattern match 42 | str.setLen nLn-1; copyMem str[0].addr, ln, nLn-1 43 | if rx in str: wrEOL() # Then terminate only if line matches 44 | else : wrSep() # otherwise just sep 45 | wrLine() # Then output the input 46 | else: Help !! 
"Set `n` | `before` | `after`; Full $HELP" 47 | if need: wrEOL() # May need final \n (non-delimiting sep gives user clue) 48 | 49 | include cligen/mergeCfgEnv; dispatch unfold, help={ 50 | "n" : "Join `|n|` lines into 1", 51 | "after" : "join blocks ending with a matching line", 52 | "before" : "join blocks beginning with a matching line", 53 | "sep" : "separates the old lines within the new", 54 | "ignore" : "regex are case-insensitive", 55 | "extended": "regexes are nim re 'extended' syntax", 56 | } 57 | -------------------------------------------------------------------------------- /doc/cols.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is a faster to key stroke (& execute) version of `awk '{print $X}'`. It 5 | also acts as demo/example code for some library APIs from 6 | [`cligen/`](https://github.com/c-blake/cligen) & was a very early member of 7 | `cligen/examples/` itself. 8 | 9 | Something it provides over the `awk` invocation is 10 | - the ability to delete indicated columns (with `-c, --cut`). 11 | 12 | Things it provides over both `awk` & GNU coreutils `cut` are the ability to: 13 | - shift column-numbering origins (e.g. 0 | 1-origin) 14 | - do either inclusive (..) OR exclusive (:) ranges/slices 15 | - allows numbers < 0 to mean from-the-end (like Python) { use `--` or \\-escape 16 | (or quote) whitespace before `'-'` to avoid treatment as an option }. 17 | 18 | Over just `cut` it provides: 19 | - default to keep; more terse CL syntax than "--complement" 20 | - ability to split 1 column on repeated bytes (like `awk`) 21 | 22 | Usage 23 | ----- 24 | ``` 25 | cols [optional-params] colNums or A..B | X:Y (in|ex)clusive ranges thereof 26 | 27 | Write just some columns of input to output; Memory map input if possible. 
28 | 29 | -i=, --input= string "/dev/stdin" path to mmap|read as input 30 | -r=, --rowDlm= char '\n' inp row delimiter character 31 | -d=, --delim= string "white" inp field dlm chars; len>0 => fold 32 | -o=, --output= string "/dev/stdout" path to write output file 33 | -s=, --sepOut= string " " output field separator 34 | -b, --blanksOk bool false allow blank output rows 35 | -c, --cut bool false cut/censor specified columns, not keep 36 | --origin= int 1 origin for colNums; 0=>signed indexing 37 | -0, --O0 bool false shorthand for --origin=0 38 | -t=, --term= char '\n' set output row terminator (e.g. \0) 39 | ``` 40 | 41 | Examples 42 | -------- 43 | After: 44 | ``` 45 | (echo 1 2 3 4; echo; echo 4 5 6 7) > /tmp/d 46 | ``` 47 | you get: 48 | ``` 49 | cols 2 4 < /tmp/d 50 | ``` 51 | producing 52 | ``` 53 | 2 4 54 | 5 7 55 | ``` 56 | With `cols -c0 -- -4..-3` you get: 57 | ``` 58 | 3 4 59 | 6 7 60 | ``` 61 | since you are cutting 0-origin 4th from end & 3rd from end. 62 | Meanwhile with `cols -0 1:3` you get: 63 | ``` 64 | 2 3 65 | 5 6 66 | ``` 67 | since you are keeping the exclusive slice indicating 0-origin 1 & 2. 68 | 69 | With all of them if you add `-b` the blank row propagates, or you can make the 70 | output separated TAB or terminator NUL, etc. 71 | 72 | That's it, really. This intends to be a very simple utility. Among the most 73 | advanced examples I can think of is : 74 | ``` 75 | ls -l --zero | cols -cr\\0 1..4 -t\\0 76 | ``` 77 | to produce a list of 0-terminated rows where (for GNU ls) the first 4 columns 78 | are guaranteed to be space separated and any newlines are from path names. The 79 | consumer of that output data needs to remain careful, of course. 80 | -------------------------------------------------------------------------------- /doc/crp.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | This is a port of `rp.nim` to a Nim-written C code generator. 
The point was 5 | (mostly) to experiment with how much noisier pure C syntax is than the Nim-Nim 6 | `rp`, in awk 1-liner-like problem settings. Side interest was speed of machine 7 | code generation with `tcc` and speed of execution of said fast-generated code. 8 | 9 | Consult [doc/rp.md](rp.md) for more pontificating on the idea space which would 10 | require few words if it were more commonly used. Personally, I think C even 11 | aided with macros is a bit too noisy for the ergonomics to be great. 12 | 13 | Usage 14 | ----- 15 | 16 | ``` 17 | crp [optional-params] C stmts to run (guarded by where); none => echo row 18 | 19 | Gen+Run prelude,fields,begin,where,stmts,epilog row processor against input. 20 | 21 | Defined within where & every stmt are: 22 | s[idx] & row => C strings, i(idx) => int64, f(idx) => double. 23 | nf & nr (AWK-ish), rowLen=strlen(row); idx is 0-origin. 24 | 25 | A generated program is left at outp.c, easily copied for "utilitizing". If you 26 | know AWK & C, you can learn crp FAST. 27 | 28 | Examples (most need data): 29 | seq 0 1000000 | crp -w'rowLen<2' # Print short rows 30 | crp 'printf("%s %s\n", s[1], s[0])' # Swap field order 31 | crp -vt=0 t+=nf -e'printf("%g\n", t)' # Prn total field count 32 | crp -vt=0 -w'i(0)>0' 't+=i(0)' -e'printf("%g\n", t)' # Total>0 33 | crp 'float x=f(0)' 'printf("%g\n", (1+x)/x)' # cache field 0 parse 34 | crp -d, -fa,b,c 'printf("%s %g\n",s[a],f(b)+i(c))' # named fields 35 | crp -mfoo 'printf("%s\n", s[2])' # column if row matches 36 | 37 | Add niceties (eg. prelude="#include ") to ~/.config/crp. 
38 | 39 | Options: 40 | -p=, --prelude= strings {} C code for prelude/include section 41 | -b=, --begin= strings {} C code for begin/pre-loop section 42 | -v=, --var= strings {} preface begin with double var decl 43 | -m=, --match= string "" row must match this regex 44 | -w=, --where= string 1 C code for row inclusion 45 | -e=, --epilog= strings {} C code for epilog/end loop section 46 | -f=, --fields= string "" delim-sep field names (match row0) 47 | -g=, --genF= string "$1" make Field names from this Fmt;Eg c_$1 48 | -c=, --comp= string "" "" => tcc {if run: "-run"} {args} 49 | -r, --run bool true Run at once using tcc -run .. < input 50 | -a=, --args= string "" "" => -I$HOME/s -O 51 | -o=, --outp= string /tmp/crpXXX output executable; .c NOT REMOVED 52 | -i=, --input= string "" path to read as input; ""=stdin 53 | -d=, --delim= string " \t" inp delim chars for strtok 54 | -u, --uncheck bool false do not check&skip header row vs fields 55 | -M=, --MaxCols= int 0 max split optimization; 0 => unbounded 56 | ``` 57 | -------------------------------------------------------------------------------- /doc/ft.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | The "usability idea" is to leverage user recall of `test` flags by staying as 5 | close as reasonable to that set. The only real difference is that `ft` {for 6 | f)ile t)ype} uses `h` to mean a h)ard link not an alias for -L. 7 | 8 | Yes, there are more verbose ways to do this with `man 1 find` and shell `for` 9 | loops and more terse ways to do it with Zsh extended globbing (`man 1 zshexpn`). 10 | 11 | I suspect most people are more comfortable with a shell loop that would also be 12 | portable, but OTOH, it really is a very small program. 13 | 14 | Usage 15 | ----- 16 | ``` 17 | ft [optional-params] [paths: string...] 18 | 19 | Batch (in both predicates & targets) test / [ . Emit subset of paths that 20 | pass expr. 
E.g.: $(ft -eL *) =~ Zsh extended glob *(@). Can also read stdin 21 | as in find -type f|ft -ew. (Yes, can cobble together less tersely w/GNU find 22 | -files0-from | find GLOB -maxdepth 0 PREDICATE.) Maybe counter-intuitively, 23 | exit with status = match count (0=NONE). 24 | 25 | -f=, --file= string "" optional input ( `"-"` | !tty = ``stdin`` ) 26 | -d=, --delim= char '\n' input file delimiter; `\\0` -> NUL 27 | -t=, --term= char '\n' output path terminator 28 | -p=, --pattern= string "$1" emit a \$1-using pattern; E.g. "match:\$1" 29 | -q, --quiet bool false Do not emit; Just count as exit status 30 | -s, --stat bool false Use stat not lstat; Others say "dereference" 31 | -e=, --expr= string "e" Concatenated extended one-letter test(1) codes 32 | e (e)xists in any way 33 | b is (b)lock special 34 | c is (c)haracter special 35 | d is a (d)irectory 36 | f is a regular (f)ile 37 | l|L is a symbolic (l)ink; NOTE: h differs! 38 | p is a named (p)ipe {aka FIFO} 39 | S is a (S)ocket;CASE differs from ls/find 40 | s has a (s)ize greater than zero 41 | h is a (h)ard link; Link count > 1 42 | N (N)ew; modify time > access time 43 | k has its stic(k)y bit set 44 | u its set-(u)ser-ID bit is set 45 | g is set-(g)roup-ID 46 | O is (O)wned by the effective user ID 47 | G is owned by effective (G)roup ID 48 | r|R|A user|World|Group can (r)ead 49 | w|W|I user|World|Group can (w)rite 50 | x|X|E user|World|Group can e(x)ecute|travers 51 | In all cases a file must exist for 'true' 52 | Codes are logically ANDed; '^' prefix => NOT 53 | ``` 54 | -------------------------------------------------------------------------------- /since.nim: -------------------------------------------------------------------------------- 1 | when not declared(stderr): import std/syncio 2 | include cligen/unsafeAddr 3 | import std/[posix, sets, strutils], cligen, cligen/[osUt, posixUt, dents, statx] 4 | 5 | proc since*(refPath: string, refTime="", time="m", recurse=1, chase=false, 6 | Deref=false, 
kinds={fkFile}, quiet=false, xdev=false, file="", 7 | delim='\n', eof0=false, noDot=false, unique=false, 8 | paths: seq[string]) = 9 | ## Print files whose *time* is since|before *refTime* of *refPath*. Files 10 | ## examined = UNION of *paths* + optional *delim*-delimited input *file* ( 11 | ## ``stdin`` if `"-"`|if `""` & ``stdin`` is not a terminal ), **maybe 12 | ## recursed** as roots. To print regular files m-older than LAST under CWD: 13 | ## ``since -t-m -pLAST -r0 .`` 14 | let err = if quiet: nil else: stderr 15 | let tO = fileTimeParse(time) #- or CAPITAL=oldest 16 | let tR = if refTime.len > 0: fileTimeParse(refTime) else: tO 17 | var refStat: Statx 18 | if stat(refPath, refStat) != 0: quit(1) 19 | let r = fileTime(refStat, tR.tim, tR.dir) 20 | var dip = initHashSet[string]() 21 | let it = both(paths, fileStrings(file, delim)) 22 | var roots: seq[string] 23 | for root in it(): (if root.len > 0: roots.add root) 24 | for rt in (if roots.len == 0 and paths.len == 0: @["."] else: roots.move): 25 | forPath(rt, recurse, true, chase, xdev, eof0, err, 26 | depth, path, nmAt, ino, dt, lst, dfd, dst, did): 27 | if dt != DT_UNKNOWN: # unknown here => disappeared 28 | if (dt == DT_LNK and Deref and not chase and 29 | doStat(dfd,path,nmAt,lst,Deref,quiet)) or 30 | lst.stx_nlink != 0 or doStat(dfd,path,nmAt,lst,Deref,quiet): 31 | if lst.stx_mode.match(kinds) and fileTime(lst, tO.tim, tO.dir) > r: 32 | let path = if noDot: 33 | if path.startsWith("./."): path[3..^1] 34 | elif path.startsWith("./"): path[2..^1] 35 | else: path 36 | else: path 37 | if not unique or path notin dip: 38 | stdout.write path, "\n" 39 | dip.incl path 40 | do: discard 41 | do: discard 42 | do: recFailDefault("since", path) 43 | 44 | when isMainModule: # Exercise this with an actually useful CLI wrapper. 
45 | include cligen/mergeCfgEnv; dispatch since, help={ 46 | "refPath": "path to ref file", 47 | "time" : "stamp to compare ({-}[bamcv]\\*)", 48 | "refTime": "stamp of ref file to use (if different)", 49 | "recurse": "recurse n-levels on dirs; 0:unlimited", 50 | "chase" : "chase symlinks to dirs in recursion", 51 | "xdev" : "block recursion across device boundaries", 52 | "Deref" : "dereference symlinks for file times", 53 | "kinds" : "i-node type like find(1): [fdlbcps]", 54 | "quiet" : "suppress file access errors", 55 | "file" : "optional input (\"-\"|!tty=stdin)", 56 | "delim" : "input file record delimiter", 57 | "eof0" : "read dirents until 0 eof", 58 | "noDot" : "remove a leading . from names", 59 | "unique" : "only print a string once"}, short={"refTime":'T', "refPath":'p'} 60 | -------------------------------------------------------------------------------- /doc/stripe.md: -------------------------------------------------------------------------------- 1 | Description 2 | ----------- 3 | 4 | `stripe` is parallelization/rudimentary job distribution utility and its library 5 | optimization nano-shell module `bu/execstr`. It only runs 1 command at a time. 6 | 7 | When commands fit into the restricted nano-shell language, this is about as low 8 | overhead as any new ELF/executable process creating tool can be (which, yes, 9 | remains about 50-100X worse than just fork|`cligen/procpool`). 10 | 11 | ``` 12 | Usage: 13 | 14 | stripe [optional-params] [posArgs: string...] 15 | 16 | where posArgs is either a number or , reads job lines from 17 | stdin and keeps up to N | M running at once. 18 | 19 | In sub mode, each job has $STRIPE_SUB set, in turn, to subJ. Eg.: 20 | 21 | find . -printf "ssh $STRIPE_SUB FileJob '%P'\n" | stripe X Y 22 | 23 | runs FileJobs first on host X then on host Y then on whichever finishes first. 24 | Repeat X or Y to keep more jobs running on each host. 
25 | 26 | $STRIPE_SLOT (arg slot index) & optionally $STRIPE_SEQ (job seqNum) are also 27 | provided to jobs. In N-mode SIGUSR[12] (in|de)creases N. If before uses $tot, 28 | job lines are read upfront to provide that count. 29 | 30 | -r=, --run= string "/bin/sh" run job lines via this interpreter 31 | -n, --nums bool false provide STRIPE_SEQ to job procs 32 | -s=, --secs= float 0.0 sleep SECS before running each job 33 | -l=, --load= int -1 0/1/2: 1/5/15-minute load average < N 34 | -b=, --before= string "" "D": $tm \e[1mslot: $nm $cmd\e[m 35 | alsoAvail: $seq $tot 36 | -a=, --after= string "" "D": $tm \e[7mslot: $nm usr: $u sys: $s\e[m 37 | alsoAvail: wall $w MiBRSS $m $ct $pcpu $cmd 38 | -i=, --irupt= string "" "D": $tm interrupted $nm after $w: $cmd 39 | alsoAvail: substitution $sub 40 | ``` 41 | 42 | There is no need for `STRIPE_SUB` values to be ssh targets. Any regular pool of work 43 | labels will do. For example, you could do a 2-way or 4-way tile of images with 44 | some dispatcher savvy about screen-halves/quadrants/etc. 45 | 46 | Related Work 47 | ------------ 48 | 49 | There are almost too many to even begin mentioning. The closest is probably 50 | `xargs -n1 -P9 --process-slot-var=STRIPE_SUB`, but that doesn't provide sequence 51 | numbers. (You may be able to work around that, e.g., with `EPOCHREALTIME` or 52 | other unique Ids.) Mostly I like my job log format, `execstr` shell-avoidance 53 | optimization, and the C version of this dates back to the very early 00s, long 54 | before `xargs` even had `-P` never mind 2012's `--process-slot-var`. I also 55 | like not having to worry about shell array portability to convert from a numeric 56 | process-slot-var to string keys. This is all trivial enough that it's probably 57 | been done many times by many folks to suit their idiosyncratic tastes. 58 | 59 | Anyway, "chunks" of work need to be >30-100 microsec for this to make sense[^1].
60 | If per-job code is shell-ish, you may be able to do a lower overhead (fork scale 61 | rather than exec scale) system with `wait -n` added to Bash in 2014, IIRC. 62 | 63 | [^1]: or even larger if a real shell launch per command is involved.. 64 | -------------------------------------------------------------------------------- /doc/fread.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ---------- 3 | 4 | Sometimes you want to ensure hot-cache or cold-cache or measure only pure read 5 | behaviors. 6 | 7 | For example: 8 | ```sh 9 | dd if=/dev/zero blocksize=16384 count=16384 | fread 10 | fread *.dat & # get pre-loading of all that going. 11 | ``` 12 | 13 | Usage 14 | ----- 15 | ``` 16 | fread [optional-params] paths: paths to read in 17 | 18 | This is like `cat`, but just discards data. Empty `paths` => just read from 19 | stdin. That can be useful to ensure data is in an OS buffer cache or try to 20 | evict other data (more portably than /proc/sys/vm/drop_caches) for cold-cache 21 | runs, measure drive/pipe or device throughput, etc. Eg. in Zsh you can say: 22 | `fread \*\*` or `fread -l $((1<<30)) < /dev/urandom`. 23 | 24 | Users may pass paths to FIFOs/named pipes/other block-on-open special files 25 | which are skipped. Anything named is only used if mmap-able & only 1 byte 26 | (really 1 cache line) per 4096 is used by the process. Can use multiple passes 27 | to measure DIMM bandwidth through a CPU prefetching lens. 
28 | 29 | Options: 30 | -b=, --bsz= int 65536 buffer size for stdin IO 31 | -l=, --lim= int64 0 max bytes to read; 0=>unlimited 32 | -n=, --nPass= int 1 passes per file 33 | -o=, --off= int 64 total [off0-origin-pass within pages] 34 | -v, --verb bool false print bytes read 35 | ``` 36 | 37 | Example: 38 | -------- 39 | As just one example benchmark-y kind of sketch, we can easily create a 1 GiB 40 | file from `/dev/urandom`[^1], and then (w/`taskset` & `chrt`[^2] to lessen 41 | noise) loop over a series of numbers of passes and fit run times to a [linear 42 | model](https://github.com/c-blake/fitl): 43 | 44 | ```sh 45 | export j=/dev/shm/junk 46 | dd if=/dev/urandom of=$j bs=32k count=32k 47 | taskset -c 2 chrt 99 sh -c \ 48 | 'for n in `seq 1 64`;do printf "$n ";fread -vn$n $j;done'| 49 | fitl -b99 -c,=,n,b 6 0 1 50 | ``` 51 | That yields for me, on one rather old bare metal machine: 52 | ``` 53 | $6= 0.070379 + 1.32172e-03 *$1 54 | 55 | bootstrap-stderr-corr matrix 56 | 0.0001110 -0.8542 57 | 3.245e-06 58 | ``` 59 | The very small errors on slope & intercept suggest a good fit & the fit suggests 60 | initial pass time of 70.38 +- 0.11 ms & per pass times of 1.3217 +- 0.0032 ms. 61 | Since each pass after the first only hits one 64B cache line per page this 62 | translates to about 1./64/1.3217e-3 =~ 11.82 GiB/s throughput. 63 | 64 | Of course, there is more going on than *only* memory transfer (not a lot more!), 65 | but this is also just one example benchmark. `perf stat` (on Linux, anyway) may 66 | afford a more refined understanding of this kind of throughput. Similarly, a 67 | smaller 2MiB file on a HugeTLB FS might eliminate all TLB misses to study L3 CPU 68 | cache bandwidth (or even L2 for some CPUs these days).[^3] And so on. Data 69 | moves around in many ways and data motion is often a bottleneck and `fread` is 70 | here usually just one piece of a bigger puzzle. 71 | 72 | [^1]: Or maybe by `cp somebigfile $j; truncate -s $((1024*1024*1024)) $j`, etc.
73 | 74 | [^2]: On Linux anyway.. 75 | 76 | [^3]: Measuring IO *itself* is also only one application of `fread`. The 77 | original inspiration was eliminating IO time from *other* benchmarks. 78 | -------------------------------------------------------------------------------- /adorn.nim: -------------------------------------------------------------------------------- 1 | when not declared(File): import std/syncio 2 | import cligen, cligen/[sysUt, mfile, mslice, osUt], std/parseutils 3 | 4 | proc anyNegative(xs: seq[int]): bool = (for x in xs: (if x < 0: return true)) 5 | proc outOfOrder(xs: seq[int]): bool = 6 | for j in 1..= 1 column; Full $HELP" 17 | if colNums.outOfOrder: Help !! "Need firstCol < secondCol ...; Full $HELP" 18 | let origin = if O0: 0 else: origin 19 | var o = if output.len > 0: open(output, fmWrite) else: stdout 20 | let sep = initSep delim # Parse delimiter 21 | let xfm = origin == 0 and colNums.anyNegative # Transform indexing 22 | var fs: seq[TextFrame] # Frames 23 | var ac = colNums; var m0 = 0 # Absolute Column 24 | for ln in mSlices(input, sep=rowDlm, eat='\0'): # RO mmap|1-use slices 25 | let m = ln.frame(fs, sep) # Make frames 26 | if m == 0: discard o.uriteBuffer(ln.mem, ln.len) 27 | else: 28 | if xfm: 29 | if m != m0: # Re-use last ac[] if `m` same as last loop 30 | copyMem ac[0].addr, colNums[0].addr, ac.len*ac[0].sizeof 31 | for c in mitems ac: (if c < 0: c += (m + 1) div 2) 32 | m0 = m 33 | var dc, par = 0 # 3 indices: Raw fs[j], dataCol, param (& ac[that]==dc) 34 | for j in 0.. par and prefix[par].len > 0: o.urite prefix[par] 39 | o.urite fs[j].ms 40 | if suffix.len > par and suffix[par].len > 0: o.urite suffix[par] 41 | inc par 42 | else: o.urite fs[j].ms 43 | o.urite "\n" # Should maybe just extend final MSlice to include line 44 | # end, carefully preserving unterminated whole files. 
45 | when isMainModule: dispatch adorn, help={ 46 | "colNums": "`colNums` (`origin`-origin column numbers)", 47 | "origin": "origin for `colNums`; 0 => signed indexing", 48 | "O0" : "shorthand for `--origin=0`", 49 | "prefix": "strings to prepend to listed columns", 50 | "suffix": "strings to append to listed columns", 51 | "input" : "path to mmap|read as input; \"\" => stdin", 52 | "rowDlm": "`input` *row* delimiter character", 53 | "delim" : "`input` *field* dlm chars; len>0=>fold;w=white", 54 | "output": "path to write output file; \"\" => stdout"}, short={"output":'o'} 55 | -------------------------------------------------------------------------------- /wits.nim: -------------------------------------------------------------------------------- 1 | when not declared(stdin): import std/[syncio, formatfloat] 2 | import std/[math, times], adix/[bist, lmbist, embist, xhist1], nio 3 | 4 | var old = 2000.0/2001.0 5 | xhist1.def FHisto, lna, exp, Bist[uint32] 6 | xhist1.def EHisto, lna, exp, EMBist[float32], Hini=true, old 7 | 8 | xhist1.def FBist , lna, exp, Bist[uint32] 9 | xhist1.defMove MFHisto, FBist, 1, 1 10 | 11 | xhist1.def LBist , lna, exp, LMBist[uint32] 12 | xhist1.defMove MLHisto, LBist, it.t + 1, it.t + 1 - it.win 13 | 14 | xhist1.def EBist , lna, exp, EMBist[float32], Hini=true, old 15 | xhist1.defMove MEHisto, EBist, 1.0, it.xwh.hist.scale(it.win) 16 | 17 | type Kernel = enum kFlat="flat", kLin="linear", kExp="exponential" 18 | proc wits(input=".Nf", kernel=kFlat, win=60, oldW=0.99, a=1000.0, b=1e18, 19 | n=32767, time=false, fs: seq[float]) = 20 | ## Windowed/Weighted Incremental Time Series. A CLI for maybe-time-windowed & 21 | ## maybe-time-weighted incremental dynamic histograms of `adix` with related 22 | ## quantities like Winsorized/trimmed moments. Presently, this only takes one 23 | ## binary numeric column (to get experience w/run-time parameterization), but 24 | ## emits as many `float32` (aka `'f'`) as quantile fractions specified. 
25 | let fs = if fs.len==0: @[0.5] else: fs # box & whiskers 26 | var i = nOpen(input) #XXX Must decide on & impl a spec lang 27 | var num: float #... for Winsor/trim moments & qtls (& 28 | var o = newSeq[float32](fs.len) #... also impl Winsor/trim moments!) 29 | 30 | var hMF = initMFHisto(a, b, n, win) 31 | var hML = initMLHisto(a, b, n, win) 32 | var hME = initMEHisto(a, b, n, win) #XXX this is buggy/infinite loops XXX 33 | var hF = initFHisto(a, b, n) 34 | var hE = initEHisto(a, b, n) #XXX must propagate a decay factor 35 | let t0 = epochTime() 36 | var c = 0 37 | while i.read num: 38 | if win > 0: 39 | if kernel == kFlat: hMF.add num 40 | elif kernel == kLin : hML.add num 41 | elif kernel == kExp : hME.add num 42 | else: 43 | if kernel == kFlat: hF.add num 44 | elif kernel == kExp : hE.add num 45 | for j, f in fs: 46 | o[j] = (if win > 0: 47 | if kernel == kFlat: hMF.quantile f 48 | elif kernel == kLin : hML.quantile f 49 | elif kernel == kExp : hME.quantile f 50 | else: NaN 51 | else: 52 | if kernel == kFlat: hF.quantile f 53 | elif kernel == kExp : hE.quantile f 54 | else: NaN).float32 55 | discard stdout.writeBuffer(addr o[0], o[0].sizeof*o.len) 56 | inc c 57 | if time: stderr.write (epochTime() - t0)*1e9/c.float, " ns/num\n" 58 | 59 | when isMainModule: import cligen;include cligen/mergeCfgEnv;dispatch wits,help={ 60 | "fs" : "quantile fractions; 0.5=median (also default)", 61 | "input" : "nio input file; extension-only=>stdin", 62 | "kernel": "time-kernel: flat, linear, exponential", 63 | "win" : "window; 0=>running/cumulative(!linear)", 64 | "oldW" : "weight on old data for exponential", 65 | "a" : "lower bound", 66 | "b" : "upper bound", 67 | "n" : """cross-sectional `n` for HDR histo; 32767=~ 68 | 15/log10(1.001)=>defaults=>0.1% bins""", 69 | "time" : "report main loop execution time"} 70 | -------------------------------------------------------------------------------- /memlat.nim: 
-------------------------------------------------------------------------------- 1 | import std/[random, times, strutils, stats], cligen 2 | when defined(release): randomize() 3 | 4 | when defined(mt): 5 | import mersenne # Test against a diff PRNG 6 | var mt = newMersenneTwister(1234554321) 7 | proc rand(max: int): int = # Inclusive of endpoint 8 | int(mt.getNum mod (uint32(max) + 1u32)) 9 | 10 | proc sattolo_cycle[T](x: var openArray[T]) = 11 | for i in countdown(x.len - 1, 1): 12 | swap x[i], x[rand(i - 1)] # i-1 -> i =>Fisher-Yates 13 | 14 | proc prepRanElt(x: var seq[int], n: int) = 15 | for i in 0.. 0: r = initRand(seed) 72 | else: randomize(); r = initRand(rand(100000)) 73 | let n = (sizeKiB shl 10) shr 3 # or shl 7 74 | case kind 75 | of shuff : time(prepShuffle, runShuffle, n,nAcc,avgN,minN) 76 | of ranElt: time(prepRanElt , runRanElt , n,nAcc,avgN,minN) 77 | of truRan: time(prepTrueRan, runTrue , n,nAcc,avgN,minN) 78 | 79 | include cligen/mergeCfgEnv 80 | dispatch(lat, help = {"kind": "shuff: chase ran perm\n" & 81 | "ranElt: access ran elt\n" & 82 | "truRan: pre-read getrandom", 83 | "seed": "0=>random, else set" }) 84 | -------------------------------------------------------------------------------- /man/catz.1: -------------------------------------------------------------------------------- 1 | .\" -*- nroff -*- 2 | .TH CATZ 1 "July 2002" "CB Utils" 3 | 4 | .SH NAME 5 | 6 | catz \- ``cat'' for compressed files 7 | 8 | .SH SYNOPSIS 9 | 10 | .nf 11 | .B catz \fI[ -d ] [ -v STDIN_NAME_VAR ] [ FILES [ "-" ] [ < FILE ] ]\fR 12 | .fi 13 | 14 | .SH DESCRIPTION 15 | 16 | .B catz 17 | is a replacement for \fBcat\fR(1), but \fIcatz\fR decodes encoded files, 18 | avoiding temporary storage like \fBzcat\fR(1) but for many possible formats. 19 | 20 | .SH USAGE 21 | 22 | This program is careful to go by magic numbers in headers of compressed or 23 | otherwise encoded files when no pathname extension matches. 
NOTE: in a shell 24 | context, it is easy to say "\fIcatz\fR < input.xz" to force ignoring extensions. 25 | 26 | The flag "-v" indicates \fISTDIN_NAME_VAR \fR, an environment variable that 27 | the invoker sets to the pathname for stdin. Other pathnames are available 28 | from the \fBcatz\fR argument list. Pathnames are only really needed if magic 29 | number recognition would fail or if selected decoder programs need a pathname. 30 | 31 | Just as with \fBcat\fR(1), a lone minus sign ("-") filename indicates 32 | how the standard input stream should be ordered within the catenation. 33 | 34 | Currently, \fBcatz\fR has decoders for 35 | \fBzip\fR, 36 | \fBgzip\fR, 37 | \fBcompress\fR, 38 | \fBbzip\fR, 39 | \fBbzip2\fR, 40 | \fBlzop\fR, 41 | \fBlzma\fR (lzma-utils version), 42 | \fBxz\fR or \fBpixz\fR, 43 | \fBplzip\fR, 44 | \fBlz4\fR, and 45 | \fBzstd\fR and the document formats 46 | \fB.pdf\fR, 47 | \fB.ps*\fR, and 48 | \fB.htm*\fR 49 | 50 | NOTE: Due to limitations in utilities for the format, zip files given as 51 | paths will have \fBall\fR members catenated, while only the \fBfirst\fR 52 | member is extracted from unseekable zip inputs. 53 | 54 | A leading "-d" option is ignored for compatibility with GNU tar -I. 55 | 56 | .SH IMPL NOTES 57 | 58 | The one-file, seekable-input case allows simple replacement of the 59 | \fBcatz\fR process with a decoder process. The \fBexec\fR(2) inherits 60 | the needed file descriptors. This avoids any unnecessary context 61 | switching or copying. 62 | 63 | The N-file, named-argument case requires a two-process at a time system 64 | in order to generate an ordered, integrated output stream. \fBcatz\fR 65 | uses a forked version of itself to read output from the read side of a 66 | pipe, copying it to the original stdout. All decoder programs send 67 | their output to the write-side. 
68 | 69 | If no path name is available (e.g., stdin) or if the pathname does not 70 | have a standard filename extension for compressed files, then a magic 71 | number \fImust\fR be read to identify the decoder to be used. A decoder 72 | itself will (typically) \fIalso\fR insist on this magic number being 73 | present. For seekable input streams, \fBlseek\fR(2) can restore the 74 | file pointer and the decoder process will be happy. For unseekable 75 | input streams, we must fork and exec a new process to put the header 76 | back into place for a translator. In short, \fBcat foo|catz\fR should 77 | work fine. This is unlikely true of any/many other auto-decoder. 78 | 79 | .SH AUTHOR 80 | 81 | C.Blake conceived & wrote \fBcatz\fR. 82 | 83 | .SH BUGS 84 | 85 | Please report them! 86 | 87 | .SH SEE ALSO 88 | 89 | .BR cat (1) ", zcat" (1) ", bzip" (1) ", bzip2" (1) ", zip" (1) ", compress" (1) ", grep"(1) 90 | -------------------------------------------------------------------------------- /doc/thermctl.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | OS kernels can down clock CPUs, but are often not aggressive enough to block 4 | thermal shutdown. This controller can sometimes do better. 5 | 6 | This approach is limited, but still useful for me (e.g. on old laptops with 7 | failing fans &| overclocked gamer rigs that only overheat with just the right L3 8 | cache loads, often from g++ compiles). 9 | 10 | # Operation 11 | 12 | At CPU temperature `T > temp.b`, `thermctl` sends SIGSTOP to all runnable PIDs 13 | & at `T <= temp.a`, it sends SIGCONT to all stopped PIDs. 14 | 15 | # Limitations 16 | 17 | Pausing can fail to block future work (loadAvg-targeting work ctl, permissions, 18 | rarely scheduled dispatchers, ..). Operation can also undesirably SIGCONT jobs 19 | stopped in shells with job control. 
20 | 21 | # Temperature query & Parameter tuning 22 | 23 | ``` 24 | thermctl [optional-params] 25 | OS kernels can down clock CPUs but may not be aggressive enough to block thermal 26 | shutdown. This controller can sometimes do better. At T > temp.b, it SIGSTOPs 27 | runnable PIDs & at T <= temp.a, it SIGCONTs PIDs it stopped. 28 | 29 | NOTE: Pausing can fail to block future work (loadAvg-targeting dispatch, perms, 30 | hot procs often put to sleep just before scheduling thermctl itself, etc.). 31 | So, this approach is limited, but maybe useful (e.g. on old laptops with failing 32 | fans &| overclocked gamer rigs). 33 | Options: 34 | -q=, --qry= string "auto" auto:Intel?turbostat -sCPU,CoreTmp:cpuTemp 35 | -d=, --delay= float 1.0 $1 param to qry (likely a delay) 36 | -m=, --match= string "." pattern selecting cpuTemp line 37 | -t=, --temp= Slice 80.0..90.0 > b => pause; < a => resume 38 | -l=, --log= string "" path to log control transitions to 39 | -i=, --incl= strings ffmpeg cmd names to always SIGSTOP if hot 40 | -e=, --excl= strings thermctl cmd names to never SIGSTOP 41 | ``` 42 | 43 | # A Few More Details 44 | 45 | The idea of `--incl` is to avoid a noticed problem where main heat generating 46 | processes are suspended very briefly just before switching to turbostat/thermctl 47 | itself. That means they are not marked Runnable (& so not sent SIGSTOP) for 48 | maybe several query cycles in a row. I saw as many as 4..6 sequentially on a 49 | 4-core. That allows temperature to rise too quickly. A good rule of thumb for 50 | this parameter might be processes that tend to be long-lived and CPU-heavy that 51 | you are sure it's ok to SIGSTOP & SIGCONT like ffmpeg or gcc. 52 | 53 | Note that `turbostat` is distributed with Linux kernel sources. 
So, if you 54 | build your own kernels you can usually get it with 55 | ``` 56 | make -C /usr/src/linux/tools turbostat_install WERROR=0 HOME=/usr/local 57 | ``` 58 | For AMD CPUs you will probably need some kind of wrapper program to post-process 59 | the output of `lmsensors` (e.g. `sensors k10temp-pci-00cb k10temp-pci-00c3`) run 60 | in a loop. 61 | 62 | Physics-minded folk might worry that turbostat itself adds to CPU load pseudo- 63 | Heisenberg-style which is true, but also a small effect. I see 0.02% usage by 64 | turbostat with 1 second delays on a laptop with a 12 year old CPU. (The small 65 | effect can, however, become a large battery drain effect if temperature polling 66 | activity prevents a hard sleep mode.) 67 | 68 | Anyway, I usually launch `thermctl -l/var/log/therm` at system boot. 69 | -------------------------------------------------------------------------------- /doc/eve.md: -------------------------------------------------------------------------------- 1 | Motivation 2 | ========== 3 | One often wants to extrapolate from a finite sample to the true max|min. When 4 | benchmarking, one might want to [filter out system noise](doc/tim.md) { which 5 | has some unknown distribution, but is even worse non-stationary/not IID :-( }. 6 | Another example is in density estimation such as the "clip" or "cut off" values 7 | for a simple histogram or KDE. 8 | 9 | Solving foundational problems like "What background activity competes on time 10 | sharing systems, how stationary is it, etc.?", is hard. However, it is not so 11 | hard to estimate true max|min's (& errors of said estimates) better than sample 12 | extremes of ginormous samples (if one views this as a performance optimization). 13 | Also, one cannot always sample more data - sometimes that is crazy expensive & 14 | limited by "dollars or years per sample" effects. 
15 | 16 | Approach 17 | ======== 18 | The paper initially inspiring this utility is openly available at 19 | https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1433242 . That block maxima 20 | idea (& implementation) has been superseded by the *far* more reliable 21 | peaks-over-threshold (POT) school of Portuguese Extremists: 22 | https://arxiv.org/abs/1412.3972 23 | 24 | Standard errors for the estimate of the true population extreme are estimated by 25 | a bootstrap which should make them ok but, full disclosure, I am still working on 26 | this aspect. 27 | 28 | Usage 29 | ===== 30 | ``` 31 | eve [optional-params] 1-D / univariate data ... 32 | 33 | Extreme Value Estimate by FragaAlves&Neves2017 Estimator for Right Endpoint 34 | method with bootstrapped standard error. E.g.: eve -l $(repeat 99 tmIt). This 35 | only assumes IID samples (which can FAIL for sequential timings!) and checks 36 | that spacings are not consistent with an infinite tail. 37 | 38 | -l, --low bool false flip input to estimate Left Endpoint 39 | -b=, --boot= int 32 number of bootstrap replications 40 | -B=, --BLimit= int 5 re-tries per replication to get not-long 41 | -e=, --emit= set(Emit) bound tail - verbose long-tail test 42 | bound - bound when short-tailed 43 | -a=, --aFinite= float 0.05 tail index > 0 acceptance significance 44 | -k=, --k= float -0.5 2k=num of order statistics; <0 => = n^|k| 45 | -K=, --KMax= int 50 biggest k; FA,N2017 suggests ~50..100 46 | -s=, --shift= float 0.0 shift MAX by this many sigma (finite bias) 47 | ``` 48 | 49 | Some Subtleties 50 | =============== 51 | The idea here does not make sense if extreme data spacings suggest an infinite 52 | rather than finite tail. So, we are careful to rule this out at alpha level 53 | `aFinite` in both the main estimator and the bootstrap re-sampling. 54 | 55 | The bootstrap preserves the sample-max to aid clustering of new estimates around
It also re-samples only the data that contributes to the 57 | estimate - and also only from that portion of the tail. This seems to me the 58 | most coherent approach. 59 | 60 | POT methods require that k/n->0 as n grows. But we want a good estimate. So, 61 | we want k big. However, for the estimator formula, k cannot be > n/2. So, 62 | internally, `eve` uses `k = min(n/2 - 1, n^kPow)`. This should discard most 63 | data above (below for `-l`) the median or much more if you use a lower `kPow`. 64 | -------------------------------------------------------------------------------- /dirt.nim: -------------------------------------------------------------------------------- 1 | # NOTE: Needs `devel` / >= 1.4.0 for `HeapQueue[T].find`. 2 | when not declared(File): import std/syncio 3 | include cligen/unsafeAddr 4 | import std/[heapqueue,sets,posix,strformat], cligen,cligen/[dents,posixUt,statx] 5 | 6 | proc setMTime*(dfd: cint; path: string; m0, m1: StatxTs; 7 | verb: File=nil, err=stderr, dryRun=false): int = 8 | ## Set the file m1)odification time of ``(dfd,path[nmAt..^1])`` only if not 9 | ## equal to the original ``m0`` times with typical command utility ``verb``, 10 | ## ``err``, ``dryRun`` parameters, returning if the call must/did occur. 11 | if m0 == m1: return 0 12 | result = 1 13 | let omit = Timespec(tv_sec: 0.Time, tv_nsec: UTIME_OMIT) 14 | var ftms = [ omit, toTimespec(m1)] 15 | verb.log &"futimens({dfd}({path}), [OMIT, {$ftms[1]}])\n" 16 | if not dryRun and futimens(dfd, ftms) != 0: 17 | err.log &"futimens({dfd}({path}): {strerror(errno)}\n" 18 | 19 | proc dirt*(roots: seq[string], verbose=false, quiet=false, dryRun=false, 20 | prune: seq[string] = @[], xdev=false): int = 21 | ## Set mtimes of dirs under ``roots`` to mtime of its newest kid. This makes 22 | ## directory mtimes "represent" content age at the expense of erasing evidence 23 | ## of change which can be nice for time-sorted ls in some archival file areas. 
24 | if roots.len == 0: # For safety, do nothing if user specifies empty `paths` 25 | return 26 | let prune = toHashSet(prune) 27 | let verb = if dryRun or verbose: stdout else: nil 28 | let err = if quiet: nil else: stderr 29 | var n = 0 30 | for root in roots: 31 | var dirs = @[initHeapQueue[int64]()] # HeapQueue.pop is *MINIMUM* 32 | forPath(root, 0, lstats=true, false, xdev, false, err, 33 | depth, path, nmAt, ino, dt, lSt, dfd, dst, did): 34 | if dt != DT_LNK: # Always: 35 | dirs[^1].push -toInt64(lSt.stx_mtime) # Track max age 36 | do: # Pre-recurse: 37 | if path[nmAt..^1] in prune: 38 | verb.log &"pruning at: {path}\n" 39 | discard dirs[^1].pop 40 | continue 41 | dirs.add initHeapQueue[int64]() # Add new queue for kid 42 | let dmt = lSt.stx_mtime # Save old mtime 43 | do: # Post-recurse: 44 | if dirs.len > 0: 45 | if dirs[^1].len > 0: # Deepest queue non-empty 46 | let kidTm = dirs[^1].pop # Get & use max kid time stamp 47 | n += setMTime(dfd, path, dmt, toStatxTs(-kidTm), verb, err, dryRun) 48 | if dirs.len > 1: # ASSUME setMTime SUCCEEDS 49 | dirs[^2].del dirs[^2].find(-toInt64(dmt)) #XXX BST/BTreeQ 4big dirs 50 | dirs[^2].push kidTm # reflect dmt -> kidTm in parent 51 | discard dirs.pop # discard kid queue 52 | do: recFailDefault("dirt", path) # Cannot recurse 53 | return min(127, n) 54 | 55 | when isMainModule: 56 | include cligen/mergeCfgEnv; dispatch(dirt, short={"dry-run": 'n'}, help={ 57 | "verbose": "print `utimes` calls as they happen", 58 | "quiet" : "suppress most OS error messages", 59 | "dry-run": "only print what system calls are needed", 60 | "prune" : "prune exactly matching paths from recursion", 61 | "xdev" : "block recursion across device boundaries" }) 62 | -------------------------------------------------------------------------------- /doc/pid2.md: -------------------------------------------------------------------------------- 1 | # Basics 2 | 3 | Usage: (***NOT*** a cligen utility) 4 | ``` 5 | pid2 [integer(300)] 6 | ``` 7 | The [] 
notation here indicates optionality and default is in (). 8 | 9 | This program just does `vfork()` as fast as possible to wrap a Linux process 10 | table until the target Process ID integer is reached. 11 | 12 | # Motivation 13 | 14 | PID-wrapping was made famous more as a hacking tool for programs which foolishly 15 | assume the next PID is neither predictable nor re-used (e.g. a shell /tmp/foo.$$ 16 | construct). 17 | 18 | I am publishing it here mostly as an example of a big effect that OS scheduling 19 | affinity for a particular CPU can make. It can also sometimes be nice to 20 | "position" within the process table if you often do PID-sorted process table 21 | listings..(e.g. to group all your xterms or shells together). 22 | 23 | # Speed-up 24 | 25 | For even greater speed, you can do this in parallel with each pid2 pinned 26 | to different CPUs, such as a wrapper script (called, say, `2pid`): 27 | 28 | ```sh 29 | : "${j:=$(nproc)}" 30 | for k in `seq 0 "$((j-1))"` 31 | do pid2 "$@" "$k" & done 32 | wait 33 | ``` 34 | Even with `2^22` pids (default lately), this can take under 8 sec on my laptop. 35 | Most people I know are unfamiliar with how fast the PID counter can advance 36 | under heavy fork load. 37 | 38 | # Regrets, I have a few.. 39 | 40 | In 1979 when Berkeley introduced `vfork` on the VAX 11/780, they should have 41 | made PIDs 32-bits. At the time, it was about 3..8ms to `vfork` meaning 32768 42 | wraparounds could take just a few minutes.[^1] Meanwhile, 32-bit would have 43 | been 6..18 months doing almost nothing but `vfork` -- likely easily noticed / 44 | trapped activity right up until about the 64-bit moves in the late 90s. PIDs 45 | then could have been "unique ids" from the dawn of Unix & very likely moved to 46 | 64-bit ids by the late 90s which (in 2025) would still be fine unique IDs for 47 | the foreseeable future. Oh well! 
import cligen, cligen/[mfile, mslice, osUt], adix/topk
from std/strutils as su import nil
when not declared(stderr): import std/syncio

# Python-style signed, 0-origin column indexing over an already-split row.
proc pyIx(x: openArray[MSlice], i: int): MSlice = x[if i < 0: i + x.len else: i]

proc pyIx(x: openArray[MSlice], s: Slice[int]): MSlice =
  ## Column-slice as ONE contiguous `MSlice`: columns of a split line share the
  ## same underlying buffer, so the span from the start of col `a` to the end
  ## of col `b` is addressable directly.  Returns a nil/empty slice when the
  ## (sign-adjusted) bounds are empty or out of range.
  let a = if s.a < 0: s.a + x.len else: s.a
  let b = min(x.len - 1, if s.b < 0: s.b + x.len else: s.b) # b < a | OutOfB: ""
  if b < a or a + 1 > x.len or b + 1 > x.len: result.mem = nil; result.len = 0
  else: result.mem = x[a].mem; result.len = x[b].mem +! x[b].len -! x[a].mem

# NOTE(review): the `<n>`-style placeholders in the doc comment below appear to
# have been eaten by an HTML scrape of this file (`<...>` read as tags);
# reconstructed here -- confirm exact wording against the upstream repo.
proc topn*(input="/dev/stdin", delim=" ", mxCol=0, n=0, order=Cheap,
           partn=Partn.last, specs: seq[string]) =
  ## Write spec'd cols of topN-rows-by-various-other-cols to outFile's. A spec
  ## is `<n>[,<keyCol>(0)[,outCol(same)[,outFile(stdout)]]]`. ColNos are Py-like
  ## 0-origin,signed. *outCol* can be an A:B exclusive or A..B slice. Algo is
  ## fast one-pass over (mmap|stream) input. Simple & Fancy E.g.s:
  ##   ``find . -type f -printf '%C@ %p\\n' | topn -m1 5``  # newest 5 by ctime
  ##   ``topn 9,1,-1,x``  # writes last col of top 9-by-col-1 rows to file x.
  ## If `n!=0` then `<n>` can end in '%' to instead mean *100\*pct/n* rows.
  let m = specs.len                     # Handle all `m` sort orders in one pass
  if m < 1: stderr.write "No specs requested. -h for help.\n"; return
  var keyC = newSeq[int](m)             # Per-spec: key column number
  var nTop = newSeq[int](m)             # Per-spec: how many rows to keep
  var oCol = newSeq[Slice[int]](m)      # Per-spec: output column slice
  var oFil = newSeq[File](m)            # Per-spec: output destination
  for i, spec in specs:                 # Parse key-output specifiers
    let params = su.split(spec, ',')
    if params.len < 1:
      stderr.write "too few sub-params in spec ", spec, "\n"; continue
    let p0 = params[0]
    # Trailing '%' means a percentage of `n` rows rather than a fixed count.
    nTop[i] = if su.endsWith(p0, '%'): su.parseInt(p0[0..^2]) * n div 100
              else: su.parseInt(p0)
    nTop[i] = max(1, nTop[i])           # Keep at least one row
    keyC[i] = if params.len > 1: su.parseInt(params[1]) else: 0
    # Default output columns: just the key column itself.
    oCol[i]=if params.len>2:parseHSlice[int,int](params[2])else:keyC[i]..keyC[i]
    oFil[i] = if params.len > 3: open(params[3], fmWrite) else: stdout
  let sep = initSep(delim)              # Init into-seq[MSlice] splitter
  var row: seq[MSlice] = @[]
  let mf = mopen(input)                 # nil .mem => fall back to stdio path

  # `.dirty` so `rec`, `row`, `tops` bind in the instantiation context; `T` is
  # `string` (owned copies, stdio path) or `MSlice` (zero-copy, mmap path).
  template sweep(mf, T, i, outsVal) {.dirty.} =
    type Rec = tuple[val: float32; outs: T]
    var tops: seq[TopK[Rec]]
    for i in 0 ..< m: tops.add initTopK[Rec](nTop[i], partn)
    var rec: Rec
    for line in mSlices(mf, eat='\0'):  # RO mmap | slices from stdio
      sep.split(line, row, mxCol)       # split into columns
      for i in 0 ..< m:                 # update our 1-to-several `TopK`
        rec.val = parseFloat(pyIx(row, keyC[i])).float32 # tuned4 float rarity
        rec.outs = outsVal
        tops[i].push rec.move           # `$pyIx(..)` in outsVal maybe made cpy
    for i in 0 ..< m:                   # Emit like sort -gk|tail -n|tac
      for e in tops[i].maybeOrdered(order): oFil[i].urite e.outs, "\n"
      if not oFil[i].isNil and oFil[i] != stdout: oFil[i].close

  # rec.outs become either GC'd `string` or no-need-to-GC `MSlice`
  if mf.mem.isNil: sweep(input, string, i, $pyIx(row, oCol[i]))
  else           : sweep(mf   , MSlice, i, pyIx(row, oCol[i]))

when isMainModule: include cligen/mergeCfgEnv; dispatch topn, help={
  "input": "input data path",
  "delim": "delimiting (repeats=>any num; \"white\")",
  "mxCol": "max columns in input to parse",
  "n"    : "scale for '%' amounts",
  "partn": "partition: last, ran",
  "order": "order: Cheap, Ascending, Descending"}
The only limit is your imagination. :) 22 | 23 | There may be other interesting setups with other event classes. 24 | 25 | Usage 26 | ----- 27 | ``` 28 | dirq [optional-params] [cmdPrefix: string...] 29 | 30 | chdir(dir) & wait for events to occur on it. For each delivered event, run 31 | cmdPrefix NAME where NAME is the filename (NOT full path) delivered. 32 | 33 | Handleable events are: 34 | access attrib modify open closeWrite closeNoWrite 35 | movedFrom movedTo moveSelf create delete deleteSelf 36 | 37 | Default events closeWrite (any writable fd-close) | movedTo (renamed into dir) 38 | usually signal that NAME is ready as an input file. 39 | 40 | dirq can monitor & dispatch for many dirs at once with repeated --dir=A cmdPfx 41 | for A --dir=B cmdPfx for A patterns; events & wait are global. 42 | 43 | Options: 44 | -e=, --events= set(Event) closeWrite,movedTo inotify event types to use 45 | -w, --wait bool false wait4(kid) until re-launch 46 | -d=, --dir= string "." directory to watch 47 | ``` 48 | 49 | History/Cultural 50 | ---------------- 51 | Circa 2006, Linux added a `man 7 inotify` system that obsoleted an inefficient 52 | & limited (*must* rename into) approach for this. So, I did a C program (had to 53 | use `syscall(__NR_inotify_add_watch, ..)` since it took glibc a while to wrap). 54 | `dirq` is a Nim port of this C program. (I pronounce `dirq` like "Dirk" myself, 55 | but you can do as you like.) 56 | 57 | Future Work 58 | ----------- 59 | I don't use BSD these days, but KQueue and similar facilities could allow this 60 | program to be a kind of portable command entry point for this limited subset of 61 | functionality. Maybe something like it already exists? I believe kqueue file 62 | monitoring pre-dates Linux inotify. Similarly, a few events like `movedTo` can 63 | be handled portably with a stat-loop, re-scanning directories upon mtime update. 
64 | 65 | Related Work 66 | ------------ 67 | `inotifywait` of [inotify-tools](https://github.com/inotify-tools/inotify-tools) 68 | does allow this, but a "command wrapper" use concept makes working with general 69 | filenames easier. Specifically, `dirq` simply populates the last `argv[]` slot 70 | with the filename received from the kernel & runs your program. This eliminates 71 | both quoting & parsing concerns. With `inotifywait` you would have to format 72 | things in a reliably parsable way which is yet another convention to fret about. 73 | 74 | Bell Labs Plan 9 has a not dissimilar concept called "plumb"/"plumbers" but I 75 | believe these require a bit more cooperation. 76 | -------------------------------------------------------------------------------- /doc/cbtm.md: -------------------------------------------------------------------------------- 1 | WARNING 2 | ------- 3 | Use at your own risk as with any tool that uses `xfs_db` | `debugfs`. No 4 | warranty, express or implied. 5 | 6 | Motivation 7 | ---------- 8 | 9 | Hardware hosting filesystems can change. It can be nice to save & restore ctime 10 | & btime rather than always wiping file life cycle history. 11 | 12 | There is no OS/FS-portable way to do this. (settimeofday can do ctime, but with 13 | system-disruptive time storms.) This utility fills the gap for XFS/ext4 14 | on Linux. 15 | 16 | Basic usage for an XFS on DEV mounted at MNT 17 | -------------------------------------------- 18 | ``` 19 | cbtm save /MNT >MNT.stDat 20 | ``` 21 | This basically just saves all the statx data (which tends to compress very 22 | well if you want). 23 | 24 | Then, sometime later, on e.g. a brand new device: 25 | ``` 26 | cbtm filt -qr/MNT CMDS 27 | umount /MNT 28 | xfs_db -x DEV CMDS.log 2>&1 29 | ``` 30 | Here, `xfs_db` does the hard work. NOTE: Does not yet work for `ext4`. 
31 | 32 | WARNING AGAIN 33 | ------------- 34 | Note that until you become comfortable with this tool, you should look over 35 | generated `CMDS` and perhaps manually run just the first few against a (backed 36 | up!) FS. { While `xfs_db` does have a `path` command as well as `inode` and 37 | even has escape/quote-sensitive tokenization code, unfortunately it does not 38 | de-escape or de-quote things before internal use. So, one must use inode to get 39 | pathname generality. Well, or patch `xfsprogs`. But `stat(1)` does report 40 | inodes for you. } 41 | 42 | More details 43 | ------------ 44 | ``` 45 | Usage: 46 | cbtm {SUBCMD} [sub-command options & parameters] 47 | where {SUBCMD} is one of: 48 | help print comprehensive or per-cmd help 49 | save Save all statx metadata for all paths under roots to output. 50 | print Print metadata stored in input in a human-readable format. 51 | filter Remove input records if source & target differ|same [bc]time. 52 | restore Generate commands to restore [cb]time input 53 | 54 | save [optional-params] [roots: string...] 55 | Save all statx metadata for all paths under roots to output. 56 | 57 | Output format is back-to-back (statx, 2B-pathLen, NUL-term path) records. 58 | To be more selective than full recursion on roots, you can use the output of 59 | find -print[0] if you like (& file=/dev/stdin to avoid temp files). 60 | -f=, --file= string "" optional input ("-"|!tty=stdin) 61 | -d=, --delim= char '\n' input file record delimiter 62 | -o=, --output= string "/dev/stdout" output file 63 | -q, --quiet bool false suppress most OS error messages 64 | 65 | print [optional-params] 66 | Print metadata stored in input in a human-readable format. 67 | -i=, --input= string "/dev/stdin" metadata archive/backup path 68 | -d=, --delim= string "\t" set delim 69 | 70 | filter [optional-params] PCRE path patterns to *INCLUDE* 71 | Remove input records if source & target differ|same [bc]time. 
when not declared(stderr): import std/syncio
include cligen/unsafeAddr
import std/[posix, tables, strutils], cligen, cligen/[osUt, posixUt, dents]

type LncsLog* = enum osErr, summary   ## A micro logging system
type DevIno = tuple[dev: Dev; ino: uint64]  # (device, inode) identifies a file

proc lncs(paths: seq[string], file="", dlm='\n', recurse=1, chase=false, # in
          xdev=false, eof0=false, kinds={fkFile}, minSize=0, thresh=2,   # filt
          quiet=false, log={osErr},                                      # log
          nEcho= -1, noDot=false, outDlm="\t", endOut="\n"): int =       # out
  ## Print hard link clusters within paths of maybe-chasing, maybe-recursive
  ## closure of the UNION of ``paths`` and optional ``dlm``-delimited input
  ## ``file`` (stdin if "-"|if "" & stdin not a tty).  Exit code is min(127,
  ## num.clusters >= thresh).  Eg., ``find -print0|lncs -d\\0 -o '' -e ''``
  ## makes a report reliably splittable on double-NUL then single-NUL for
  ## fully general path names while ``lncs -ls -n0 -r0 /`` echoes a summary.
  let outDlm = if outDlm.len > 0: outDlm else: "\x00"  # ""=>NUL delimiters
  let endOut = if endOut.len > 0: endOut else: "\x00"
  var nPaths, nSet, nFile: int          #Track some statistics
  var tab = initTable[DevIno, seq[string]](512)  # (dev,ino) -> path cluster
  let err = if quiet: nil else: stderr
  let it = both(paths, fileStrings(file, dlm))   # CLI roots ++ file/stdin roots
  var roots: seq[string]
  for root in it(): (if root.len > 0: roots.add root)
  for rt in (if roots.len == 0 and paths.len == 0: @["."] else: roots.move):
    forPath(rt, recurse, true, chase, xdev, eof0, err,
            depth, path, nmAt, ino, dt, lst, dfd, dst, did):
      if dt != DT_UNKNOWN and lst.stx_mode.match(kinds): # unknown here =>gone
        let path = if noDot and path.startsWith("./"): path[2..^1] else: path
        nPaths.inc
        if lst.stx_size >= minSize.uint64:      # big enough
          let key: DevIno = (lst.st_dev, lst.stx_ino)
          tab.mgetOrPut(key, @[]).add(path)     # same key => hard link cluster
    do: discard                                 # Pre-recurse: nothing to do
    do: discard                                 # Post-recurse: nothing to do
    do: recFailDefault("lncs", path)            # Cannot recurse
  for ino, s in tab:
    if s.len >= thresh:                 # Only clusters of >= thresh links
      nSet.inc
      nFile.inc s.len
      if nEcho != 0:                    #Maybe emit report for set
        let lim = min(s.len, if nEcho > 0: nEcho else: s.len)
        stdout.write s[0 ..< lim].join(outDlm), endOut
  if summary in log:                    #Emit summary statistics
    stderr.write nSet," sets of ",nFile," hard links in ",nPaths," paths\n"
  return min(127, nSet)                 #Exit with appropriate status

when isMainModule:                      #Provide a useful CLI wrapper.
  include cligen/mergeCfgEnv
  dispatch lncs, help={ "paths"  : "filesystem roots",
                        "file"   : "optional input (\"-\"|!tty=stdin)",
                        "dlm"    : "input file delimiter (\\0->NUL)",
                        "recurse": "recurse n-levels on dirs; 0:unlimited",
                        "chase"  : "follow symlinks to dirs in recursion",
                        "xdev"   : "block recursion across device boundaries",
                        "eof0"   : "read dirents until 0 eof",
                        "kinds"  : "i-node type like find(1): [fdlbcps]",
                        "minSize": "minimum file size",
                        "thresh" : "smallest hard link cluster to count",
                        "quiet"  : "suppress file access errors",
                        "log"    : ">stderr{osErr, summary}",
                        "nEcho"  : "num to print; 0: none; -1: unlimited",
                        "noDot"  : "remove a leading . from names",
                        "outDlm" : "output internal delimiter",
                        "endOut" : "output record terminator" },
           short = {"xdev": 'X', "eof0": '0', "noDot": '.'}
start, rune, esc, csi, osc # Loop-to-loop Parse State 20 | const ST = '\\' 21 | proc putClipped(line: cstring; n, w: int) = 22 | var ps: ParseState; var r,con,ix: int # ParseState,Rendered width,rune(Con&Ix) 23 | var did=false; var uc=0i32 # Flag & unicode character 24 | var bs: array[4, char] 25 | for i in 0 ..< n: # Input byte Index 26 | if did: did = false # String Terminator Esc-\ needs a peek 27 | else: 28 | let b = line[i] # NOTE: State machines fitting on one screen read easier. 29 | case ps # Idea is to just stop cursor advances after w char cells. 30 | of start: # Special ASCII, then utf8, then emit 31 | if b == '\e' : ps = esc;put b # Enter Esc-Seq mode 32 | elif b == '\b' : dec r; put b # backspace rewinds 1 33 | elif b == '\r' : r = 0; put b # carriage-return rewinds all 34 | elif b == '\t' : r = ((r + 8) div 8)*8; (if r < w: put b) # ${3:-8}? 35 | elif ord(b)<32 : put b # For me \v only lineFeeds, not moving r 36 | elif ord(b)>127: ps = rune; con = contin(b, uc); ix = 0; bs[0] = b 37 | elif r < w: inc r; put b 38 | else: inc r # Advancing blocks combiners post r==w 39 | of rune: # Does not handle Double-Wide Unicode, 40 | inc ix #..or grapheme extensions or similar. 41 | if ix <= con: # Accumulate rune / unicode character 42 | bs[ix] = b; uc = (uc shl 6) or (b.int32 and 0b111111) 43 | if ix == con: 44 | ps = start # Unicode char assembled: maybe emit 45 | if r < w or (r == w and uc.isCombining): 46 | if con==1: put bs[0]; put bs[1] 47 | elif con==2: put bs[0]; put bs[1]; put bs[2] 48 | elif con==3: put bs[0]; put bs[1]; put bs[2]; put bs[3] 49 | if not uc.isCombining: inc r 50 | of esc: # Assume no other escSeq & 0 advance. 51 | if b == '[': ps = csi; put b #..This is inexact since several vtXXX 52 | elif b == ']': ps = osc; put b #..codes can reset/move cursors, BUT we 53 | else: ps = start; put b #..cannot be a full TEmulator *though* 54 | of csi: #..some TEms DO have "no wrap" modes. 
55 | if ord(b) in 0x40..0x7E: ps = start 56 | put b 57 | of osc: 58 | if b == '\a': ps = start 59 | elif b == '\e' and i= 1: parseInt(paramStr(1)) else: 1 67 | let w = m*(if paramCount() >= 2: parseInt(paramStr(2)) else: terminalWidth()) 68 | for (line, n) in stdin.getDelims: 69 | putClipped line, n - 1, w; put '\n' 70 | main() 71 | -------------------------------------------------------------------------------- /bu/rs.nim: -------------------------------------------------------------------------------- 1 | import std/random # For weighted versions of this, see work of Yves Tillé with 2 | type #..keywords "unequal probability sampling without replacement" 3 | Reservoir*[T] = object ## A Reservoir Random Subset Generator 4 | seen, size: int 5 | res*: seq[T] ## Accumulated fair subset/sample 6 | Dup[T] = proc(x: T): T 7 | Del[T] = proc(x: T) 8 | 9 | proc init*[T](r: var Reservoir[T], size=0) = r.size = size ## Initialize 10 | proc initReservoir*[T](size=0): Reservoir[T] = result.init size ## Factory 11 | 12 | proc add*[T](r: var Reservoir[T], item: T, dup: Dup[T]=nil, del: Del[T]=nil) = 13 | ## Add an item to reservoir `r` 14 | inc r.seen 15 | template nix(j: int) = (if not del.isNil: del r.res[j]) 16 | template set(j: int, it: T) = 17 | if not dup.isNil: r.res[j] = dup it 18 | else: r.res[j] = it 19 | if r.size > 0: # Subset mode (No Replacement) 20 | if r.res.len < r.size: # Just populating reservoir 21 | r.res.setLen r.res.len + 1 # `setLenUninit` needs {.nodestroy.}=>.. 22 | set r.res.len - 1, item #..need `=dup`=>No Faster for T=string. 23 | else: # Random replacement in reservoir 24 | if (let j = rand(0.. prefix.`ns`. 39 | ## If `n<0` sample w/replacement else do subsets. O(`Σns`) space. 
Examples: 40 | ## ``seq 1 100 | rs 10 .-5`` or (after maybe ``mkfifo f1 f2``) 41 | ## ``workOn 0: ext = ext[1..^1] 47 | var n = parseInt(ext.toMSlice, e) 48 | if (e==ext.len and ext.len!=0) and dir.len==0 and n!=0: # integral `ext` 49 | rs.add initReservoir[MSlice](n) 50 | let p = dir/name; os.add if name.len>0: open(p, fmWrite) else: stdout 51 | else: 52 | n = parseInt(name.toMSlice, e) 53 | if e!=name.len or name.len==0 or dir.len!=0: Help!!"Non-integral! $HELP" 54 | rs.add initReservoir[MSlice](n); os.add stdout 55 | for line in mSlices(input, mf=mf): 56 | proc dup(x: MSlice): MSlice = # Program does not know until here.. 57 | if mf.mem.isNil: #..if read-only memory map succeeded. 58 | result = MSlice(mem: alloc x.len, len: x.len) 59 | copyMem result.mem, x.mem, x.len # Need a copy only if it failed. 60 | else: result = x 61 | proc del(x: MSlice) = (if mf.mem.isNil: dealloc x.mem else: discard) 62 | for r in mitems rs: r.add line, dup, del # PROCESS INPUT 63 | var n = rs.len 64 | while n > 0: # Looping gives round-robin work to ||readers of os 65 | for j, r in mpairs rs: 66 | if r.res.len > 0: 67 | os[j].urite r.res[^1]; os[j].urite '\n' 68 | r.res.setLen r.res.len - 1 69 | if flush: flushFile os[j] 70 | if r.res.len == 0: dec n 71 | include cligen/mergeCfgEnv 72 | dispatch rs, help={"prefixNs": "[pfx.][-]`n`.. output paths; pfx\"\"=>stdout", 73 | "input": "\"\" => stdin", "flush": "write to outs immediately", 74 | "randomize": "randomize() for non-deterministic filtering"} 75 | --------------------------------------------------------------------------------