├── .gitignore
├── internal
    ├── test
    │   ├── testdata
    │   │   └── three-small-pigs
    │   └── test.go
    ├── dbg
    │   └── logger.go
    ├── pmap.go
    ├── pmap_test.go
    ├── unit_test.go
    ├── run_files.go
    └── unit.go
├── .gitmodules
├── head
    ├── head.awk
    ├── head_negative.awk
    ├── head_test.go
    └── head.go
├── cat
    ├── show_ends.awk
    ├── show_tabs.awk
    ├── show_number.awk
    ├── show_number_nonblank.awk
    ├── squeeze_blanks.awk
    ├── np_test.go
    ├── cat_test.go
    └── cat.go
├── .github
    └── workflows
    │   ├── golang-lint.yml
    │   └── go.yml
├── go.mod
├── example
    ├── real_test.go
    └── example_test.go
├── LICENSE
├── awk
    ├── awk_test.go
    └── awk.go
├── wc
    ├── wc_test.go
    └── wc.go
├── TODO.md
├── README.md
├── x
    └── tr
    │   ├── tr_test.go
    │   └── tr.go
├── go.sum
└── cksum
    ├── cksum_test.go
    └── cksum.go


/.gitignore:
--------------------------------------------------------------------------------
1 | gonix
2 | # vim swap files
3 | .*.swp
4 | 


--------------------------------------------------------------------------------
/internal/test/testdata/three-small-pigs:
--------------------------------------------------------------------------------
1 | three
2 | small
3 | pigs
4 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "sbase/testdata/sbase"]
2 | 	path = sbase/testdata/sbase
3 | 	url = git://git.suckless.org/sbase
4 | 


--------------------------------------------------------------------------------
/head/head.awk:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
2 | # Use of this source code is governed by a MIT
3 | # license that can be found in the LICENSE file.
# Print a record only while the record number has not passed `lines`.
# `lines` is not assigned in this script, so it has to be supplied from outside.
NR <= lines { print }
5 | 


--------------------------------------------------------------------------------
/cat/show_ends.awk:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
2 | # Use of this source code is governed by a MIT
3 | # license that can be found in the LICENSE file.
4 | 
# Emit every record followed by a literal "$" marking the end of the line.
{ print $0 "$" }
6 | 


--------------------------------------------------------------------------------
/cat/show_tabs.awk:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
2 | # Use of this source code is governed by a MIT
3 | # license that can be found in the LICENSE file.
4 | 
# Replace every TAB in the record with the two characters "^I", then print it.
# gsub is required here: sub would rewrite only the first TAB of each line.
{ gsub(/\t/, "^I") } 1
6 | 


--------------------------------------------------------------------------------
/cat/show_number.awk:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | # Use of this source code is governed by a MIT
 3 | # license that can be found in the LICENSE file.
 4 | 
 5 | BEGIN { n = 1; }
 6 | {
 7 |     printf("%6d\t%s\n", n, $_);
 8 |     n++;
 9 | }
10 | 


--------------------------------------------------------------------------------
/cat/show_number_nonblank.awk:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | # Use of this source code is governed by a MIT
 3 | # license that can be found in the LICENSE file.
 4 | 
 5 | BEGIN { n = 1; }
 6 | {
 7 |     if (NF > 0) {
 8 |         printf("%6d\t%s\n", n, $_);
 9 |         n++;
10 |     } else {
11 |         print;
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/cat/squeeze_blanks.awk:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | # Use of this source code is governed by a MIT
 3 | # license that can be found in the LICENSE file.
 4 | 
 5 | BEGIN {
 6 |     squeeze = 0;
 7 | }
 8 | {
 9 |     if (NF == 0) {
10 |         if (squeeze==0) {print};
11 |         squeeze = 1;
12 |     } else {
13 |         squeeze = 0;
14 |     }
15 |     if (squeeze == 0) {
16 |         print($_);
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/internal/dbg/logger.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package dbg
 6 | 
 7 | import (
 8 | 	"io"
 9 | 	"log"
10 | )
11 | 
// Logger builds a *log.Logger whose messages are prefixed with
// "[DBG]<command>: ". When enabled is false the logger writes to io.Discard
// instead of stderr. The flags are taken from the standard logger at the
// time of the call.
func Logger(enabled bool, command string, stderr io.Writer) *log.Logger {
	out := io.Discard
	if enabled {
		out = stderr
	}
	prefix := "[DBG]" + command + ": "
	return log.New(out, prefix, log.Flags())
}
18 | 


--------------------------------------------------------------------------------
/.github/workflows/golang-lint.yml:
--------------------------------------------------------------------------------
 1 | name: golangci-lint
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - v*
 6 |     branches:
 7 |       - master
 8 |       - main
 9 |   pull_request:
10 | permissions:
11 |   contents: read
12 | jobs:
13 |   golangci:
14 |     name: lint
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/setup-go@v3
18 |         with:
19 |           go-version: '1.20'
20 |       - uses: actions/checkout@v3
21 |       - name: golangci-lint
22 |         uses: golangci/golangci-lint-action@v3
23 |         with:
24 |           version: latest
25 | 


--------------------------------------------------------------------------------
/head/head_negative.awk:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | # Use of this source code is governed by a MIT
 3 | # license that can be found in the LICENSE file.
 4 | BEGIN {
 5 | 	delete ring_buf[0]
 6 | 	buf_idx = 0
 7 | 	buf_head = 0
 8 |     buf_full = 0
 9 | }
10 | {
11 | 	if (! buf_full) {
12 | 		ring_buf[buf_idx]=$0
13 | 		buf_idx ++
14 |         if (buf_idx == lines) {
15 |             buf_full = 1
16 |         }
17 |         next
18 | 	}
19 | 
20 | 	print(ring_buf[buf_head])
21 | 	ring_buf[buf_head]=$0
22 | 	buf_head ++
23 | 	if (buf_head == lines)
24 | 		buf_head = 0
25 | }
26 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/gomoni/gonix
 2 | 
 3 | go 1.20
 4 | 
 5 | require (
 6 | 	github.com/benhoyt/goawk v1.21.0
 7 | 	github.com/gomoni/gio v0.0.0-20230206214735-ff72054e35d2
 8 | 	github.com/spf13/pflag v1.0.5
 9 | 	github.com/stretchr/testify v1.8.1
10 | 	go.uber.org/goleak v1.1.12
11 | 	golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550
12 | 	golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde
13 | )
14 | 
15 | require (
16 | 	github.com/davecgh/go-spew v1.1.1 // indirect
17 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
18 | 	golang.org/x/sys v0.0.0-20210510120138-977fb7262007 // indirect
19 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
20 | )
21 | 


--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "main" ]
 6 |   pull_request:
 7 |     branches: [ "main" ]
 8 | 
 9 | jobs:
10 | 
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v3
15 | 
16 |     - name: Set up Go
17 |       uses: actions/setup-go@v3
18 |       with:
19 |         go-version: '1.20'
20 |     
21 |     - name: cache go modules
22 |       uses: actions/cache@v3
23 |       with:
24 |         path: |
25 |           ~/.cache/go-build
26 |           ~/go/pkg/mod
27 |         key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
28 |         restore-keys: |
29 |           ${{ runner.os }}-go-
30 | 
31 |     - name: Build
32 |       run: go build -v ./...
33 | 
34 |     - name: Test
35 |       run: go test ./...
36 | 


--------------------------------------------------------------------------------
/cat/np_test.go:
--------------------------------------------------------------------------------
 1 | package cat
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func TestNonPrinting(t *testing.T) {
11 | 	var out bytes.Buffer
12 | 
13 | 	inp := []byte{0, 8, 9, 10, 31, 32}
14 | 	nonPrinting(inp, &out)
15 | 	require.Equal(t, "^@^H\t\n^_ ", out.String())
16 | 	inp = []byte{32, 42, 126, 127}
17 | 	nonPrinting(inp, &out)
18 | 	require.Equal(t, " *~^?", out.String())
19 | 	inp = []byte{128, 142, 159}
20 | 	nonPrinting(inp, &out)
21 | 	require.Equal(t, "M-BM-^@M-BM-^NM-BM-^_", out.String())
22 | 	inp = []byte{160, 180, 191}
23 | 	nonPrinting(inp, &out)
24 | 	require.Equal(t, "M-BM- M-BM-4M-BM-?", out.String())
25 | 	inp = []byte{192, 202, 223}
26 | 	nonPrinting(inp, &out)
27 | 	require.Equal(t, "M-CM-^@M-CM-^JM-CM-^_", out.String())
28 | 	inp = []byte{224, 242, 255}
29 | 	nonPrinting(inp, &out)
30 | 	require.Equal(t, "M-CM- M-CM-2M-CM-?", out.String())
31 | }
32 | 


--------------------------------------------------------------------------------
/example/real_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package example_test
 6 | 
 7 | import (
 8 | 	"bytes"
 9 | 	"context"
10 | 	"io"
11 | 	"log"
12 | 	"os"
13 | 	"testing"
14 | 
15 | 	"github.com/gomoni/gio/unix"
16 | 	"github.com/gomoni/gonix/cat"
17 | 	"github.com/gomoni/gonix/wc"
18 | 	"github.com/stretchr/testify/require"
19 | 
20 | 	"go.uber.org/goleak"
21 | )
22 | 
23 | func TestGoleak(t *testing.T) {
24 | 	defer goleak.VerifyNone(t)
25 | 	var b bytes.Buffer
26 | 	stdio := unix.NewStdio(
27 | 		io.NopCloser(bytes.NewBufferString("three\nsmall\npigs\n")),
28 | 		&b,
29 | 		os.Stderr,
30 | 	)
31 | 	ctx := context.Background()
32 | 	err := unix.NewLine().Run(ctx, stdio, cat.New(), wc.New().Lines(true))
33 | 	if err != nil {
34 | 		log.Fatal(err)
35 | 	}
36 | 	require.Equal(t, "3\n", b.String())
37 | }
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Michal Vyskocil <michal.vyskocil@gmail.com>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/internal/pmap.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | package internal
 5 | 
 6 | import (
 7 | 	"context"
 8 | 	"errors"
 9 | 
10 | 	"golang.org/x/sync/semaphore"
11 | )
12 | 
13 | // An experiment with parallelization of work.
14 | // The hard requirement is that results keep the order in which the tasks were submitted.
15 | 
16 | type MapFunc[T any, U any] func(context.Context, T) (U, error)
17 | 
18 | func PMap[T any, U any](ctx context.Context, limit uint, slice []T, mapFunc MapFunc[T, U]) ([]U, error) {
19 | 	retu := make([]U, len(slice))
20 | 	errs := make([]error, len(slice))
21 | 
22 | 	sem := semaphore.NewWeighted(int64(limit))
23 | 
24 | 	for idx, input := range slice {
25 | 
26 | 		if err := sem.Acquire(ctx, 1); err != nil {
27 | 			return nil, err
28 | 		}
29 | 
30 | 		go func(ctx context.Context, idx int, input T, results []U, errors []error) {
31 | 			defer sem.Release(1)
32 | 
33 | 			result, err := mapFunc(ctx, input)
34 | 			if err != nil {
35 | 				errs[idx] = err
36 | 			}
37 | 			results[idx] = result
38 | 		}(ctx, idx, input, retu, errs)
39 | 	}
40 | 
41 | 	if err := sem.Acquire(ctx, int64(limit)); err != nil {
42 | 		return nil, err
43 | 	}
44 | 
45 | 	return retu, errors.Join(errs...)
46 | }
47 | 


--------------------------------------------------------------------------------
/internal/pmap_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | package internal_test
 5 | 
 6 | import (
 7 | 	"context"
 8 | 	"testing"
 9 | 	"time"
10 | 
11 | 	. "github.com/gomoni/gonix/internal"
12 | 	"github.com/stretchr/testify/require"
13 | )
14 | 
15 | func TestPMap(t *testing.T) {
16 | 	t.Parallel()
17 | 	f := func(_ context.Context, i int) (int, error) {
18 | 		t.Logf("TestPMAP.f(%d)", i)
19 | 		time.Sleep(time.Duration(i) * time.Millisecond)
20 | 		return i + 42, nil
21 | 	}
22 | 	ctx := context.TODO()
23 | 
24 | 	start := time.Now()
25 | 	ret, err := PMap(ctx, 1, []int{10, 20, 50, 100, 200, 500}, f)
26 | 	stop := time.Now()
27 | 	require.NoError(t, err)
28 | 	require.Equal(t, []int{52, 62, 92, 142, 242, 542}, ret)
29 | 	duration1 := stop.Sub(start)
30 | 
31 | 	start = time.Now()
32 | 	ret, err = PMap(ctx, 3, []int{10, 20, 50, 100, 200, 500}, f)
33 | 	stop = time.Now()
34 | 	require.NoError(t, err)
35 | 	require.Equal(t, []int{52, 62, 92, 142, 242, 542}, ret)
36 | 	duration2 := stop.Sub(start)
37 | 
38 | 	// this may be reliable enough in all scenarios (CI vs local machine)
39 | 	// the seq case shall be 10+20+50+100+200+500=880ms long
40 | 	// the parallel one shall take slightly more than 500ms
41 | 	require.GreaterOrEqual(t, duration1, duration2)
42 | }
43 | 


--------------------------------------------------------------------------------
/awk/awk_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package awk_test
 6 | 
 7 | import (
 8 | 	"testing"
 9 | 
10 | 	. "github.com/gomoni/gonix/awk"
11 | 	"github.com/gomoni/gonix/internal/test"
12 | 
13 | 	"github.com/benhoyt/goawk/interp"
14 | 	"github.com/stretchr/testify/require"
15 | )
16 | 
17 | func TestAWK(t *testing.T) {
18 | 	test.Parallel(t)
19 | 
20 | 	testCases := []test.Case[AWK]{
21 | 		{
22 | 			Name:     "cat",
23 | 			Filter:   compile(t, newCfg(), `{print $1;}`),
24 | 			Input:    "01\tthree\n02\tsmall\n03\tpigs\n",
25 | 			Expected: "01\n02\n03\n",
26 | 		},
27 | 		{
28 | 			Name:     "cat FS ;",
29 | 			Filter:   compile(t, newCfg().FS(";"), `{print $2;}`),
30 | 			Input:    "01;three\n02;small\n03;pigs\n",
31 | 			Expected: "three\nsmall\npigs\n",
32 | 		},
33 | 	}
34 | 	test.RunAll(t, testCases)
35 | }
36 | 
37 | func compile(t *testing.T, c *cfg, src string) AWK {
38 | 	t.Helper()
39 | 	awk, err := Compile([]byte(src), c.config)
40 | 	require.NoError(t, err)
41 | 	return awk
42 | }
43 | 
44 | type cfg struct {
45 | 	config *interp.Config
46 | }
47 | 
48 | func newCfg() *cfg {
49 | 	return &cfg{config: &interp.Config{}}
50 | }
51 | func (c *cfg) FS(value string) *cfg {
52 | 	c.config.Vars = append(c.config.Vars, []string{"FS", value}...)
53 | 	return c
54 | }
55 | 


--------------------------------------------------------------------------------
/head/head_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package head_test
 6 | 
 7 | import (
 8 | 	"testing"
 9 | 
10 | 	. "github.com/gomoni/gonix/head"
11 | 	"github.com/gomoni/gonix/internal/test"
12 | 	"github.com/stretchr/testify/require"
13 | )
14 | 
15 | func TestHead(t *testing.T) {
16 | 	test.Parallel(t)
17 | 	testCases := []test.Case[Head]{
18 | 		{
19 | 			Name:     "default",
20 | 			Filter:   fromArgs(t, []string{}),
21 | 			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
22 | 			Expected: "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n",
23 | 		},
24 | 		{
25 | 			Name:     "--lines 2",
26 | 			Filter:   New().Lines(2),
27 | 			FromArgs: fromArgs(t, []string{"-n", "2"}),
28 | 			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
29 | 			Expected: "1\n2\n",
30 | 		},
31 | 		{
32 | 			Name:     "--lines -10",
33 | 			Filter:   New().Lines(-10),
34 | 			FromArgs: fromArgs(t, []string{"-n", "-10"}),
35 | 			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
36 | 			Expected: "1\n2\n",
37 | 		},
38 | 		{
39 | 			Name:     "--lines 2 --zero-terminated",
40 | 			Filter:   New().Lines(2).ZeroTerminated(true),
41 | 			FromArgs: fromArgs(t, []string{"-n", "2", "--zero-terminated"}),
42 | 			Input:    "1\x002\x003\x004\x00",
43 | 			Expected: "1\n2\n",
44 | 		},
45 | 	}
46 | 	test.RunAll(t, testCases)
47 | }
48 | 
49 | func fromArgs(t *testing.T, argv []string) Head {
50 | 	t.Helper()
51 | 	n := New()
52 | 	f, err := n.FromArgs(argv)
53 | 	require.NoError(t, err)
54 | 	return f
55 | }
56 | 


--------------------------------------------------------------------------------
/wc/wc_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package wc_test
 6 | 
 7 | import (
 8 | 	"fmt"
 9 | 	"testing"
10 | 
11 | 	"github.com/gomoni/gonix/internal/test"
12 | 	. "github.com/gomoni/gonix/wc"
13 | 
14 | 	"github.com/stretchr/testify/require"
15 | )
16 | 
17 | func TestWc(t *testing.T) {
18 | 	test.Parallel(t)
19 | 	threeSmallPigs := test.Testdata(t, "three-small-pigs")
20 | 	testCases := []test.Case[Wc]{
21 | 		{
22 | 			Name:     "default",
23 | 			Filter:   fromArgs(t, []string{}),
24 | 			Input:    "The three\nsmall\npigs\n",
25 | 			Expected: " 3 4 21\n",
26 | 		},
27 | 		{
28 | 			Name:     "wc -l",
29 | 			Filter:   New().Lines(true),
30 | 			FromArgs: fromArgs(t, []string{"-l"}),
31 | 			Input:    "three\nsmall\npigs\n",
32 | 			Expected: "3\n",
33 | 		},
34 | 		{
35 | 			Name:     "wc --lines",
36 | 			Filter:   New().Lines(true),
37 | 			FromArgs: fromArgs(t, []string{"--lines"}),
38 | 			Input:    "three\nsmall\npigs\n",
39 | 			Expected: "3\n",
40 | 		},
41 | 		{
42 | 			Name:     "wc -cmlLw",
43 | 			Filter:   New().Bytes(true).Chars(true).Lines(true).MaxLineLength(true).Words(true),
44 | 			FromArgs: fromArgs(t, []string{"-cmlLw"}),
45 | 			Input:    "The three žluťoučká\nsmall\npigs\n",
46 | 			Expected: " 3 5 31 35 19\n",
47 | 		},
48 | 		{
49 | 			Name:     "wc -l - three-small-pigs",
50 | 			Filter:   New().Lines(true).Files("-", threeSmallPigs),
51 | 			FromArgs: fromArgs(t, []string{"-l", "-", threeSmallPigs}),
52 | 			Input:    "1\n2\n3\n4\n",
53 | 			Expected: fmt.Sprintf(" 4 -\n 3 %s\n 7 total\n", threeSmallPigs),
54 | 		},
55 | 	}
56 | 
57 | 	test.RunAll(t, testCases)
58 | }
59 | 
60 | func fromArgs(t *testing.T, argv []string) Wc {
61 | 	t.Helper()
62 | 	n := New()
63 | 	f, err := n.FromArgs(argv)
64 | 	require.NoError(t, err)
65 | 	return f
66 | }
67 | 


--------------------------------------------------------------------------------
/awk/awk.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | /*
 6 | awk is a thin wrapper on top of github.com/benhoyt/goawk/interp and github.com/benhoyt/goawk/parser
 7 | providing a compatible [unix.Filter] interface for goawk.
 8 | */
 9 | 
10 | package awk
11 | 
12 | import (
13 | 	"context"
14 | 	"fmt"
15 | 	"io"
16 | 
17 | 	"github.com/benhoyt/goawk/interp"
18 | 	"github.com/benhoyt/goawk/parser"
19 | 	"github.com/gomoni/gio/unix"
20 | )
21 | 
22 | func NewConfig() *interp.Config {
23 | 	return &interp.Config{
24 | 		NoArgVars:    true,
25 | 		NoExec:       true,
26 | 		NoFileWrites: true,
27 | 		NoFileReads:  true,
28 | 		ShellCommand: []string{"/bin/true"},
29 | 	}
30 | }
31 | 
32 | // AWK is a thin wrapper on top of github.com/benhoyt/goawk
33 | type AWK struct {
34 | 	program *parser.Program
35 | 	config  *interp.Config
36 | }
37 | 
38 | func New(prog *parser.Program, config *interp.Config) AWK {
39 | 	return AWK{
40 | 		program: prog,
41 | 		config:  config,
42 | 	}
43 | }
44 | 
45 | func Compile(src []byte, config *interp.Config) (AWK, error) {
46 | 	if config == nil {
47 | 		return AWK{}, fmt.Errorf("nil config")
48 | 	}
49 | 	pconfig := parser.ParserConfig{
50 | 		DebugTypes:  false,
51 | 		DebugWriter: io.Discard,
52 | 		Funcs:       config.Funcs,
53 | 	}
54 | 	prog, err := parser.ParseProgram(src, &pconfig)
55 | 	if err != nil {
56 | 		return AWK{}, err
57 | 	}
58 | 	return AWK{
59 | 		program: prog,
60 | 		config:  config,
61 | 	}, nil
62 | }
63 | 
64 | func (c AWK) Run(ctx context.Context, stdio unix.StandardIO) error {
65 | 	if c.config == nil {
66 | 		return fmt.Errorf("nil config")
67 | 	}
68 | 	if c.program == nil {
69 | 		return fmt.Errorf("nil prog")
70 | 	}
71 | 	config := *c.config
72 | 	config.Stdin = stdio.Stdin()
73 | 	config.Output = stdio.Stdout()
74 | 	config.Error = stdio.Stderr()
75 | 	_, err := interp.ExecProgram(c.program, &config)
76 | 	return err
77 | }
78 | 


--------------------------------------------------------------------------------
/example/example_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
 2 | // Use of this source code is governed by a MIT
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package example_test
 6 | 
 7 | import (
 8 | 	"bytes"
 9 | 	"context"
10 | 	"log"
11 | 	"os"
12 | 
13 | 	"github.com/gomoni/gio/unix"
14 | 	"github.com/gomoni/gonix/cat"
15 | 	"github.com/gomoni/gonix/head"
16 | 	"github.com/gomoni/gonix/wc"
17 | )
18 | 
19 | // This example shows the unix.NewLine().Run with cat and wc
20 | func Example() {
21 | 	stdio := unix.NewStdio(
22 | 		bytes.NewBufferString("three\nsmall\npigs\n"),
23 | 		os.Stdout,
24 | 		os.Stderr,
25 | 	)
26 | 	ctx := context.Background()
27 | 	// printf "three\nsmall\npigs\n" | cat | wc -l
28 | 	err := unix.NewLine().Run(ctx, stdio, cat.New(), wc.New().Lines(true))
29 | 	if err != nil {
30 | 		log.Fatal(err)
31 | 	}
32 | 	// Output:
33 | 	// 3
34 | }
35 | 
36 | // This example shows the unix.NewLine().Run with cat and wc with arguments passed as []string
37 | func Example_from_args() {
38 | 	stdio := unix.NewStdio(
39 | 		bytes.NewBufferString("three\nsmall\npigs\n"),
40 | 		os.Stdout,
41 | 		os.Stderr,
42 | 	)
43 | 	ctx := context.Background()
44 | 	cat, err := cat.New().FromArgs(nil)
45 | 	if err != nil {
46 | 		log.Fatal(err)
47 | 	}
48 | 	wc, err := wc.New().FromArgs([]string{"-l"})
49 | 	if err != nil {
50 | 		log.Fatal(err)
51 | 	}
52 | 	err = unix.NewLine().Run(ctx, stdio, cat, wc)
53 | 	if err != nil {
54 | 		log.Fatal(err)
55 | 	}
56 | 	// Output:
57 | 	// 3
58 | }
59 | 
60 | /* FIXME: NewExec shall be ported back to gio
61 | func ExampleRun_exec() {
62 | 	stdio := unix.NewStdio(
63 | 		os.Stdin,
64 | 		os.Stdout,
65 | 		os.Stderr,
66 | 	)
67 | 	ctx := context.Background()
68 | 	cmd := exec.Command("go", "version")
69 | 	goVersion := pipe.NewExec(cmd)
70 | 	wc, err := wc.New().FromArgs([]string{"-l"})
71 | 	if err != nil {
72 | 		log.Fatal(err)
73 | 	}
74 | 	// go version | wc -l
75 | 	err = unix.NewLine().Run(ctx, stdio, goVersion, wc)
76 | 	if err != nil {
77 | 		log.Fatal(err)
78 | 	}
79 | 	// Output:
80 | 	// 1
81 | }
82 | */
83 | 
84 | func ExampleHead_Run() {
85 | 	head := head.New().Lines(2)
86 | 	err := head.Run(context.TODO(), unix.NewStdio(
87 | 		bytes.NewBufferString("three\nsmall\npigs\n"),
88 | 		os.Stdout,
89 | 		os.Stderr,
90 | 	))
91 | 	if err != nil {
92 | 		log.Fatal(err)
93 | 	}
94 | 	// Output:
95 | 	// three
96 | 	// small
97 | }
98 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
  1 | # TODO
  2 | 
  3 | Implement this?
  4 | >       >128    A command was interrupted by a signal.
  5 | 
  6 |  * what about tasks running other commands?
  7 |     `cat /etc/passwd | xargs -L1 timeout 2s printf "%s\n"`
  8 | 
  9 |  * implement shell scripting builtins like `until`?
 10 | 
 11 |  * Add (a basic) tr - x/tr
 12 |  * Add (a basic) tail
 13 |  * Add sort --version-sort
 14 |  * Add (a basic) grep
 15 |  * Add wrapper for goawk
 16 |  * https://github.com/itchyny/gojq
 17 |  * wc can run in a parallel
 18 | 
 19 | ## sbase tools
 20 | 
 21 | sorted by a length of manual page
 22 | 
 23 |  *  yes
 24 |  * true/false
 25 |  * sponge
 26 |  * tee
 27 |  * seq
 28 |  * comm
 29 |  * fold
 30 |  * cmp
 31 |  * paste
 32 |  * unexpand
 33 |  * uniq
 34 |  * strings
 35 |  * env      - not implemented as is, but check the options of pipe.Environ with this tool
 36 |  * tail
 37 |  * split
 38 |  * expand
 39 |  * uudecode
 40 |  * cols
 41 |  * tr
 42 |  * tsort
 43 |  * cut
 44 |  * od
 45 |  * sort
 46 |  * join
 47 |  * nl
 48 |  * grep (idea: add a gg - like ripgrep/rg first?)
 49 |  * sed
 50 |  * awk - based on goawk
 51 |  * jq - based on gojq
 52 | 
 53 | ## GNU tools
 54 | 
 55 |  * fmt
 56 |  * shuf
 57 |  * numfmt
 58 |  * base32
 59 |  * base64
 60 |  * csplit
 61 |  * tac
 62 |  * timeout - do it via context(?)
 63 |  * basenc
 64 | 
 65 | 
 66 | # #bringmeback
 67 | 
 68 | _The following features got lost during the port on top of github.com/gomoni/gio.
 69 | Bring them back, at least in a different project._
 70 | 
 71 | Most unix pipelines exist in a shell-compatible format. `gonix` provides helpers which can split the shell
 72 | syntax into equivalent Go code. Code can
 73 | 
 74 | * ✔ control which names will be mapped into native Go code
 75 | * ✔ supports extra split function ([github.com/desertbit/go-shlex](https://github.com/desertbit/go-shlex) is probably the best)
 76 | * ✔ control what to do if command name is not found
 77 | * ✔ support  `PATH` lookups and binaries execution like shell does, but disabled by default
 78 | * ✔ control environment variables
 79 | 
 80 | ```go
 81 | 	builtins := map[string]func([]string) (pipe.Filter, error){
 82 | 		"wc": func(a []string) (pipe.Filter, error) { return wc.New().FromArgs(a) },
 83 | 	}
 84 | 	// use real shlex code like github.com/desertbit/go-shlex
 85 | 	// splitfn := func(s string) ([]string, error) { return shlex.Split(s, true) }
 86 | 	splitfn := func(s string) ([]string, error) { return []string{"go", "version", "|", "wc", "-l"}, nil }
 87 | 	stdio := pipe.Stdio{
 88 | 		Stdin:  os.Stdin,
 89 | 		Stdout: os.Stdout,
 90 | 		Stderr: os.Stderr,
 91 | 	}
 92 | 	ctx := context.Background()
 93 | 
 94 | 	env := pipe.DuplicateEnviron()
 95 | 	sh := pipe.NewSh(builtins, splitfn).NotFoundFunc(env.NotFoundFunc)
 96 | 	err := sh.Run(ctx, stdio, `go version | wc -l`)
 97 | 	if err != nil {
 98 | 		log.Fatal(err)
 99 | 	}
100 | 	// Output:
101 | 	// 1
102 | ```
103 | 
104 | ## Busybox-like command line tool
105 | 
106 | Can be built and executed like busybox or a toybox.
107 | 
108 | ```sh
109 | ./gonix cat /etc/passwd /etc/resolv.conf | ./gonix cksum --algorithm md5 --untagged md5sum
110 | ```
111 | 
112 | 


--------------------------------------------------------------------------------
/internal/test/test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
  2 | // Use of this source code is governed by a MIT
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package test
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"context"
 10 | 	"io"
 11 | 	"os"
 12 | 	"path/filepath"
 13 | 	"reflect"
 14 | 	"runtime"
 15 | 	"strings"
 16 | 	"sync"
 17 | 	"testing"
 18 | 
 19 | 	"github.com/gomoni/gio/unix"
 20 | 	"github.com/stretchr/testify/require"
 21 | )
 22 | 
 23 | // Parallel enables parallel tests only if testing is not verbose
 24 | // this prevents the debug logs from being mixed together
 25 | func Parallel(t *testing.T) {
 26 | 	if !testing.Verbose() {
 27 | 		t.Parallel()
 28 | 	}
 29 | }
 30 | 
 31 | // Case is a single test case testing gonix filter
 32 | // It contains a pointer (PF) to type implementing the pipe.Filter interface
 33 | type Case[F unix.Filter] struct {
 34 | 	Name     string // Name is test case name
 35 | 	Input    string // Input is test case input
 36 | 	Expected string // Expected is what filter is expected to produce
 37 | 	Filter   F      // Filter is a pointer to type implementing the pipe.Filter
 38 | 	FromArgs F      // Optional Filter constructed via FromArgs helper, expected to be equal to Filter
 39 | }
 40 | 
 41 | func RunAll[F unix.Filter](t *testing.T, testCases []Case[F]) {
 42 | 	t.Helper()
 43 | 
 44 | 	for _, tt := range testCases {
 45 | 		tt := tt
 46 | 		t.Run(tt.Name, func(t *testing.T) {
 47 | 			Parallel(t)
 48 | 
 49 | 			var out strings.Builder
 50 | 			stdio := unix.NewStdio(
 51 | 				bytes.NewBufferString(tt.Input),
 52 | 				&out,
 53 | 				os.Stderr,
 54 | 			)
 55 | 			ctx := context.Background()
 56 | 
 57 | 			var zero F
 58 | 			if !reflect.DeepEqual(tt.FromArgs, zero) {
 59 | 				require.Equal(t, tt.FromArgs, tt.Filter)
 60 | 			}
 61 | 
 62 | 			// call SetDebug(true) if present and test if verbose
 63 | 			x := reflect.ValueOf(tt.Filter)
 64 | 			setDebug := x.MethodByName("SetDebug")
 65 | 			if setDebug.Kind() == reflect.Func {
 66 | 				setDebug.Call([]reflect.Value{reflect.ValueOf(testing.Verbose())})
 67 | 			}
 68 | 
 69 | 			err := unix.NewLine().Run(ctx, stdio, tt.Filter)
 70 | 			require.NoError(t, err)
 71 | 			require.Equal(t, tt.Expected, out.String())
 72 | 		})
 73 | 	}
 74 | }
 75 | 
 76 | // testDataDir caches the testdata directory, testDataOnce guards its one-time
 77 | // resolution on the first Testdata call.
 78 | var (
 79 | 	testDataDir  string
 80 | 	testDataOnce sync.Once
 81 | )
 82 | 
 83 | // Testdata returns the path to the file key inside the testdata directory,
 84 | // which is located next to this source file. The test fails immediately when
 85 | // the file can not be inspected or is not a regular file.
 86 | func Testdata(t *testing.T, key string) string {
 87 | 	t.Helper()
 88 | 
 89 | 	resolveDir := func() {
 90 | 		_, thisFile, _, ok := runtime.Caller(0)
 91 | 		require.Truef(t, ok, "can't call runtime.Caller")
 92 | 		testDataDir = filepath.Join(filepath.Dir(thisFile), "testdata")
 93 | 	}
 94 | 	testDataOnce.Do(resolveDir)
 95 | 
 96 | 	path := filepath.Join(testDataDir, key)
 97 | 	info, err := os.Stat(path)
 98 | 	require.NoError(t, err)
 99 | 	require.True(t, info.Mode().IsRegular())
100 | 	return path
101 | }
 98 | 
 99 | // IOError is a test double for standard IO streams. It provides Read, Write
100 | // and Close methods which fail on demand.
101 | type IOError struct {
102 | 	Reads    [][]byte // chunks handed out by successive Read calls
103 | 	Writes   int      // number of Write calls which succeed before Err is returned
104 | 	Err      error    // returned by Read once Reads is exhausted and by Write once Writes drops to zero
105 | 	CloseErr error    // returned by Close
106 | }
107 | 
108 | // Read copies the next pending chunk into p and reports the number of bytes
109 | // actually copied. A chunk bigger than p is handed out over several calls.
110 | // Once all chunks are consumed it returns Err, or io.EOF when Err is nil.
111 | func (i *IOError) Read(p []byte) (int, error) {
112 | 	if len(i.Reads) == 0 {
113 | 		if i.Err != nil {
114 | 			return 0, i.Err
115 | 		}
116 | 		return 0, io.EOF
117 | 	}
118 | 	n := copy(p, i.Reads[0])
119 | 	if n < len(i.Reads[0]) {
120 | 		// p was too small, keep the remainder for the next call
121 | 		i.Reads[0] = i.Reads[0][n:]
122 | 		return n, nil
123 | 	}
124 | 	i.Reads = i.Reads[1:]
125 | 	return n, nil
126 | }
127 | 
128 | // Write pretends to write p. While Writes is not zero the call succeeds,
129 | // decrements Writes and reports len(p); otherwise it returns 0 and Err.
130 | func (i *IOError) Write(p []byte) (int, error) {
131 | 	if i.Writes == 0 {
132 | 		return 0, i.Err
133 | 	}
134 | 	i.Writes--
135 | 	return len(p), nil
136 | }
137 | 
138 | // Close returns CloseErr.
139 | func (i IOError) Close() error {
140 | 	return i.CloseErr
141 | }
127 | 


--------------------------------------------------------------------------------
/internal/unit_test.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/stretchr/testify/require"
  9 | )
 10 | 
 11 | func TestParseByte(t *testing.T) {
 12 | 	t.Parallel()
 13 | 
 14 | 	testCases := []struct {
 15 | 		input    string
 16 | 		expected Byte
 17 | 	}{
 18 | 		{
 19 | 			"0",
 20 | 			Byte(0),
 21 | 		},
 22 | 		{
 23 | 			"00008",
 24 | 			Byte(8),
 25 | 		},
 26 | 		{
 27 | 			"42",
 28 | 			Byte(42),
 29 | 		},
 30 | 		{
 31 | 			"5b",
 32 | 			5 * Block,
 33 | 		},
 34 | 		{
 35 | 			"7kB",
 36 | 			7 * KiloByte,
 37 | 		},
 38 | 		{
 39 | 			"8K",
 40 | 			8 * KibiByte,
 41 | 		},
 42 | 		{
 43 | 			"9KiB",
 44 | 			9 * KibiByte,
 45 | 		},
 46 | 		{
 47 | 			"1MB500kB",
 48 | 			1*MegaByte + 500*KiloByte,
 49 | 		},
 50 | 		{
 51 | 			"1024KiB",
 52 | 			1024 * KibiByte,
 53 | 		},
 54 | 		{
 55 | 			"1024KiB",
 56 | 			1 * MebiByte,
 57 | 		},
 58 | 		{
 59 | 			"1.5G",
 60 | 			1.5 * GibiByte,
 61 | 		},
 62 | 		{
 63 | 			"-1.5G",
 64 | 			-1.5 * GibiByte,
 65 | 		},
 66 | 	}
 67 | 
 68 | 	for _, tt := range testCases {
 69 | 		tt := tt
 70 | 		t.Run(tt.input, func(t *testing.T) {
 71 | 			t.Parallel()
 72 | 			s, err := ParseByte(tt.input)
 73 | 			require.NoError(t, err)
 74 | 			require.Equal(t, tt.expected, s)
 75 | 		})
 76 | 	}
 77 | }
 78 | 
 79 | func TestParseByteErr(t *testing.T) {
 80 | 	t.Parallel()
 81 | 
 82 | 	testCases := []struct {
 83 | 		name     string
 84 | 		input    string
 85 | 		expected string
 86 | 	}{
 87 | 		{
 88 | 			input:    "",
 89 | 			expected: `invalid size "": empty`,
 90 | 		},
 91 | 		{
 92 | 			input:    " ",
 93 | 			expected: `invalid size " ": expected number or decimal separator`,
 94 | 		},
 95 | 		{
 96 | 			input:    "x",
 97 | 			expected: `invalid size "x": expected number or decimal separator`,
 98 | 		},
 99 | 		{
100 | 			input:    "3x",
101 | 			expected: `unknown unit "x" in size "3x"`,
102 | 		},
103 | 		{
104 | 			name:     "maxfloat640",
105 | 			input:    fmt.Sprintf("%f", math.MaxFloat64) + "0",
106 | 			expected: fmt.Sprintf(`invalid size %q: overflow`, fmt.Sprintf("%f", math.MaxFloat64)+"0"),
107 | 		},
108 | 		{
109 | 			name:     "maxfloat64b",
110 | 			input:    fmt.Sprintf("%f", math.MaxFloat64) + "b",
111 | 			expected: fmt.Sprintf(`invalid size %q: overflow`, fmt.Sprintf("%f", math.MaxFloat64)+"b"),
112 | 		},
113 | 		{
114 | 			input:    "\xf22000000",
115 | 			expected: `invalid size "\xf22000000": expected number or decimal separator`,
116 | 		},
117 | 		{
118 | 			input:    "A",
119 | 			expected: `invalid size "A": expected number or decimal separator`,
120 | 		},
121 | 		{
122 | 			input:    "\t\t",
123 | 			expected: `invalid size "\t\t": expected number or decimal separator`,
124 | 		},
125 | 		{
126 | 			input:    ".",
127 | 			expected: `invalid size ".": no digits`,
128 | 		},
129 | 		{
130 | 			input:    "-.s",
131 | 			expected: `invalid size "-.s": no digits`,
132 | 		},
133 | 	}
134 | 
135 | 	for _, tt := range testCases {
136 | 		tt := tt
137 | 		name := func() string {
138 | 			if tt.name != "" {
139 | 				return tt.name
140 | 			}
141 | 			return tt.input
142 | 		}
143 | 		t.Run(name(), func(t *testing.T) {
144 | 			t.Parallel()
145 | 			_, err := ParseByte(tt.input)
146 | 			require.Error(t, err)
147 | 			require.EqualError(t, err, tt.expected)
148 | 		})
149 | 	}
150 | }
151 | 
152 | // TestSPF13Value exercises the String, Set and Type methods of Byte.
153 | func TestSPF13Value(t *testing.T) {
154 | 	size := 6*KiloByte + 72*Block
155 | 	require.Equal(t, "42864B", size.String())
156 | 
157 | 	var parsed Byte
158 | 	require.NoError(t, parsed.Set("10KiB"))
159 | 	require.Equal(t, "10240B", parsed.String())
160 | 	require.Equal(t, "Byte", parsed.Type())
161 | }
165 | 


--------------------------------------------------------------------------------
/internal/run_files.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"context"
  6 | 	"errors"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"os"
 10 | 	"runtime"
 11 | 
 12 | 	"github.com/gomoni/gio/pipe"
 13 | 	"github.com/gomoni/gio/unix"
 14 | )
 15 | 
 16 | // RunFiles is a helper to run a gonix command over inputs from several files.
 17 | // A failure to open a file does not break the loop, but results in an error with exit code 1.
 18 | // File names "" and "-" are treated as stdin; fun is called once per input with its index and name.
 19 | type RunFiles struct {
 20 | 	files []string
 21 | 	errs  error
 22 | 	stdio unix.StandardIO
 23 | 	fun   func(context.Context, unix.StandardIO, int, string) error
 24 | }
 25 | 
 26 | func NewRunFiles(files []string, stdio unix.StandardIO, fun func(context.Context, unix.StandardIO, int, string) error) RunFiles {
 27 | 	return RunFiles{
 28 | 		files: files,
 29 | 		stdio: stdio,
 30 | 		errs:  nil,
 31 | 		fun:   fun,
 32 | 	}
 33 | }
 34 | 
 35 | // Do processes the files one after another and stops on the first error
 36 | // returned by the command itself. Without any file it processes stdin once.
 37 | // Files which could not be opened are skipped and reported together as a
 38 | // single pipe error after the remaining files were processed.
 39 | func (l RunFiles) Do(ctx context.Context) error {
 40 | 	names := l.files
 41 | 	if len(names) == 0 {
 42 | 		// no files given: the empty name stands for stdin
 43 | 		names = []string{""}
 44 | 	}
 45 | 
 46 | 	openErrs := make([]error, 0, len(l.files))
 47 | 	for i := range names {
 48 | 		if err := l.doOne(ctx, i, names[i], l.stdio.Stdout(), l.stdio.Stderr(), &openErrs); err != nil {
 49 | 			return err
 50 | 		}
 51 | 	}
 52 | 	return asPipeError(openErrs)
 53 | }
 48 | 
 49 | // in is a single DoThreads task: the index of the file and its name.
 50 | type in struct {
 51 | 	idx  int
 52 | 	name string
 53 | }
 54 | 
 55 | // out holds what a single DoThreads task wrote to its stdout and stderr.
 56 | type out struct {
 57 | 	stdout *bytes.Buffer
 58 | 	stderr *bytes.Buffer
 59 | }
 60 | 
 61 | // DoThreads runs individual tasks concurrently via PMap. Each command writes to the memory buffer
 62 | // first, so probably best to be used for a compute intensive operations like cksum is. As it uses
 63 | // PMap, outputs are in the same order as inputs.
 64 | //
 65 | // Zero threads means runtime.GOMAXPROCS(0). With one thread or without any
 66 | // file it falls back to Do. Errors are returned as a single pipe error.
 67 | func (l RunFiles) DoThreads(ctx context.Context, threads uint) error {
 68 | 	if threads == 0 {
 69 | 		threads = uint(runtime.GOMAXPROCS(0))
 70 | 	}
 71 | 	if threads == 1 || len(l.files) == 0 {
 72 | 		return l.Do(ctx)
 73 | 	}
 74 | 
 75 | 	// Every task records errors into its own slot, so concurrently running
 76 | 	// tasks never append to a shared slice.
 77 | 	taskErrs := make([][]error, len(l.files))
 78 | 	one := func(ctx context.Context, task in) (out, error) {
 79 | 		res := out{
 80 | 			stdout: bytes.NewBuffer(nil),
 81 | 			stderr: bytes.NewBuffer(nil),
 82 | 		}
 83 | 		err := l.doOne(ctx, task.idx, task.name, res.stdout, res.stderr, &taskErrs[task.idx])
 84 | 		if err != nil {
 85 | 			taskErrs[task.idx] = append(taskErrs[task.idx], err)
 86 | 		}
 87 | 		return res, err
 88 | 	}
 89 | 
 90 | 	inputs := make([]in, len(l.files))
 91 | 	for idx, f := range l.files {
 92 | 		inputs[idx] = in{idx: idx, name: f}
 93 | 	}
 94 | 
 95 | 	outputs, err := PMap(ctx, threads, inputs, one)
 96 | 	if err != nil {
 97 | 		return err
 98 | 	}
 99 | 
100 | 	// the slots are read only after PMap returned the outputs; merging them
101 | 	// by index keeps the errors in the order of the input files
102 | 	errs := make([]error, 0, len(l.files))
103 | 	for _, e := range taskErrs {
104 | 		errs = append(errs, e...)
105 | 	}
106 | 	for _, o := range outputs {
107 | 		if _, err := io.Copy(l.stdio.Stderr(), o.stderr); err != nil {
108 | 			errs = append(errs, err)
109 | 		}
110 | 		if _, err := io.Copy(l.stdio.Stdout(), o.stdout); err != nil {
111 | 			errs = append(errs, err)
112 | 		}
113 | 	}
114 | 	return asPipeError(errs)
115 | }
104 | 
105 | // doOne runs fun for a single input. The names "" and "-" stand for stdin,
106 | // anything else is opened as a file which is closed when fun returns.
107 | //
108 | // A failure to open the file is not fatal: the error is printed to stderr,
109 | // appended to errsp and nil is returned. Otherwise the result of fun is
110 | // returned as is.
111 | func (l RunFiles) doOne(ctx context.Context, idx int, name string, stdout, stderr io.Writer, errsp *[]error) error {
112 | 	var in io.Reader
113 | 	if name == "" || name == "-" {
114 | 		in = l.stdio.Stdin()
115 | 	} else {
116 | 		f, err := os.Open(name)
117 | 		if err != nil {
118 | 			// Report to the stderr of this call and not to the stderr of
119 | 			// the whole command: DoThreads passes a private buffer, so the
120 | 			// message stays in order and is not written concurrently.
121 | 			fmt.Fprintf(stderr, "%s\n", err)
122 | 			*errsp = append(*errsp, err)
123 | 			return nil
124 | 		}
125 | 		defer f.Close()
126 | 		in = f
127 | 	}
128 | 	return l.fun(ctx, unix.NewStdio(in, stdout, stderr), idx, name)
129 | }
127 | 
128 | // asPipeError joins errs into a single pipe error with exit code 1. It
129 | // returns nil when errs is empty.
130 | func asPipeError(errs []error) error {
131 | 	if len(errs) > 0 {
132 | 		return pipe.NewError(1, errors.Join(errs...))
133 | 	}
134 | 	return nil
135 | }
135 | 


--------------------------------------------------------------------------------
/head/head.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
  2 | // Use of this source code is governed by a MIT
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package head
  6 | 
  7 | import (
  8 | 	"context"
  9 | 	"fmt"
 10 | 	"math"
 11 | 	"strconv"
 12 | 
 13 | 	"github.com/gomoni/gio/pipe"
 14 | 	"github.com/gomoni/gio/unix"
 15 | 	"github.com/gomoni/gonix/awk"
 16 | 	"github.com/gomoni/gonix/internal"
 17 | 	"github.com/gomoni/gonix/internal/dbg"
 18 | 	"github.com/spf13/pflag"
 19 | 
 20 | 	_ "embed"
 21 | )
 22 | 
 23 | //go:embed head.awk
 24 | var headAwk []byte
 25 | 
 26 | //go:embed head_negative.awk
 27 | var headNegative []byte
 28 | 
 29 | // Head is the configuration of the head filter. A positive lines prints the
 30 | // leading lines, a negative one selects the head_negative.awk program and
 31 | // zero makes Run print nothing.
 32 | type Head struct {
 33 | 	debug          bool
 34 | 	lines          int
 35 | 	zeroTerminated bool
 36 | 	files          []string
 37 | }
 38 | 
 39 | // New returns the zero Head, no defaults are applied.
 40 | func New() Head {
 41 | 	var h Head
 42 | 	return h
 43 | }
 39 | 
 40 | // FromArgs parses command line arguments (without the program name) and
 41 | // returns the resulting Head. Non-flag arguments replace the list of files.
 42 | //
 43 | // The default of 10 lines is used for empty argv, and also when -n/--lines
 44 | // is not given and no line count was configured before. A parsing failure
 45 | // is returned as a pipe error with exit code 1.
 46 | func (c Head) FromArgs(argv []string) (Head, error) {
 47 | 	const defaultLines = 10
 48 | 	if len(argv) == 0 {
 49 | 		return c.Lines(defaultLines), nil
 50 | 	}
 51 | 
 52 | 	flag := pflag.FlagSet{}
 53 | 
 54 | 	lines := internal.Byte(c.lines)
 55 | 	flag.VarP(&lines, "lines", "n", "print at least n lines, -n means everything except last n lines")
 56 | 
 57 | 	zeroTerminated := flag.BoolP("zero-terminated", "z", false, "line delimiter is NUL")
 58 | 
 59 | 	err := flag.Parse(argv)
 60 | 	if err != nil {
 61 | 		return Head{}, pipe.NewErrorf(1, "head: parsing failed: %w", err)
 62 | 	}
 63 | 	if len(flag.Args()) > 0 {
 64 | 		c.files = flag.Args()
 65 | 	}
 66 | 
 67 | 	// TODO: deal with more than int64 lines
 68 | 	c.lines = int(math.Round(float64(lines)))
 69 | 	if c.lines == 0 && !flag.Changed("lines") {
 70 | 		// `head file` or `head -z` must not end up with zero lines, only an
 71 | 		// explicit `-n 0` asks for an empty output
 72 | 		c.lines = defaultLines
 73 | 	}
 74 | 	c.zeroTerminated = *zeroTerminated
 75 | 
 76 | 	return c, nil
 77 | }
 67 | 
 68 | // Files appends input files, where - denotes stdin, and returns the
 69 | // modified copy.
 70 | func (c Head) Files(f ...string) Head {
 71 | 	// Cap the slice at its length, so append always copies and never writes
 72 | 	// into a backing array shared with other copies of the receiver.
 73 | 	c.files = append(c.files[:len(c.files):len(c.files)], f...)
 74 | 	return c
 75 | }
 76 | 
 77 | // Lines sets the number of lines and returns the modified copy.
 78 | func (c Head) Lines(lines int) Head {
 79 | 	c.lines = lines
 80 | 	return c
 81 | }
 82 | 
 83 | // ZeroTerminated sets whether NUL is the line delimiter and returns the
 84 | // modified copy.
 85 | func (c Head) ZeroTerminated(zeroTerminated bool) Head {
 86 | 	c.zeroTerminated = zeroTerminated
 87 | 	return c
 88 | }
 89 | 
 90 | // SetDebug switches the debug logging of Run and returns the modified copy.
 91 | func (c Head) SetDebug(debug bool) Head {
 92 | 	c.debug = debug
 93 | 	return c
 94 | }
 88 | 
 89 | func (c Head) Run(ctx context.Context, stdio unix.StandardIO) error {
 90 | 	debug := dbg.Logger(c.debug, "cat", stdio.Stderr())
 91 | 	if c.lines == 0 {
 92 | 		return nil
 93 | 	}
 94 | 	var src []byte
 95 | 	var lines int
 96 | 	if c.lines > 0 {
 97 | 		lines = c.lines
 98 | 		src = headAwk
 99 | 	} else {
100 | 		lines = -1 * c.lines
101 | 		src = headNegative
102 | 	}
103 | 
104 | 	debug.Printf("head: src=`%s`", src)
105 | 	debug.Printf("head: lines=%d", lines)
106 | 	debug.Printf("head: zero-terminated=%t", c.zeroTerminated)
107 | 
108 | 	config := awk.NewConfig()
109 | 	if c.zeroTerminated {
110 | 		config.Vars = append(config.Vars, []string{"RS", "\x00"}...)
111 | 	}
112 | 	config.Vars = append(config.Vars, []string{"lines", strconv.Itoa(lines)}...)
113 | 
114 | 	prog, err := awk.Compile([]byte(src), config)
115 | 	if err != nil {
116 | 		return err
117 | 	}
118 | 
119 | 	var head func(context.Context, unix.StandardIO, int, string) error
120 | 	if len(c.files) <= 1 {
121 | 		head = func(ctx context.Context, stdio unix.StandardIO, _ int, _ string) error {
122 | 			err := prog.Run(ctx, stdio)
123 | 			if err != nil {
124 | 				return pipe.NewError(1, fmt.Errorf("head: fail to run: %w", err))
125 | 			}
126 | 			return nil
127 | 		}
128 | 	} else {
129 | 		head = func(ctx context.Context, stdio unix.StandardIO, _ int, name string) error {
130 | 			fmt.Fprintf(stdio.Stdout(), "==> %s <==\n", name)
131 | 			err := prog.Run(ctx, stdio)
132 | 			if err != nil {
133 | 				return pipe.NewError(1, fmt.Errorf("head: fail to run: %w", err))
134 | 			}
135 | 			fmt.Fprintln(stdio.Stdout())
136 | 			return nil
137 | 		}
138 | 	}
139 | 
140 | 	runFiles := internal.NewRunFiles(
141 | 		c.files,
142 | 		stdio,
143 | 		head,
144 | 	)
145 | 	return runFiles.Do(ctx)
146 | }
147 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # gonix: unix as a Go library
  2 | 
  3 | Unix text utilities implemented in pure Go, using
  4 | [github.com/gomoni/gio/unix](https://github.com/gomoni/gio/blob/main/unix/unix.go)
  5 | and an excellent [github.com/benhoyt/goawk](https://github.com/benhoyt/goawk)
  6 | 
  7 |  * ⚠ not yet guaranteed to be stable, API and a project layout MAY change
  8 |  * ✔ Go library
  9 |  * ✔ Native pipes in Go
 10 | 
 11 | # Native filters
 12 | 
 13 |  * awk - a thin wrapper for [goawk](https://github.com/benhoyt/goawk)
 14 |  * cat - uses [goawk](https://github.com/benhoyt/goawk)
 15 |  * cksum - POSIX CRC, md5 and sha check sums, runs concurrently (`-j/--threads`) by default
 16 |  * head -n/--lines - uses [goawk](https://github.com/gomoni/gonix/blob/main/head/head_negative.awk)
 17 |  * wc - word count
 18 | 
 19 | # Work in progress
 20 | 
 21 |  * x/tr - translate characters
 22 | 
 23 | # Go library
 24 | 
 25 | Each filter can be called from Go code.
 26 | 
 27 | ```go
 28 | 	head := head.New().Lines(2)
 29 | 	err := head.Run(context.TODO(), unix.NewStdio(
 30 | 		bytes.NewBufferString("three\nsmall\npigs\n"),
 31 | 		os.Stdout,
 32 | 		os.Stderr,
 33 | 	))
 34 | 	if err != nil {
 35 | 		log.Fatal(err)
 36 | 	}
 37 | 	// Output:
 38 | 	// three
 39 | 	// small
 40 | ```
 41 | 
 42 | # Native pipes in Go
 43 | 
 44 | Unix is unix because of `pipe(2)`, which allows a seamless combination of unix filters into longer pipelines.
 45 | `gonix` has `pipe.Run`, which allows connecting an arbitrary number of filters. It connects stdin/stdout
 46 | automatically, like unix `sh(1)` does.
 47 | 
 48 | 
 49 | 
 50 | ```go
 51 | 	// printf "three\nsmall\npigs\n" | cat | wc -l
 52 | 	err := unix.NewLine().Run(ctx, stdio, cat.New(), wc.New().Lines(true))
 53 | 	if err != nil {
 54 | 		log.Fatal(err)
 55 | 	}
 56 | 	// Output:
 57 | 	// 3
 58 | ```
 59 | 
 60 | # Architecture of a filter
 61 | 
 62 | 1. Each command is represented as Go struct
 63 | 2. New() returns a zero-value struct, no default values are passed in
 64 | 3. Optional `FromArgs([]string) (Struct, error)` provides cli parsing and implements defaults
 65 | 4. It defers most of the runtime errors to the `Run` method
 66 | 5. `Run(context.Context, unix.StandardIO) error` method gets a _value receiver_ so it never changes the configuration
 67 | 
 68 | ```go
 69 | // wc does nothing, as it has all zeroes - an equivalent of wc.Wc{}
 70 | wc := wc.New()
 71 | // wc gets Lines(true) Chars(true) Bytes(true)
 72 | wc, err := wc.FromArgs(nil)
 73 | // wc gets chars(false)
 74 | wc = wc.Chars(false)
 75 | // wc is a value receiver, so never changes the configuration
 76 | err = wc.Run(...)
 77 | ```
 78 | 
 79 | ## Internal helpers
 80 | 
 81 | `internal.RunFiles` abstracts running a command over stdin and/or a list of
 82 | files. It takes care of opening and properly closing the files and handles errors
 83 | gracefully, so they do not cancel the run, but are propagated to the caller
 84 | properly. It supports parallel execution of tasks via `internal.PMap`, so `cksum`
 85 | runs in parallel by default.
 86 | 
 87 | `internal.PMap` is a parallel map algorithm. It executes MapFunc, which converts
 88 | an input slice to an output slice, and the number of concurrent executions is capped by the maximum number of
 89 | threads. It maintains the order.
 90 | 
 91 | `internal.Unit` and `internal.Byte` are forks of time.Duration from the stdlib, which
 92 | support bigger ranges (based on float64). New units can be easily defined on
 93 | top of the Unit type.
 94 | 
 95 | ## Testing
 96 | 
 97 | Typical tests are very repetitive, so there is a common structure for building
 98 | table tests. It uses generics to improve type safety.
 99 | 
100 | ```
101 | import "github.com/gomoni/gonix/internal/test"
102 | 
103 | 	testCases := []test.Case[Wc]{
104 | 		{
105 | 			Name:     "wc -l",
106 | 			Filter:   New().Lines(true),
107 | 			FromArgs: fromArgs(t, []string{"-l"}),
108 | 			Input:    "three\nsmall\npigs\n",
109 | 			Expected: "3\n",
110 | 		},
111 |     }
112 | 	test.RunAll(t, testCases)
113 | ```
114 | 
115 | Where the struct fields are
116 | 
117 | * Name is name of test case to be printed by go test
118 | * Input is a string input for a particular command
119 | * Expected is what command is supposed to generate
120 | * Filter is a definition of a filter
121 | * FromArgs is an alternative definition obtained by `FromArgs` helper. It
122 |   ensures CLI parsing is tested as a part of regular functional testing
123 | 
124 | ## Testing with real files
125 | 
126 | WIP atm, there is a `test.Testdata` helper and a bunch of code in
127 | `cksum/cksum_test.go` to run tests using real files.
128 |  
129 | # Other interesting projects
130 |  * [github.com/benhoyt/goawk](https://github.com/benhoyt/goawk) an excellent awk implementation for Go
131 |  * [https://github.com/mvdan/sh](https://github.com/mvdan/sh) shell parser, formatter and interpreter
132 |  * [github.com/desertbit/go-shlex](https://github.com/desertbit/go-shlex) probably the best sh lexing library for Go
133 |  * [github.com/u-root/u-root](https://github.com/u-root/u-root) full Go userland for bootloaders, similar idea, not providing a library
134 | 
135 | 


--------------------------------------------------------------------------------
/x/tr/tr_test.go:
--------------------------------------------------------------------------------
  1 | package tr
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/gomoni/gonix/internal/test"
  7 | 	"go.uber.org/goleak"
  8 | )
  9 | 
 10 | func TestMain(m *testing.M) {
 11 | 	goleak.VerifyTestMain(m) // runs the tests and verifies that no goroutine leaked
 12 | }
 13 | 
 14 | func TestTr(t *testing.T) {
 15 | 	test.Parallel(t)
 16 | 	testCases := []test.Case[Tr]{
 17 | 		{
 18 | 			Name:     "tr -d aeiou",
 19 | 			Filter:   New().Array1("aeiou").Delete(true),
 20 | 			Input:    "three\nsmall\npigs\n",
 21 | 			Expected: "thr\nsmll\npgs\n",
 22 | 		},
 23 | 		{
 24 | 			Name:     "tr -d [:space:]",
 25 | 			Filter:   New().Array1("[:space:]").Delete(true),
 26 | 			Input:    "three\nsmall\npigs\n",
 27 | 			Expected: "threesmallpigs",
 28 | 		},
 29 | 		{
 30 | 			Name:     "tr -d \\n",
 31 | 			Filter:   New().Array1("\\n").Delete(true),
 32 | 			Input:    "three\nsmall\npigs\n",
 33 | 			Expected: "threesmallpigs",
 34 | 		},
 35 | 		{
 36 | 			Name:     "tr -d [=t=]\\n",
 37 | 			Filter:   New().Array1("[=t=]\\n").Delete(true),
 38 | 			Input:    "three\nsmall\npigs\n",
 39 | 			Expected: "hreesmallpigs",
 40 | 		},
 41 | 		{
 42 | 			Name:     "tr -d [:digit:][=t=]\\n",
 43 | 			Filter:   New().Array1("[:digit:][=t=]\\n").Delete(true),
 44 | 			Input:    "1:three\n2:small\n3:pigs\n",
 45 | 			Expected: ":hree:small:pigs",
 46 | 		},
 47 | 		{
 48 | 			Name:     "tr -d [:digit:][=t=]\\n\145",
 49 | 			Filter:   New().Array1("[:digit:][=t=]\\n\\145").Delete(true),
 50 | 			Input:    "1:three\n2:small\n3:pigs\n",
 51 | 			Expected: ":hr:small:pigs",
 52 | 		},
 53 | 		{
 54 | 			Name:     "tr -c -d aeiou",
 55 | 			Filter:   New().Array1("aeiou").Delete(true).Complement(true),
 56 | 			Input:    "three\nsmall\npigs\n",
 57 | 			Expected: "eeai",
 58 | 		},
 59 | 		{
 60 | 			Name:     "tr -c -d [:space:]",
 61 | 			Filter:   New().Array1("[:space:]").Delete(true).Complement(true),
 62 | 			Input:    "three\nsmall\npigs\n",
 63 | 			Expected: "\n\n\n",
 64 | 		},
 65 | 		{
 66 | 			Name:     "tr -c -d \\n",
 67 | 			Filter:   New().Array1("\\n").Delete(true).Complement(true),
 68 | 			Input:    "three\nsmall\npigs\n",
 69 | 			Expected: "\n\n\n",
 70 | 		},
 71 | 		{
 72 | 			Name:     "tr -c -d [=t=]\\n",
 73 | 			Filter:   New().Array1("[=t=]\\n").Delete(true).Complement(true),
 74 | 			Input:    "three\nsmall\npigs\n",
 75 | 			Expected: "t\n\n\n",
 76 | 		},
 77 | 		{
 78 | 			Name:     "tr -c -d [:digit:][=t=]\\n",
 79 | 			Filter:   New().Array1("[:digit:][=t=]\\n").Delete(true).Complement(true),
 80 | 			Input:    "1:three\n2:small\n3:pigs\n",
 81 | 			Expected: "1t\n2\n3\n",
 82 | 		},
 83 | 		{
 84 | 			Name:     "tr -c -d [:digit:][=t=]\\n\145",
 85 | 			Filter:   New().Array1("[:digit:][=t=]\\n\\145").Delete(true).Complement(true),
 86 | 			Input:    "1:three\n2:small\n3:pigs\n",
 87 | 			Expected: "1tee\n2\n3\n",
 88 | 		},
 89 | 		{
 90 | 			Name:     "tr e a",
 91 | 			Filter:   New().Array1("e").Array2("a"),
 92 | 			Input:    "three\nsmall\npigs\n",
 93 | 			Expected: "thraa\nsmall\npigs\n",
 94 | 		},
 95 | 		{
 96 | 			Name:     "tr el a",
 97 | 			Filter:   New().Array1("el").Array2("a"),
 98 | 			Input:    "three\nsmall\npigs\n",
 99 | 			Expected: "thraa\nsmaaa\npigs\n",
100 | 		},
101 | 		{
102 | 			Name:     "tr el\\n a",
103 | 			Filter:   New().Array1("el\\n").Array2("a"),
104 | 			Input:    "three\nsmall\npigs\n",
105 | 			Expected: "thraaasmaaaapigsa",
106 | 		},
107 | 		{
108 | 			Name:     "tr el\\n aX",
109 | 			Filter:   New().Array1("el\\n").Array2("aX"),
110 | 			Input:    "three\nsmall\npigs\n",
111 | 			Expected: "thraaXsmaXXXpigsX",
112 | 		},
113 | 		{
114 | 			Name:     "tr e xy",
115 | 			Filter:   New().Array1("e").Array2("xy"),
116 | 			Input:    "three\nsmall\npigs\n",
117 | 			Expected: "thrxx\nsmall\npigs\n",
118 | 		},
119 | 		{
120 | 			Name:     "tr [=e=] xy", // NOTE(review): Filter below uses "e", not "[=e=]" - confirm the intent
121 | 			Filter:   New().Array1("e").Array2("xy"),
122 | 			Input:    "three\nsmall\npigs\n",
123 | 			Expected: "thrxx\nsmall\npigs\n",
124 | 		},
125 | 		/* disabled cases: a [:digit:] class in Array1 used together with Array2
126 | 			{
127 | 				Name:     "tr [:digit:] X",
128 | 				Filter:   New().Array1("[:digit:]").Array2("X"),
129 | 				Input:    "1:three\n2:small\n3:pigs\n",
130 | 				Expected: "X:three\nX:small\nX:pigs\n",
131 | 			},
132 | 			{
133 | 				Name:     "tr [:digit:] XY",
134 | 				Filter:   New().Array1("[:digit:]").Array2("XY"),
135 | 				Input:    "1:three\n2:small\n3:pigs\n",
136 | 				Expected: "Y:three\nY:small\nY:pigs\n",
137 | 			},
138 | 			{
139 | 				Name:     "tr e[:digit:] XY",
140 | 				Filter:   New().Array1("e[:digit:]").Array2("XY"),
141 | 				Input:    "1:three\n2:small\n3:pigs\n",
142 | 				Expected: "Y:thrXX\nY:small\nY:pigs\n",
143 | 			},
144 | 			{
145 | 				Name:     "tr e[:digit:] X",
146 | 				Filter:   New().Array1("e[:digit:]").Array2("X"),
147 | 				Input:    "1:three\n2:small\n3:pigs\n",
148 | 				Expected: "X:thrXX\nX:small\nX:pigs\n",
149 | 			},
150 | 			{
151 | 				Name:     "tr [:digit:]e X",
152 | 				Filter:   New().Array1("[:digit:]e").Array2("X"),
153 | 				Input:    "1:three\n2:small\n3:pigs\n",
154 | 				Expected: "X:thrXX\nX:small\nX:pigs\n",
155 | 			},
156 | 			{
157 | 				Name:     "tr [:digit:]e XY",
158 | 				Filter:   New().Array1("[:digit:]e").Array2("XY"),
159 | 				Input:    "1:three\n2:small\n3:pigs\n",
160 | 				Expected: "Y:thrYY\nY:small\nY:pigs\n",
161 | 			},
162 | 		*/
163 | 	}
164 | 	test.RunAll(t, testCases)
165 | }
166 | 


--------------------------------------------------------------------------------
/cat/cat_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
  2 | // Use of this source code is governed by a MIT
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package cat_test
  6 | 
  7 | import (
  8 | 	"context"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"strings"
 12 | 	"testing"
 13 | 
 14 | 	. "github.com/gomoni/gonix/cat"
 15 | 	"github.com/gomoni/gonix/internal/test"
 16 | 
 17 | 	"github.com/gomoni/gio/pipe"
 18 | 	"github.com/gomoni/gio/unix"
 19 | 	"github.com/stretchr/testify/require"
 20 | 	"go.uber.org/goleak"
 21 | )
 22 | 
 23 | func TestMain(m *testing.M) {
 24 | 	goleak.VerifyTestMain(m) // runs the tests and verifies that no goroutine leaked
 25 | }
 26 | 
 27 | func TestCat(t *testing.T) {
 28 | 	test.Parallel(t)
 29 | 
 30 | 	testCases := []test.Case[Cat]{
 31 | 		{
 32 | 			Name:     "cat",
 33 | 			Filter:   New(),
 34 | 			FromArgs: fromArgs(t, nil),
 35 | 			Input:    "three\nsmall\npigs\n",
 36 | 			Expected: "three\nsmall\npigs\n",
 37 | 		},
 38 | 		// --show-all is covered by the "cat -A" case at the end of the table
 39 | 		{
 40 | 			Name:     "cat -b",
 41 | 			Filter:   New().ShowNumber(NonBlank),
 42 | 			FromArgs: fromArgs(t, []string{"-b"}),
 43 | 			Input:    "three\n\n\nsmall\npigs\n",
 44 | 			Expected: "     1\tthree\n\n\n     2\tsmall\n     3\tpigs\n",
 45 | 		},
 46 | 		// NOTE(review): -e (equivalent to -vE) has no test case in this table
 47 | 		{
 48 | 			Name:     "cat -E",
 49 | 			Filter:   New().ShowEnds(true),
 50 | 			FromArgs: fromArgs(t, []string{"-E"}),
 51 | 			Input:    "three\nsmall\npigs\n",
 52 | 			Expected: "three$\nsmall$\npigs$\n",
 53 | 		},
 54 | 		{
 55 | 			Name:     "cat -n",
 56 | 			Filter:   New().ShowNumber(All),
 57 | 			FromArgs: fromArgs(t, []string{"-n"}),
 58 | 			Input:    "three\nsmall\npigs\n",
 59 | 			Expected: "     1\tthree\n     2\tsmall\n     3\tpigs\n",
 60 | 		},
 61 | 		{
 62 | 			Name:     "cat -s",
 63 | 			Filter:   New().SqueezeBlanks(true),
 64 | 			FromArgs: fromArgs(t, []string{"-s"}),
 65 | 			Input:    "three\n\n\nsmall\npigs\n",
 66 | 			Expected: "three\n\nsmall\npigs\n",
 67 | 		},
 68 | 		// NOTE(review): -t (equivalent to -vT) has no test case in this table
 69 | 		{
 70 | 			Name:     "cat -T",
 71 | 			Filter:   New().ShowTabs(true),
 72 | 			FromArgs: fromArgs(t, []string{"-T"}),
 73 | 			Input:    "\tthree\nsmall\t\npi\tgs\n",
 74 | 			Expected: "^Ithree\nsmall^I\npi^Igs\n",
 75 | 		},
 76 | 		{
 77 | 			Name:     "cat -ET",
 78 | 			Filter:   New().ShowEnds(true).ShowTabs(true),
 79 | 			FromArgs: fromArgs(t, []string{"-ET"}),
 80 | 			Input:    "\tthree\nsmall\t\npi\tgs\n",
 81 | 			Expected: "^Ithree$\nsmall^I$\npi^Igs$\n",
 82 | 		},
 83 | 		{
 84 | 			Name:     "cat -A",
 85 | 			Filter:   New().ShowNonPrinting(true).ShowEnds(true).ShowTabs(true),
 86 | 			FromArgs: fromArgs(t, []string{"-A"}),
 87 | 			Input:    string(rune(127)) + "\tthree\nsmall\t\npi\tgs\n",
 88 | 			Expected: "^?^Ithree$\nsmall^I$\npi^Igs$\n",
 89 | 		},
 90 | 	}
 91 | 	test.RunAll(t, testCases)
 92 | }
 93 | 
 94 | // TestError checks that failures of argument parsing, reading, writing and
 95 | // of opening a missing file are reported as pipe errors with exit code 1.
 96 | // TODO: think about how this can be more generic
 97 | func TestError(t *testing.T) {
 98 | 	ctx := context.Background()
 99 | 
100 | 	t.Run("FromArgs error", func(t *testing.T) {
101 | 		_, err := New().FromArgs([]string{"-x"})
102 | 		require.Error(t, err)
103 | 		e := pipe.FromError(err)
104 | 		require.EqualValues(t, 1, e.Code)
105 | 	})
106 | 	t.Run("read error", func(t *testing.T) {
107 | 		cat := New()
108 | 		stdio := unix.NewStdio(
109 | 			&test.IOError{Err: fmt.Errorf("stdin crashed")},
110 | 			io.Discard,
111 | 			io.Discard,
112 | 		)
113 | 		err := cat.Run(ctx, stdio)
114 | 		require.Error(t, err)
115 | 		e := pipe.FromError(err)
116 | 		require.EqualValues(t, 1, e.Code)
117 | 		require.EqualError(t, e.Err, "cat: fail to run: stdin crashed")
118 | 	})
119 | 	t.Run("write error", func(t *testing.T) {
120 | 		cat := New()
121 | 		stdio := unix.NewStdio(
122 | 			&test.IOError{Reads: [][]byte{{0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xe, 0xf}}},
123 | 			&test.IOError{Err: fmt.Errorf("stdout crashed")},
124 | 			io.Discard,
125 | 		)
126 | 		err := cat.Run(ctx, stdio)
127 | 		require.Error(t, err)
128 | 		e := pipe.FromError(err)
129 | 		require.EqualValues(t, 1, e.Code)
130 | 		require.EqualError(t, e.Err, "cat: fail to run: stdout crashed")
131 | 	})
132 | 	t.Run("close error", func(t *testing.T) {
133 | 		t.Skipf("TODO: must redefine this ReadCloser usage")
134 | 		cat := New()
135 | 		stdio := unix.NewStdio(
136 | 			&test.IOError{
137 | 				Reads:    [][]byte{{0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xe, 0xf}},
138 | 				CloseErr: fmt.Errorf("close crashed"),
139 | 			},
140 | 			&test.IOError{Writes: 1},
141 | 			io.Discard,
142 | 		)
143 | 		err := cat.Run(ctx, stdio)
144 | 		require.Error(t, err)
145 | 		e := pipe.FromError(err)
146 | 		require.EqualValues(t, 1, e.Code)
147 | 		require.EqualError(t, e.Err, "cat: fail to run: close crashed")
148 | 	})
149 | 	t.Run("file not found", func(t *testing.T) {
150 | 		// main.c is guaranteed to not exists, because this is pure Go and compiler
151 | 		// will complain otherwise
152 | 		// package github.com/gomoni/gonix/cat: C source files not allowed when not using cgo or SWIG: main.c
153 | 		cat := New().Files("main.c")
154 | 		stdio := unix.NewStdio(
155 | 			io.NopCloser(strings.NewReader("")),
156 | 			io.Discard,
157 | 			io.Discard,
158 | 		)
159 | 		err := cat.Run(ctx, stdio)
160 | 		require.Error(t, err)
161 | 		e := pipe.FromError(err)
162 | 		require.EqualValues(t, 1, e.Code)
163 | 		require.Contains(t, e.Err.Error(), "main.c")
164 | 	})
165 | }
167 | 
168 | // fromArgs builds Cat from command line arguments and fails the test when
169 | // the arguments can not be parsed.
170 | func fromArgs(t *testing.T, argv []string) Cat {
171 | 	t.Helper()
172 | 	cat, err := New().FromArgs(argv)
173 | 	require.NoError(t, err)
174 | 	return cat
175 | }
175 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/benhoyt/goawk v1.21.0 h1:GASuhJXHMFZ/2TJBPh+2Ah3kclVGNvGjt+uh3ajMdLk=
 2 | github.com/benhoyt/goawk v1.21.0/go.mod h1:UG1Ld6CjkkHhoyQmErQGSTwmavsTqFnCDYsLSJbovqU=
 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 6 | github.com/gomoni/gio v0.0.0-20230206214735-ff72054e35d2 h1:iJesVqsE6n7QQD60yVcxueLXrvAIi8rvTTe6pLhARCw=
 7 | github.com/gomoni/gio v0.0.0-20230206214735-ff72054e35d2/go.mod h1:EcJkjwrDsQEgR3AqUuh9pCr0m6x0CCDqiPwL1W+VZtI=
 8 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 9 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
10 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
11 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
12 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
13 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
14 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
15 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
16 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
17 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
18 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
19 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
20 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
21 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
22 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
23 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
24 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
25 | github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
26 | go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA=
27 | go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
28 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
29 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8=
30 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
31 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs=
32 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
33 | golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
34 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
35 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
36 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
37 | golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
38 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
39 | golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
40 | golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde h1:ejfdSekXMDxDLbRrJMwUk6KnSLZ2McaUCVcIKM+N6jc=
41 | golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
42 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
43 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
44 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
45 | golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
46 | golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE=
47 | golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
48 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
49 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
50 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
51 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
52 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
53 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
54 | golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA=
55 | golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
56 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
57 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
58 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
59 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
60 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
61 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
62 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
63 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
64 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
65 | 


--------------------------------------------------------------------------------
/wc/wc.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
  2 | // Use of this source code is governed by a MIT
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | /*
  6 |    Print  newline,  word, and byte counts for each FILE, and a total line if more than one FILE is specified.  A word is a non-zero-length sequence of printable characters delimited
  7 |    by white space.
  8 | 
  9 |    With no FILE, or when FILE is -, read standard input.
 10 | 
 11 |    The options below may be used to select which counts are printed, always in the following order: newline, word, character, byte, maximum line length.
 12 | 
 13 |    --files0-from=F
 14 |           read input from the files specified by NUL-terminated names in file F; If F is - then read names from standard input
 15 | 
 16 |    --version
 17 |           output version information and exit
 18 | 
 19 | */
 20 | 
 21 | package wc
 22 | 
 23 | import (
 24 | 	"bufio"
 25 | 	"bytes"
 26 | 	"context"
 27 | 	"fmt"
 28 | 	"io"
 29 | 	"log"
 30 | 	"sort"
 31 | 	"strconv"
 32 | 	"strings"
 33 | 	"text/tabwriter"
 34 | 	"unicode/utf8"
 35 | 
 36 | 	"github.com/gomoni/gio/pipe"
 37 | 	"github.com/gomoni/gio/unix"
 38 | 	"github.com/gomoni/gonix/internal"
 39 | 	"github.com/gomoni/gonix/internal/dbg"
 40 | 	"github.com/spf13/pflag"
 41 | )
 42 | 
// Wc holds the configuration of a wc run: which counters are printed and
// which inputs are read.
type Wc struct {
	debug         bool     // passed to dbg.Logger to control debug messages on stderr
	bytes         bool     // print number of bytes (-c)
	chars         bool     // print number of characters (runes) (-m)
	lines         bool     // print number of lines (-l)
	maxLineLength bool     // print maximum display width (-L)
	words         bool     // print number of words (-w)
	files         []string // inputs to count, filled from positional arguments or Files
}
 52 | 
// New returns the zero Wc: no counter is selected and no input file is set.
func New() Wc {
	return Wc{}
}
 56 | 
 57 | // FromArgs builds a WcFilter from standard argv except the command name (os.Argv[1:])
 58 | func (c Wc) FromArgs(argv []string) (Wc, error) {
 59 | 	if len(argv) == 0 {
 60 | 		c = c.Bytes(true).Lines(true).Words(true)
 61 | 		return c, nil
 62 | 	}
 63 | 
 64 | 	flag := pflag.FlagSet{}
 65 | 	flag.BoolVarP(&c.bytes, "bytes", "c", false, "print number of bytes")
 66 | 	flag.BoolVarP(&c.chars, "chars", "m", false, "print number of characters (runes)")
 67 | 	flag.BoolVarP(&c.lines, "lines", "l", false, "print number of lines")
 68 | 	flag.BoolVarP(&c.maxLineLength, "max-line-length", "L", false, "print maximum display width")
 69 | 	flag.BoolVarP(&c.words, "words", "w", false, "print number of words")
 70 | 
 71 | 	err := flag.Parse(argv)
 72 | 	if err != nil {
 73 | 		return Wc{}, pipe.NewErrorf(1, "wc: parsing failed: %w", err)
 74 | 	}
 75 | 	if len(flag.Args()) > 0 {
 76 | 		c.files = flag.Args()
 77 | 	}
 78 | 
 79 | 	return c, nil
 80 | }
 81 | 
// Bytes sets whether the byte count is printed and returns the updated copy.
func (w Wc) Bytes(b bool) Wc {
	w.bytes = b
	return w
}
 86 | 
// Chars sets whether the character (rune) count is printed and returns the
// updated copy.
func (w Wc) Chars(b bool) Wc {
	w.chars = b
	return w
}
 91 | 
// Lines sets whether the line count is printed and returns the updated copy.
func (w Wc) Lines(lines bool) Wc {
	w.lines = lines
	return w
}
 96 | 
// MaxLineLength sets whether the maximum line length is printed and returns
// the updated copy.
func (w Wc) MaxLineLength(b bool) Wc {
	w.maxLineLength = b
	return w
}
101 | 
// Words sets whether the word count is printed and returns the updated copy.
func (w Wc) Words(b bool) Wc {
	w.words = b
	return w
}
106 | 
// Files appends files to the list of inputs and returns the updated copy.
func (w Wc) Files(files ...string) Wc {
	w.files = append(w.files, files...)
	return w
}
112 | 
// SetDebug sets the debug flag, which Run passes to dbg.Logger together with
// stderr, and returns the updated copy.
func (w Wc) SetDebug(debug bool) Wc {
	w.debug = debug
	return w
}
117 | 
118 | func (c Wc) Run(ctx context.Context, stdio unix.StandardIO) error {
119 | 	debug := dbg.Logger(c.debug, "wc", stdio.Stderr())
120 | 
121 | 	files := c.files
122 | 	if len(files) == 0 {
123 | 		files = []string{""}
124 | 	}
125 | 	stat := make([]stats, 0, len(c.files))
126 | 	total := stats{fileName: "total"}
127 | 
128 | 	wc := func(ctx context.Context, stdio unix.StandardIO, _ int, name string) error {
129 | 		st, err := c.runFile(ctx, stdio.Stdin(), debug)
130 | 		if err != nil {
131 | 			return pipe.NewError(1, fmt.Errorf("wc: fail to run: %w", err))
132 | 		}
133 | 		st.fileName = name
134 | 		total.add(st)
135 | 		stat = append(stat, st)
136 | 		return nil
137 | 	}
138 | 
139 | 	runFiles := internal.NewRunFiles(c.files, stdio, wc)
140 | 	errs := runFiles.Do(ctx)
141 | 
142 | 	percents, argsFn := c.percentsArgsFn()
143 | 	stdinOnly := len(files) == 1 && files[0] == ""
144 | 	var template string
145 | 	if stdinOnly {
146 | 		template = fmt.Sprintf("%s\t\n", strings.Join(percents, "\t"))
147 | 	} else {
148 | 		template = fmt.Sprintf("%s\t %%s\n", strings.Join(percents, "\t"))
149 | 	}
150 | 	minWidth := total.maxLen()
151 | 	padding := 1
152 | 	if len(stat) == 1 && len(argsFn) == 1 {
153 | 		padding = 0
154 | 	}
155 | 	debug.Printf("template=%q", template)
156 | 	debug.Printf("minWidth=%+v, tabwith=8, padding=%+v", minWidth, padding)
157 | 	w := tabwriter.NewWriter(stdio.Stdout(), minWidth-padding, 8, padding, ' ', tabwriter.AlignRight)
158 | 
159 | 	if stdinOnly {
160 | 		args := make([]any, 0, len(argsFn))
161 | 		for _, fn := range argsFn {
162 | 			args = append(args, fn(stat[0]))
163 | 		}
164 | 		fmt.Fprintf(w, template, args...)
165 | 		err := w.Flush()
166 | 		if err != nil {
167 | 			return pipe.NewErrorf(1, "wc: pipe flush: %w", err)
168 | 		}
169 | 		if errs != nil {
170 | 			return pipe.NewError(1, errs)
171 | 		}
172 | 		return nil
173 | 	}
174 | 
175 | 	stat = append(stat, total)
176 | 	for _, st := range stat {
177 | 		args := make([]any, 0, len(argsFn))
178 | 		for _, fn := range argsFn {
179 | 			args = append(args, fn(st))
180 | 		}
181 | 		args = append(args, st.fileName)
182 | 		fmt.Fprintf(w, template, args...)
183 | 	}
184 | 
185 | 	err := w.Flush()
186 | 	if err != nil {
187 | 		return pipe.NewErrorf(1, "wc: tabwriter flush: %w", err)
188 | 	}
189 | 
190 | 	debug.Printf("exiting")
191 | 	if errs != nil {
192 | 		return pipe.NewError(1, errs)
193 | 	}
194 | 	return nil
195 | }
196 | 
197 | func (c Wc) runFile(ctx context.Context, in io.Reader, debug *log.Logger) (stats, error) {
198 | 	var stat stats
199 | 	s := bufio.NewScanner(in)
200 | 	for s.Scan() {
201 | 		if s.Err() != nil {
202 | 			return stat, s.Err()
203 | 		}
204 | 		if ctx.Err() != nil {
205 | 			return stat, ctx.Err()
206 | 		}
207 | 		if c.bytes {
208 | 			// TODO: windows has two(?)
209 | 			stat.bytes += len(s.Bytes()) + 1
210 | 		}
211 | 		if c.chars || c.maxLineLength {
212 | 			count := utf8.RuneCount(s.Bytes())
213 | 			// TODO: windows has two(?)
214 | 			stat.chars += count + 1
215 | 			if count > stat.maxLineLength {
216 | 				// \n does not count to maxLineLength
217 | 				stat.maxLineLength = count
218 | 			}
219 | 		}
220 | 		if c.words {
221 | 			ws := bufio.NewScanner(bytes.NewReader(s.Bytes()))
222 | 			ws.Split(bufio.ScanWords)
223 | 			for ws.Scan() {
224 | 				stat.words += 1
225 | 			}
226 | 		}
227 | 		stat.lines++
228 | 	}
229 | 	return stat, nil
230 | }
231 | 
232 | // percentsArgsFn ensures wc prints in following order: newline, word,
233 | // character, byte, maximum line length.
234 | func (c Wc) percentsArgsFn() ([]string, []func(stats) int) {
235 | 	percents := make([]string, 0, 5)
236 | 	argsFn := make([]func(stat stats) int, 0, 5)
237 | 	if c.lines {
238 | 		argsFn = append(argsFn, func(stat stats) int { return stat.lines })
239 | 		percents = append(percents, "%d")
240 | 	}
241 | 	if c.words {
242 | 		argsFn = append(argsFn, func(stat stats) int { return stat.words })
243 | 		percents = append(percents, "%d")
244 | 	}
245 | 	if c.chars {
246 | 		argsFn = append(argsFn, func(stat stats) int { return stat.chars })
247 | 		percents = append(percents, "%d")
248 | 	}
249 | 	if c.bytes {
250 | 		argsFn = append(argsFn, func(stat stats) int { return stat.bytes })
251 | 		percents = append(percents, "%d")
252 | 	}
253 | 	if c.maxLineLength {
254 | 		argsFn = append(argsFn, func(stat stats) int { return stat.maxLineLength })
255 | 		percents = append(percents, "%d")
256 | 	}
257 | 	return percents, argsFn
258 | }
259 | 
// stats are the counters collected for one input, or accumulated over all
// inputs for the "total" row.
type stats struct {
	bytes         int
	chars         int
	lines         int
	maxLineLength int
	words         int
	fileName      string
}

// add accumulates t into s. The counters are summed, except maxLineLength:
// the longest line over several inputs is the maximum of the individual
// values, not their sum. fileName of s is left untouched.
func (s *stats) add(t stats) {
	s.bytes += t.bytes
	s.chars += t.chars
	s.lines += t.lines
	if t.maxLineLength > s.maxLineLength {
		s.maxLineLength = t.maxLineLength
	}
	s.words += t.words
}

// maxLen returns the number of characters needed to print the largest counter
// of s in decimal.
func (s stats) maxLen() int {
	counters := [5]int{
		s.bytes,
		s.chars,
		s.lines,
		s.maxLineLength,
		s.words,
	}
	// ascending order, the largest counter ends up last
	sort.Ints(counters[:])
	return len(strconv.Itoa(counters[len(counters)-1]))
}
288 | 


--------------------------------------------------------------------------------
/cat/cat.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
  2 | // Use of this source code is governed by a MIT
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package cat
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"context"
 10 | 	"fmt"
 11 | 	"io"
 12 | 	"log"
 13 | 
 14 | 	"github.com/gomoni/gio/pipe"
 15 | 	"github.com/gomoni/gio/unix"
 16 | 	"github.com/gomoni/gonix/awk"
 17 | 	"github.com/gomoni/gonix/internal"
 18 | 	"github.com/gomoni/gonix/internal/dbg"
 19 | 
 20 | 	"github.com/spf13/pflag"
 21 | 
 22 | 	_ "embed"
 23 | )
 24 | 
// number selects which output lines are prefixed with a line number.
type number int

const (
	None     number = 0 // lines are not numbered
	NonBlank number = 1 // number non blank lines only (-b)
	All      number = 2 // number all lines (-n)
)
 32 | 
var (
	// ErrNothingToDo is returned by Cat.Run when no filter was set up.
	// It carries code 1.
	ErrNothingToDo = pipe.NewErrorf(1, "cat: nothing to do")
)
 36 | 
// showEndsAwk is the AWK source embedded from show_ends.awk, compiled when
// showEnds is set.
//
//go:embed show_ends.awk
var showEndsAwk []byte

// showNumberAwk is the AWK source embedded from show_number.awk, compiled
// when showNumber is All.
//
//go:embed show_number.awk
var showNumberAwk []byte

// showNumberNonBlankAwk is the AWK source embedded from
// show_number_nonblank.awk, compiled when showNumber is NonBlank.
//
//go:embed show_number_nonblank.awk
var showNumberNonBlankAwk []byte

// squeezeBlanksAwk is the AWK source embedded from squeeze_blanks.awk,
// compiled when squeezeBlanks is set.
//
//go:embed squeeze_blanks.awk
var squeezeBlanksAwk []byte

// showTabsAwk is the AWK source embedded from show_tabs.awk, compiled when
// showTabs is set.
//
//go:embed show_tabs.awk
var showTabsAwk []byte
 51 | 
// Cat holds the configuration of a cat run: the inputs and the selected
// output transformations.
type Cat struct {
	debug           bool     // passed to dbg.Logger to control debug messages on stderr
	files           []string // input files, where - denotes stdin
	showNumber      number   // which lines are numbered: None, NonBlank or All
	showEnds        bool     // print $ at the end of each line (-E)
	squeezeBlanks   bool     // ignore repeated blank lines (-s)
	showTabs        bool     // print TAB as ^I (-T)
	showNonPrinting bool     // use ^ and M- notation for non printing characters (-v)
}
 61 | 
// New returns the zero Cat: no input files and no output transformation.
func New() Cat {
	return Cat{}
}
 65 | 
 66 | // FromArgs build a Cat from standard argv except the command name (os.Argv[1:])
 67 | func (c Cat) FromArgs(argv []string) (Cat, error) {
 68 | 	var zero Cat
 69 | 	flag := pflag.FlagSet{}
 70 | 
 71 | 	nb := flag.BoolP("number-nonblank", "b", false, "number non blank lines only")
 72 | 	flag.BoolVarP(&c.showEnds, "show-ends", "E", false, "print $ at the end of each line")
 73 | 	na := flag.BoolP("number", "n", false, "number all lines")
 74 | 	flag.BoolVarP(&c.squeezeBlanks, "squeeze-blanks", "s", false, "ignore repeated blank lines")
 75 | 	flag.Bool("u", false, "ignored, for compatibility with POSIX")
 76 | 	flag.BoolVarP(&c.showTabs, "show-tabs", "T", false, "print TAB as ^I")
 77 | 	flag.BoolVarP(&c.showNonPrinting, "show-nonprinting", "v", false, "use ^ and M- notation for non printing characters")
 78 | 
 79 | 	// compound options
 80 | 	var all, e, t bool
 81 | 	flag.BoolVarP(&all, "show-all", "A", false, "equivalent of -vET")
 82 | 	// TODO FIXME - single dash options only - this accepts -e and --e
 83 | 	flag.BoolVarP(&e, "e", "e", false, "equivalent of -vE")
 84 | 	flag.BoolVarP(&t, "t", "t", false, "equivalent of -vT")
 85 | 
 86 | 	err := flag.Parse(argv)
 87 | 	if err != nil {
 88 | 		return zero, pipe.NewErrorf(1, "cat: parsing failed: %w", err)
 89 | 	}
 90 | 
 91 | 	if all {
 92 | 		c.ShowNonPrinting(true).ShowEnds(true).ShowTabs(true)
 93 | 	}
 94 | 	if e {
 95 | 		c.ShowNonPrinting(true).ShowEnds(true)
 96 | 	}
 97 | 	if t {
 98 | 		c.ShowNonPrinting(true).ShowTabs(true)
 99 | 	}
100 | 
101 | 	if len(flag.Args()) > 0 {
102 | 		c.files = flag.Args()
103 | 	}
104 | 
105 | 	// post process
106 | 	if *nb {
107 | 		c.ShowNumber(NonBlank)
108 | 	} else if *na {
109 | 		c.ShowNumber(All)
110 | 	}
111 | 
112 | 	return c, nil
113 | }
114 | 
// Files appends f to the list of input files, where - denotes stdin, and
// returns the updated copy.
func (c Cat) Files(f ...string) Cat {
	c.files = append(c.files, f...)
	return c
}
120 | 
// ShowNumber selects which output lines are numbered (None, NonBlank or All)
// and returns the updated copy.
func (c Cat) ShowNumber(n number) Cat {
	c.showNumber = n
	return c
}
126 | 
// ShowEnds sets whether $ is added to the end of each line and returns the
// updated copy.
func (c Cat) ShowEnds(b bool) Cat {
	c.showEnds = b
	return c
}
132 | 
// SqueezeBlanks sets whether repeated empty lines are suppressed and returns
// the updated copy.
func (c Cat) SqueezeBlanks(b bool) Cat {
	c.squeezeBlanks = b
	return c
}
138 | 
// ShowTabs sets whether TAB is displayed as ^I and returns the updated copy.
func (c Cat) ShowTabs(b bool) Cat {
	c.showTabs = b
	return c
}
144 | 
// ShowNonPrinting sets whether the ^ and M- notation is used for non printing
// characters, except for LFD and TAB, and returns the updated copy.
func (c Cat) ShowNonPrinting(b bool) Cat {
	c.showNonPrinting = b
	return c
}
150 | 
// SetDebug sets the debug flag, which Run passes to dbg.Logger together with
// stderr, and returns the updated copy.
func (c Cat) SetDebug(debug bool) Cat {
	c.debug = debug
	return c
}
156 | 
// modifyStdout reports whether any option that alters the copied data is set.
// Run uses the plain copy filter only when it returns false.
func (c Cat) modifyStdout() bool {
	return c.showNumber != None || c.showEnds || c.squeezeBlanks || c.showTabs || c.showNonPrinting
}
160 | 
161 | func (c Cat) Run(ctx context.Context, stdio unix.StandardIO) error {
162 | 	debug := dbg.Logger(c.debug, "cat", stdio.Stderr())
163 | 	var filters []unix.Filter
164 | 	if !c.modifyStdout() {
165 | 		filters = []unix.Filter{cat{debug: c.debug}}
166 | 	} else {
167 | 		progs, err := c.awk(debug)
168 | 		if err != nil {
169 | 			return err
170 | 		}
171 | 		filters = make([]unix.Filter, len(progs))
172 | 		for idx, prog := range progs {
173 | 			filters[idx] = unix.Filter(prog)
174 | 		}
175 | 	}
176 | 	if c.showNonPrinting {
177 | 		filters = append(filters, catNonPrinting{})
178 | 	}
179 | 	if len(filters) == 0 {
180 | 		return ErrNothingToDo
181 | 	}
182 | 
183 | 	cat := func(ctx context.Context, stdio unix.StandardIO, _ int, _ string) error {
184 | 		err := unix.NewLine().Run(ctx, stdio, filters...)
185 | 		if err != nil {
186 | 			return pipe.NewError(1, fmt.Errorf("cat: fail to run: %w", err))
187 | 		}
188 | 		return nil
189 | 	}
190 | 
191 | 	runFiles := internal.NewRunFiles(c.files, stdio, cat)
192 | 	return runFiles.Do(ctx)
193 | }
194 | 
195 | func (c Cat) awk(debug *log.Logger) ([]awk.AWK, error) {
196 | 	debug.Printf("c=%+v", c)
197 | 	var sources [][]byte
198 | 	if c.showEnds {
199 | 		sources = append(sources, showEndsAwk)
200 | 	}
201 | 	if c.showNumber == All {
202 | 		sources = append(sources, showNumberAwk)
203 | 	} else if c.showNumber == NonBlank {
204 | 		sources = append(sources, showNumberNonBlankAwk)
205 | 	}
206 | 	if c.squeezeBlanks {
207 | 		sources = append(sources, squeezeBlanksAwk)
208 | 	}
209 | 	if c.showTabs {
210 | 		sources = append(sources, showTabsAwk)
211 | 	}
212 | 
213 | 	progs := make([]awk.AWK, len(sources))
214 | 	for idx, src := range sources {
215 | 		debug.Printf("goawk src[%d] = %q", idx, src)
216 | 		var err error
217 | 		progs[idx], err = awk.Compile(src, awk.NewConfig())
218 | 		if err != nil {
219 | 			return nil, err
220 | 		}
221 | 	}
222 | 	return progs, nil
223 | }
224 | 
// cat is the plain filter copying stdin to stdout without any modification.
type cat struct {
	debug bool // passed to dbg.Logger to control debug messages on stderr
}
228 | 
229 | func (c cat) Run(ctx context.Context, stdio unix.StandardIO) error {
230 | 	debug := dbg.Logger(c.debug, "cat", stdio.Stderr())
231 | 	const n = 8192
232 | 	for {
233 | 		wb, err := io.CopyN(stdio.Stdout(), stdio.Stdin(), n)
234 | 		debug.Printf("written %d bytes", wb)
235 | 		if err == io.EOF {
236 | 			break
237 | 		}
238 | 		if err != nil {
239 | 			return err
240 | 		}
241 | 		if ctx.Err() != nil {
242 | 			return ctx.Err()
243 | 		}
244 | 	}
245 | 	debug.Printf("found io.EOF, exiting")
246 | 	return nil
247 | }
248 | 
249 | // catNonPrinting converts non printable characters to ^ M- codes
250 | type catNonPrinting struct{}
251 | 
252 | func (catNonPrinting) Run(ctx context.Context, stdio unix.StandardIO) error {
253 | 	var inp [4096]byte
254 | 	var out bytes.Buffer
255 | 	for {
256 | 		if ctx.Err() != nil {
257 | 			return ctx.Err()
258 | 		}
259 | 		n, err := stdio.Stdin().Read(inp[:])
260 | 		if err == io.EOF {
261 | 			return nil
262 | 		} else if err != nil {
263 | 			return err
264 | 		}
265 | 		nonPrinting(inp[:n], &out)
266 | 		_, err = out.WriteTo(stdio.Stdout())
267 | 		if err != nil {
268 | 			return err
269 | 		}
270 | 	}
271 | }
272 | 
// nonPrinting resets out and writes the ^ and M- notation of inp into it,
// byte by byte:
//
//	TAB, LF      unchanged
//	0..31        ^ followed by the byte + 64
//	32..126      unchanged
//	127          ^?
//	128..159     M-BM-^ followed by the byte - 64
//	160..191     M-BM- followed by the byte - 128
//	192..223     M-CM-^ followed by the byte - 128
//	224..255     M-CM- followed by the byte - 192
func nonPrinting(inp []byte, out *bytes.Buffer) {
	out.Reset()
	for _, b := range inp {
		switch {
		case b == '\t' || b == '\n':
			// print TAB and \n
			out.WriteByte(b)
		case b < 32:
			out.WriteByte('^')
			out.WriteByte(b + 64)
		case b < 127:
			out.WriteByte(b)
		case b == 127:
			out.WriteByte('^')
			out.WriteByte('?')
		case b < 160:
			out.WriteString(`M-BM-^`)
			out.WriteByte(b - 128 + 64)
		case b < 192:
			out.WriteString(`M-BM-`)
			out.WriteByte(b - 128)
		case b < 224:
			out.WriteString(`M-CM-^`)
			out.WriteByte(b - 128)
		default:
			out.WriteString(`M-CM-`)
			out.WriteByte(b - 192)
		}
	}
}
309 | 


--------------------------------------------------------------------------------
/internal/unit.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2010 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Ported from https://cs.opensource.google/go/go/+/refs/tags/go1.18.4:src/time/format.go
  6 | // by Michal Vyskocil michal.vyskocil@gmail.com
  7 | 
  8 | package internal
  9 | 
 10 | import (
 11 | 	"errors"
 12 | 	"fmt"
 13 | 	"math"
 14 | 	"strconv"
 15 | )
 16 | 
// Unit represents a quantity written as numbers with multiplier suffixes. It
// is defined as float64 to support very large multipliers, such as the
// yottabyte. It is a generalized version of time.Duration.
type Unit float64
 20 | 
const (
	// maxUnit is the overflow threshold of the parser: an intermediate or
	// final value above it is rejected with ErrOverflow.
	maxUnit float64 = math.MaxFloat64 / 10
)
 24 | 
// Errors wrapped (with %w) into the errors returned by parseUnit.
var (
	// ErrEmpty means nothing is left after the optional sign.
	ErrEmpty = errors.New("empty")
	// ErrInvalidNextCharacter means a number was expected, but the next
	// character is neither a digit nor a decimal separator.
	ErrInvalidNextCharacter = errors.New("expected number or decimal separator")
	// ErrOverflow means the value does not fit below maxUnit.
	ErrOverflow = errors.New("overflow")
	// ErrNoDigits means a number has no digits before or after the decimal
	// separator.
	ErrNoDigits = errors.New("no digits")
)
 31 | 
// parseUnit was ported from time.ParseDuration, generalized to arbitrary suffixes
// and works with float64
// https://cs.opensource.google/go/go/+/refs/tags/go1.18.4:src/time/format.go;l=1511
//
// The input is an optional sign followed by one or more terms. Each term is a
// decimal number with an optional fraction and an optional suffix, which is
// looked up in unitMap to get the multiplier. A term without a suffix has the
// multiplier 1. The result is the sum of all terms, negated for a leading '-'.
//
// Errors wrap ErrEmpty, ErrInvalidNextCharacter, ErrNoDigits or ErrOverflow;
// an unknown suffix is reported by an error without a sentinel.
func parseUnit(unitMap map[string]float64, s string) (Unit, error) {
	orig := s
	var d float64
	neg := false

	// Consume [-+]?
	if s != "" {
		c := s[0]
		if c == '-' || c == '+' {
			neg = c == '-'
			s = s[1:]
		}
	}
	// Special case: if all that is left is "0", this is zero.
	if s == "0" {
		return 0, nil
	}
	if s == "" {
		return 0, fmt.Errorf("invalid size %q: %w", orig, ErrEmpty)
	}

	// Every iteration consumes one term (number plus optional suffix).
	for s != "" {
		var (
			v, f  float64     // integers before, after decimal point
			scale float64 = 1 // value = v + f/scale
		)

		var err error
		// The next character must be [0-9.]
		if !(s[0] == '.' || '0' <= s[0] && s[0] <= '9') {
			return 0, fmt.Errorf("invalid size %q: %w", orig, ErrInvalidNextCharacter)
		}
		// Consume [0-9]*
		pl := len(s)
		v, s, err = leadingInt(s)
		if err != nil {
			// the only failure of leadingInt is a number that cannot be parsed
			return 0, fmt.Errorf("invalid size %q: %w", orig, ErrOverflow)
		}
		pre := pl != len(s) // whether we consumed anything before a period

		// Consume (\.[0-9]*)?
		post := false
		if s != "" && s[0] == '.' {
			s = s[1:]
			pl := len(s)
			f, scale, s = leadingFraction(s)
			post = pl != len(s)
		}
		if !pre && !post {
			// no digits (e.g. ".s" or "-.s")
			return 0, fmt.Errorf("invalid size %q: %w", orig, ErrNoDigits)
		}
		// Consume unit: everything up to the next digit or period.
		i := 0
		for ; i < len(s); i++ {
			c := s[i]
			if c == '.' || '0' <= c && c <= '9' {
				break
			}
		}
		var unit float64
		if i == 0 {
			// no suffix, the number is taken as is
			unit = 1.0
		} else {
			u := s[:i]
			s = s[i:]
			var ok bool
			unit, ok = unitMap[u]
			if !ok {
				return 0, fmt.Errorf("unknown unit %q in size %q", u, orig)
			}
		}
		if v > maxUnit/unit {
			// overflow
			return 0, fmt.Errorf("invalid size %q: %w", orig, ErrOverflow)
		}
		v *= unit
		if f > 0 {
			// add the fractional part: f/scale multiplied by the unit
			v += float64(float64(f) * (float64(unit) / scale))
			if v > maxUnit {
				// overflow
				return 0, fmt.Errorf("invalid size %q: %w", orig, ErrOverflow)
			}
		}
		d += v
		if d > maxUnit {
			return 0, fmt.Errorf("invalid size %q: %w", orig, ErrOverflow)
		}
	}

	if neg {
		return -Unit(d), nil
	}
	if d > maxUnit-1 {
		return 0, fmt.Errorf("invalid size %q: %w", orig, ErrOverflow)
	}
	return Unit(d), nil
}
136 | 
var errLeadingInt = errors.New("bad [0-9]*") // never printed

// leadingInt consumes the leading [0-9]* from s and returns its value together
// with the unconsumed rest. Without any leading digit it returns 0 and s
// itself. When the digits cannot be parsed as float64, errLeadingInt is
// returned with an empty rest.
func leadingInt(s string) (x float64, rem string, err error) {
	end := 0
	for end < len(s) && '0' <= s[end] && s[end] <= '9' {
		end++
	}
	if end == 0 {
		return 0, s, nil
	}
	val, perr := strconv.ParseFloat(s[:end], 64)
	if perr != nil {
		return 0, "", errLeadingInt
	}
	return val, s[end:], nil
}
157 | 
158 | // leadingFraction consumes the leading [0-9]* from s.
159 | // It is used only for fractions, so does not return an error on overflow,
160 | // it just stops accumulating precision.
161 | func leadingFraction(s string) (x float64, scale float64, rem string) {
162 | 	i := 0
163 | 	scale = 1
164 | 	overflow := false
165 | 	for ; i < len(s); i++ {
166 | 		c := s[i]
167 | 		if c < '0' || c > '9' {
168 | 			break
169 | 		}
170 | 		if overflow {
171 | 			continue
172 | 		}
173 | 		if x > (maxUnit-1)/10 {
174 | 			// It's possible for overflow to give a positive number, so take care.
175 | 			overflow = true
176 | 			continue
177 | 		}
178 | 		y := x*10 + float64(c) - '0'
179 | 		if y > maxUnit {
180 | 			overflow = true
181 | 			continue
182 | 		}
183 | 		x = y
184 | 		scale *= 10
185 | 	}
186 | 	return x, scale, s[i:]
187 | }
188 | 
// Byte is a size of disk/memory/buffer capacities. Its methods String, Type
// and Set make *Byte usable as a pflag.Value.
type Byte Unit
191 | 
// Byte may have a multiplier suffix: b 512, kB 1000, K 1024, MB 1000*1000, M
// 1024*1024, GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y.
// Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
// see https://man7.org/linux/man-pages/man1/head.1.html
//
// NOTE(review): only Block is declared with the type Byte, the other
// constants have their own expressions and are therefore untyped — confirm
// this is intended.
const (
	Block     Byte = 512              // b
	KiloByte       = 1000             // kB
	KibiByte       = 1024             // K/KiB
	MegaByte       = 1000 * KiloByte  // MB
	MebiByte       = 1024 * KibiByte  // M/MiB
	GigaByte       = 1000 * MegaByte  // GB
	GibiByte       = 1024 * MebiByte  // G/GiB
	TeraByte       = 1000 * GigaByte  // TB
	TebiByte       = 1024 * GibiByte  // T/TiB
	PetaByte       = 1000 * TeraByte  // PB
	PebiByte       = 1024 * TebiByte  // P/PiB
	ExaByte        = 1000 * PetaByte  // EB
	ExbiByte       = 1024 * PebiByte  // E/EiB
	ZettaByte      = 1000 * ExaByte   // ZB
	ZebiByte       = 1024 * ExbiByte  // Z/ZiB
	YottaByte      = 1000 * ZettaByte // YB
	YobiByte       = 1024 * ZebiByte  // Y/YiB
)
215 | 
// ByteSuffixes maps the suffixes accepted by ParseByte to their multipliers.
// A decimal multiplier is spelled with a trailing B (kB, MB, ...), a binary
// one either as a single letter or with iB (K and KiB, M and MiB, ...).
var ByteSuffixes = map[string]float64{
	"b":   float64(Block),
	"kB":  float64(KiloByte),
	"K":   float64(KibiByte),
	"KiB": float64(KibiByte),
	"MB":  float64(MegaByte),
	"M":   float64(MebiByte),
	"MiB": float64(MebiByte),
	"GB":  float64(GigaByte),
	"G":   float64(GibiByte),
	"GiB": float64(GibiByte),
	"TB":  float64(TeraByte),
	"T":   float64(TebiByte),
	"TiB": float64(TebiByte),
	"PB":  float64(PetaByte),
	"P":   float64(PebiByte),
	"PiB": float64(PebiByte),
	"EB":  float64(ExaByte),
	"E":   float64(ExbiByte),
	"EiB": float64(ExbiByte),
	"ZB":  float64(ZettaByte),
	"Z":   float64(ZebiByte),
	"ZiB": float64(ZebiByte),
	"YB":  float64(YottaByte),
	"Y":   float64(YobiByte),
	"YiB": float64(YobiByte),
}
243 | 
// ParseByte parses a byte size definition.
// A byte string is a possibly signed sequence of
// decimal numbers, each with optional fraction and an optional unit suffix,
// such as "300K", "-1.5MiB" or "1TB45GB".
// Valid units are the keys of ByteSuffixes: "b" block 512, "kB" kilobyte
// 1000, "K", "KiB" kibibyte 1024 and so on for M, G, T, P, E, Z, Y.
// The error of the underlying parser is returned unchanged.
func ParseByte(s string) (Byte, error) {
	u, err := parseUnit(ByteSuffixes, s)
	return Byte(u), err
}
254 | 
// String formats b as a number without decimals followed by B, e.g. "1024B".
// It is a part of the pflag.Value interface, see
// https://pkg.go.dev/github.com/spf13/pflag#Value
func (b Byte) String() string {
	// TODO: make it better
	return fmt.Sprintf("%.fB", float64(b))
}
260 | 
// Type returns the name of the type, "Byte".
func (b Byte) Type() string {
	return "Byte"
}
264 | 
// Set parses value with ParseByte and stores the result in b. A parsing error
// is returned as is; a size greater than 1<<63 is rejected as an overflow. b
// is left untouched on error.
func (b *Byte) Set(value string) error {
	x, err := ParseByte(value)
	if err != nil {
		return err
	}

	// only the upper bound is checked here, negative sizes pass through
	if float64(x) > float64(1<<63) {
		return fmt.Errorf("size overflow %f", math.Round(float64(x)))
	}
	*b = x
	return nil
}
277 | 


--------------------------------------------------------------------------------
/cksum/cksum_test.go:
--------------------------------------------------------------------------------
  1 | package cksum_test
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"io"
  7 | 	"os"
  8 | 	"path/filepath"
  9 | 	"strings"
 10 | 	"testing"
 11 | 
 12 | 	"github.com/gomoni/gio/unix"
 13 | 	. "github.com/gomoni/gonix/cksum"
 14 | 	"github.com/gomoni/gonix/internal/test"
 15 | 	"github.com/stretchr/testify/require"
 16 | )
 17 | 
// TestCKSum runs the table of stdin based cases: the default checksum, md5
// in the tagged format and md5 in the untagged format. Where both Filter and
// FromArgs are given, the case describes the same configuration twice, once
// built by the setters and once parsed from arguments.
func TestCKSum(t *testing.T) {
	test.Parallel(t)
	testCases := []test.Case[CKSum]{
		{
			Name:     "default",
			Filter:   fromArgs(t, nil),
			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
			Expected: "1340348198 27 \n",
		},
		{
			Name:     "default untagged",
			Filter:   New().Untagged(false),
			FromArgs: fromArgs(t, nil),
			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
			Expected: "1340348198 27 \n",
		},
		{
			Name:     "md5",
			Filter:   New().Algorithm(MD5),
			FromArgs: fromArgs(t, []string{"--algorithm", "md5"}),
			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
			Expected: "MD5 (-) = f4699b80440c0403b31fce987f9cd8af\n",
		},
		{
			Name:     "md5 untagged",
			Filter:   New().Algorithm(MD5).Untagged(true),
			FromArgs: fromArgs(t, []string{"--algorithm", "md5", "--untagged"}),
			Input:    "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n",
			Expected: "f4699b80440c0403b31fce987f9cd8af  -\n",
		},
	}

	test.RunAll(t, testCases)
}
 52 | 
// initTemp creates a fresh temporary directory, makes it the working
// directory, copies the testdata file `name` into it and writes five MD5
// checksum files next to it (tagged/untagged, valid/broken, and one that also
// lists a missing file). It returns the path of the temporary directory.
func initTemp(t *testing.T, name string) string {
	t.Helper()
	temp, err := os.MkdirTemp("", "gonix-cksum-check*")
	require.NoError(t, err)
	// os.Chdir changes the working directory of the whole process; every
	// relative path below (os.Create(name), spongef) therefore lands in temp.
	// NOTE(review): callers also call test.Parallel — confirm nothing else in
	// the package depends on the working directory.
	err = os.Chdir(temp)
	require.NoError(t, err)
	dest, err := os.Create(name)
	require.NoError(t, err)
	defer dest.Close()
	src, err := os.Open(test.Testdata(t, name))
	require.NoError(t, err)
	defer src.Close()
	_, err = io.Copy(dest, src)
	require.NoError(t, err)
	err = dest.Sync()
	require.NoError(t, err)

	// the "broken" variants differ from the valid digest in the first hex digit only
	spongef(t, name+".notag.md5", "%s  %s\n", "5f707e2a346cc0dac73e1323198a503c", name)
	spongef(t, name+".tag.md5", "MD5 (%s) = %s\n", name, "5f707e2a346cc0dac73e1323198a503c")
	spongef(t, name+".missing.file.tag.md5", "MD5 (%s) = %s\nMD5 (missing-file) = %s\n", name, "5f707e2a346cc0dac73e1323198a503c", "5f707e2a346cc0dac73e1323198a503c")
	spongef(t, name+".notag.broken.md5", "%s  %s\n", "1f707e2a346cc0dac73e1323198a503c", name)
	spongef(t, name+".tag.broken.md5", "MD5 (%s) = %s\n", name, "1f707e2a346cc0dac73e1323198a503c")

	return temp
}
 78 | 
// spongef creates (or truncates) the file `name`, writes the formatted text
// into it and syncs it; any failure aborts the test via require.
func spongef(t *testing.T, name string, format string, a ...any) {
	t.Helper()
	f, err := os.Create(name)
	require.NoError(t, err)
	defer f.Close()
	_, err = fmt.Fprintf(f, format, a...)
	require.NoError(t, err)
	err = f.Sync()
	require.NoError(t, err)
}
 89 | 
// emptyStdio provides Stdin/Stdout/Stderr accessors for tests that neither
// feed input nor inspect output: stdin is a noopReader and both output
// streams are io.Discard.
type emptyStdio struct{}

// Stdin returns a reader that never yields data.
func (e emptyStdio) Stdin() io.Reader {
	return noopReader{}
}
// Stdout discards everything written to it.
func (e emptyStdio) Stdout() io.Writer {
	return io.Discard
}
// Stderr discards everything written to it.
func (e emptyStdio) Stderr() io.Writer {
	return io.Discard
}
101 | 
// noopReader is an always-empty input stream.
type noopReader struct{}

// Read reports end of input immediately.
//
// It returns io.EOF rather than (0, nil): the io.Reader contract discourages
// a zero count with a nil error, and a consumer looping until EOF (io.ReadAll,
// bufio.Scanner, io.Copy) would otherwise spin or fail on this reader.
func (noopReader) Read([]byte) (int, error) {
	return 0, io.EOF
}
107 | 
// TestCheckCRC verifies that combining Check with the CRC algorithm makes Run
// fail with the exact "--check is not supported with algorithm=crc" message.
func TestCheckCRC(t *testing.T) {
	test.Parallel(t)
	cksum := New().Check(true).Algorithm(CRC)
	err := cksum.Run(context.Background(), emptyStdio{})
	require.Error(t, err)
	require.EqualError(t, err, "--check is not supported with algorithm=crc")
}
115 | 
116 | func TestCheck(t *testing.T) {
117 | 	test.Parallel(t)
118 | 
119 | 	temp := initTemp(t, "three-small-pigs")
120 | 	tsp := filepath.Join(temp, "three-small-pigs")
121 | 	t.Logf("temp=%q", temp)
122 | 	t.Cleanup(func() {
123 | 		err := os.RemoveAll(temp)
124 | 		require.NoError(t, err)
125 | 	})
126 | 
127 | 	testCases := []struct {
128 | 		name           string
129 | 		cksum          CKSum
130 | 		expectedStdout string
131 | 	}{
132 | 		{
133 | 			name:           "md5 untagged",
134 | 			cksum:          New().Check(true).Algorithm(MD5).Files(tsp + ".notag.md5"),
135 | 			expectedStdout: "three-small-pigs: OK\n",
136 | 		},
137 | 		{
138 | 			name:           "md5 untagged autodetect",
139 | 			cksum:          New().SetDebug(testing.Verbose()).Check(true).Files(tsp + ".notag.md5"),
140 | 			expectedStdout: "three-small-pigs: OK\n",
141 | 		},
142 | 		{
143 | 			name:           "md5 tagged",
144 | 			cksum:          New().SetDebug(testing.Verbose()).Check(true).Untagged(false).Algorithm(MD5).Files(tsp + ".tag.md5"),
145 | 			expectedStdout: "three-small-pigs: OK\n",
146 | 		},
147 | 		{
148 | 			name:           "md5 ignore missing",
149 | 			cksum:          New().SetDebug(testing.Verbose()).Check(true).IgnoreMissing(true).Algorithm(MD5).Files(tsp + ".missing.file.tag.md5"),
150 | 			expectedStdout: "three-small-pigs: OK\n",
151 | 		},
152 | 		{
153 | 			name:           "md5 quiet",
154 | 			cksum:          New().SetDebug(testing.Verbose()).Check(true).Quiet(true).Algorithm(MD5).Files(tsp + ".tag.md5"),
155 | 			expectedStdout: "",
156 | 		},
157 | 		{
158 | 			name:           "md5 status",
159 | 			cksum:          New().SetDebug(testing.Verbose()).Check(true).Status(true).Algorithm(MD5).Files(tsp + ".tag.md5"),
160 | 			expectedStdout: "",
161 | 		},
162 | 		// TODO: sha512 and blake2b autodetect
163 | 	}
164 | 
165 | 	for _, tt := range testCases {
166 | 		tt := tt
167 | 		t.Run(tt.name, func(t *testing.T) {
168 | 			test.Parallel(t)
169 | 
170 | 			var stdout strings.Builder
171 | 			var stderr strings.Builder
172 | 			stdio := unix.NewStdio(
173 | 				nil,
174 | 				&stdout,
175 | 				&stderr,
176 | 			)
177 | 
178 | 			err := tt.cksum.Run(context.Background(), stdio)
179 | 			t.Logf("stderr=%q", stderr.String())
180 | 			t.Logf("stdout=%q", stdout.String())
181 | 			require.NoError(t, err)
182 | 
183 | 			require.Equal(t, tt.expectedStdout, stdout.String())
184 | 		})
185 | 	}
186 | 
187 | 	// test errors
188 | 	errCases := []struct {
189 | 		name           string
190 | 		cksum          CKSum
191 | 		expectedError  string
192 | 		expectedStdout string
193 | 	}{
194 | 		{
195 | 			name:          "error --algorithm mismatch",
196 | 			cksum:         New().Check(true).Algorithm(SHA224).Files(tsp + ".tag.md5"),
197 | 			expectedError: "BadLineFormatError",
198 | 		},
199 | 		{
200 | 			name:          "error not found file",
201 | 			cksum:         New().Check(true).Algorithm(SHA224).Files(tsp + ".notfound.md5"),
202 | 			expectedError: "notfound.md5: no such file or directory",
203 | 		},
204 | 		{
205 | 			name:           "error mismatch tagged --algorithm NONE",
206 | 			cksum:          New().Check(true).Algorithm(NONE).Files(tsp + ".tag.broken.md5"),
207 | 			expectedStdout: "three-small-pigs: FAILED\n",
208 | 		},
209 | 		{
210 | 			name:           "error mismatch tagged --algorithm MD5",
211 | 			cksum:          New().Check(true).Algorithm(MD5).Files(tsp + ".tag.broken.md5"),
212 | 			expectedStdout: "three-small-pigs: FAILED\n",
213 | 		},
214 | 		{
215 | 			name:           "error mismatch untagged --algorithm NONE",
216 | 			cksum:          New().Check(true).Algorithm(NONE).Files(tsp + ".notag.broken.md5"),
217 | 			expectedStdout: "three-small-pigs: FAILED\n",
218 | 		},
219 | 		{
220 | 			name:           "error mismatch untagged --algorithm MD5",
221 | 			cksum:          New().Check(true).Algorithm(MD5).Files(tsp + ".notag.broken.md5"),
222 | 			expectedStdout: "three-small-pigs: FAILED\n",
223 | 		},
224 | 	}
225 | 
226 | 	for _, tt := range errCases {
227 | 		tt := tt
228 | 		t.Run(tt.name, func(t *testing.T) {
229 | 			test.Parallel(t)
230 | 			tt.cksum.SetDebug(testing.Verbose())
231 | 
232 | 			var stdout strings.Builder
233 | 			var stderr strings.Builder
234 | 			stdio := unix.NewStdio(
235 | 				nil,
236 | 				&stdout,
237 | 				&stderr,
238 | 			)
239 | 
240 | 			err := tt.cksum.Run(context.Background(), stdio)
241 | 			require.Error(t, err)
242 | 
243 | 			if tt.expectedError != "" {
244 | 				require.True(t, strings.Contains(err.Error(), tt.expectedError))
245 | 			} else if tt.expectedStdout != "" {
246 | 				require.Equal(t, tt.expectedStdout, stdout.String())
247 | 			} else {
248 | 				t.Fatalf("test case %q does not check error neither stdout", tt.name)
249 | 			}
250 | 		})
251 | 	}
252 | 
253 | }
254 | 
// fromArgs builds a CKSum from command-line style arguments via
// New().FromArgs and fails the test immediately if parsing returns an error.
func fromArgs(t *testing.T, argv []string) CKSum {
	t.Helper()
	n := New()
	f, err := n.FromArgs(argv)
	require.NoError(t, err)
	return f
}
262 | 


--------------------------------------------------------------------------------
/x/tr/tr.go:
--------------------------------------------------------------------------------
  1 | package tr
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"context"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"strconv"
  9 | 	"strings"
 10 | 	"unicode"
 11 | 	"unicode/utf8"
 12 | 
 13 | 	"github.com/gomoni/gio/unix"
 14 | 	"github.com/gomoni/gonix/internal"
 15 | 	"github.com/gomoni/gonix/internal/dbg"
 16 | )
 17 | 
 18 | /*
 19 |    tr - translate or replace runes
 20 | 
 21 |    Working on runes makes it backward compatible with POSIX tr and supports
 22 |    utf-8 well. Ignores unicode combining characters though, user is expected to use NFC
 23 |    forms of input.
 24 | 
 25 |    Status:
 26 |    * DONE:   --delete and --delete --complement for all characters, character sets and escape characters
 27 |    * TODO: translate aka ARRAY2
 28 | 
 29 | 
 30 |  7. [CHAR*]
 31 |  8. [CHAR*REPEAT]
 32 | */
 33 | 
 34 | /*
 35 | Notes for an implementation:
 36 |  1. translate array1 -> array2
 37 |       if len(array1) > len(array2) - make array2 repeat last rune
 38 |       ix -> X becomes {'i': 'X', 'x': 'X'}
 39 |  2. translate a complement
 40 |  3. delete
 41 |  4. squeeze
 42 |  5. \NNN + \\ \a et \v
 43 |  6. CHAR1-CHAR2
 44 |  7. [CHAR*]
 45 |  8. [CHAR*REPEAT]
 46 |  9. [:alnum:] to [:xdigit:]
 47 |  10. equivalence classes[=CHAR=]
 48 | 	Although equivalence classes are intended to support non-English alphabets, there seems to be no standard way to define them or determine their contents. Therefore, they are not fully implemented in GNU tr; each character’s equivalence class consists only of that character, which is of no particular use.
 49 | 
 50 |  tr: when translating, the only character classes that may appear in
 51 |   string2 are 'upper' and 'lower'
 52 |  so [:upper:] -> [:lower:] or vice versa is all what is enabled
 53 |  tr '1[:upper:]' [:lower:] -> misaligned
 54 |  tr '1[:upper:]' 2[:lower:] is fine
 55 |  tr 'h[:upper:]' X[:lower:] is fine
 56 |  tr '[:lower:]e' [:upper:]x -> first replace lower case to upper case except e, which is replaced by x        -> HxLLO WORLD
 57 | 
 58 |  combining leads to weird results
 59 |  "2 Hello world" | tr [:alpha:][:digit:] XY
 60 |   Y YYYYY YYYYY
 61 | */
 62 | 
// Tr holds the configuration of one tr invocation. Values are built with New
// and the chained setters (Array1, Array2, Complement, Delete) and executed
// with Run.
type Tr struct {
	// debug enables debug logging and naming of the generated rules
	debug bool
	// array1 is the ARRAY1 argument: runes to delete or translate from
	array1 string
	// array2 is the ARRAY2 argument: runes to translate to
	array2     string
	complement bool // use complement of ARRAY1
	del        bool // delete characters in ARRAY1
	//truncate   bool       // TODO
	// files is handed to internal.NewRunFiles by Run; no setter for it is
	// defined in this file
	files []string
}
 72 | 
// New returns a zero-value Tr: no arrays, no --delete, no --complement.
func New() Tr {
	return Tr{}
}
 76 | 
// Array1 returns a copy of c with ARRAY1 set to in.
func (c Tr) Array1(in string) Tr {
	c.array1 = in
	return c
}
 81 | 
// Array2 returns a copy of c with ARRAY2 set to in.
func (c Tr) Array2(in string) Tr {
	c.array2 = in
	return c
}
 86 | 
// Complement returns a copy of c with the --complement flag set to b.
func (c Tr) Complement(b bool) Tr {
	c.complement = b
	return c
}
 91 | 
// Delete returns a copy of c with the --delete flag set to b.
func (c Tr) Delete(b bool) Tr {
	c.del = b
	return c
}
 96 | 
 97 | func (c Tr) Run(ctx context.Context, stdio unix.StandardIO) error {
 98 | 	c.debug = true
 99 | 	debug := dbg.Logger(c.debug, "tr", stdio.Stderr())
100 | 	var chain chain
101 | 	if c.del {
102 | 		trs, err := c.makeDelChain(c.array1)
103 | 		if err != nil {
104 | 			return err
105 | 		}
106 | 		chain.trs = trs
107 | 		debug.Printf("trs=%#v", trs)
108 | 	} else {
109 | 		if c.complement {
110 | 			panic("--complement for translate is not implemented")
111 | 		}
112 | 		trs, err := c.makeTrChain(c.array1, c.array2)
113 | 		if err != nil {
114 | 			return err
115 | 		}
116 | 		chain.trs = trs
117 | 		debug.Printf("trs=%#v", trs)
118 | 	}
119 | 
120 | 	var trFunc = chain.Tr
121 | 	if c.complement {
122 | 		trFunc = chain.Complement
123 | 	}
124 | 
125 | 	tr := func(ctx context.Context, stdio unix.StandardIO, _ int, _ string) error {
126 | 		scanner := bufio.NewScanner(stdio.Stdin())
127 | 		stdout := bufio.NewWriterSize(stdio.Stdout(), 4096)
128 | 		defer stdout.Flush()
129 | 		scanner.Split(bufio.ScanRunes)
130 | 		for scanner.Scan() {
131 | 			if scanner.Err() != nil {
132 | 				return scanner.Err()
133 | 			}
134 | 			in, _ := utf8.DecodeRuneInString(scanner.Text())
135 | 			rn, _ := trFunc(in)
136 | 			if rn == -1 {
137 | 				continue
138 | 			}
139 | 			_, err := writeRune(stdout, rn)
140 | 			if err != nil {
141 | 				return err
142 | 			}
143 | 		}
144 | 		return nil
145 | 	}
146 | 	runFiles := internal.NewRunFiles(c.files, stdio, tr)
147 | 	return runFiles.Do(ctx)
148 | }
149 | 
// trPred reports whether a rune belongs to a set (a character class, a range
// or an explicit list of runes).
type trPred func(rune) bool
151 | 
// trMap maps a source rune to its replacement rune.
type trMap map[rune]rune

// in reports whether r is a key of the map.
func (s trMap) in(r rune) bool {
	if _, found := s[r]; found {
		return true
	}
	return false
}

// Tr looks r up in the map and returns its replacement together with a flag
// telling whether r was present; a missing rune yields (0, false).
func (s trMap) Tr(r rune) (dst rune, found bool) {
	dst, found = s[r]
	return dst, found
}

// Complement is not implemented for translation maps and always panics.
func (s trMap) Complement(rune) (rune, bool) {
	panic("trMap --complement is not yet supported")
}
168 | 
// alnum implements [:alnum:]: Unicode letters and Unicode decimal digits.
func alnum(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
173 | 
// alpha implements [:alpha:]: any Unicode letter.
func alpha(r rune) bool {
	return unicode.IsLetter(r)
}
178 | 
// blank implements [:blank:]: the space and the horizontal tab only.
func blank(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
183 | 
// cntrl implements [:cntrl:] using unicode.IsControl.
func cntrl(r rune) bool {
	return unicode.IsControl(r)
}
188 | 
// digit implements [:digit:]: any Unicode decimal digit, not just ASCII 0-9.
func digit(r rune) bool {
	return unicode.IsDigit(r)
}
193 | 
// graph implements [:graph:]: everything unicode.IsPrint accepts except the
// ASCII space.
func graph(r rune) bool {
	if r == ' ' {
		return false
	}
	return unicode.IsPrint(r)
}
198 | 
// lower implements [:lower:]: any Unicode lower case letter.
func lower(r rune) bool {
	return unicode.IsLower(r)
}
203 | 
// prnt implements [:print:] using unicode.IsPrint, which accepts the ASCII
// space in addition to graphic characters.
func prnt(r rune) bool {
	return unicode.IsPrint(r)
}
208 | 
// punct implements [:punct:].
//
// POSIX puts every graphic character that is neither a letter nor a digit
// into this class. Unicode files several of the ASCII ones ($ + < = > ^ ` | ~)
// under the Symbol categories rather than Punctuation, so unicode.IsPunct
// alone would miss them; symbols are therefore accepted as well.
func punct(in rune) bool {
	return unicode.IsPunct(in) || unicode.IsSymbol(in)
}
213 | 
// space implements [:space:]: runes carrying the Unicode White_Space property.
func space(r rune) bool {
	return unicode.Is(unicode.White_Space, r)
}
218 | 
// upper implements [:upper:]: any Unicode upper case letter.
func upper(r rune) bool {
	return unicode.IsUpper(r)
}
223 | 
// xdigit implements [:xdigit:]: Unicode decimal digits plus the ASCII letters
// a-f and A-F.
func xdigit(r rune) bool {
	switch {
	case unicode.IsDigit(r):
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return false
}
228 | 
229 | // delTr implements tr interface for --delete and --delete --complement operations
230 | type delTr struct {
231 | 	pred trPred
232 | 	name string
233 | }
234 | 
235 | func (t delTr) Tr(in rune) (rune, bool) {
236 | 	if ok := t.pred(in); ok {
237 | 		return -1, true
238 | 	}
239 | 	return in, false
240 | }
241 | 
242 | func (t delTr) Complement(in rune) (rune, bool) {
243 | 	if ok := t.pred(in); ok {
244 | 		return in, true
245 | 	}
246 | 	return -1, false
247 | }
248 | 
// tr is a single rule of a chain; delTr and trMap implement it.
type tr interface {
	// Tr translate rune to other rune and returns true if it was done
	// -1 means rune is not going to be written
	Tr(rune) (rune, bool)
	// Complement is the counterpart of Tr used when --complement is set;
	// it has the same return convention.
	Complement(rune) (rune, bool)
}
255 | 
256 | type chain struct {
257 | 	trs []tr
258 | }
259 | 
260 | func (t chain) Tr(in rune) (rune, bool) {
261 | 	for _, tr := range t.trs {
262 | 		dst, found := tr.Tr(in)
263 | 		if !found {
264 | 			continue
265 | 		}
266 | 		return dst, true
267 | 	}
268 | 	// pass
269 | 	return in, true
270 | }
271 | 
272 | func (t chain) Complement(in rune) (rune, bool) {
273 | 	var dst rune
274 | 	for _, tr := range t.trs {
275 | 		var found bool
276 | 		dst, found = tr.Complement(in)
277 | 		if found {
278 | 			return dst, true
279 | 		}
280 | 	}
281 | 	// pass
282 | 	return dst, true
283 | }
284 | 
// writeRune writes the UTF-8 encoding of r to w and returns what w.Write
// returns. Runes that are not valid code points (negative values included)
// are written as the encoding of utf8.RuneError.
//
// Modelled on strings.Builder.WriteRune:
// https://cs.opensource.google/go/go/+/refs/tags/go1.19.1:src/strings/builder.go;l=104
func writeRune(w io.Writer, r rune) (int, error) {
	var scratch [utf8.UTFMax]byte
	return w.Write(utf8.AppendRune(scratch[:0], r))
}
298 | 
299 | /*
300 |    #########       parse command line      #################
301 | */
302 | 
// trClasses maps the name found between "[:" and ":]" to the predicate
// implementing that character class.
var trClasses = map[string]trPred{
	"alnum":  alnum,
	"alpha":  alpha,
	"blank":  blank,
	"cntrl":  cntrl,
	"digit":  digit,
	"graph":  graph,
	"lower":  lower,
	"print":  prnt,
	"punct":  punct,
	"space":  space,
	"upper":  upper,
	"xdigit": xdigit,
}
317 | 
318 | // makeDelChain parse ARRAY1 to generate a proper tr chain for --delete
319 | func (c Tr) makeDelChain(array1 string) ([]tr, error) {
320 | 	sprintf := func(string, ...any) string { return "" }
321 | 	if c.debug {
322 | 		if c.complement {
323 | 			sprintf = func(f string, a ...any) string { return fmt.Sprintf("! "+f, a) }
324 | 		} else {
325 | 			sprintf = func(f string, a ...any) string { return fmt.Sprintf(f, a) }
326 | 		}
327 | 	}
328 | 
329 | 	ret := make([]tr, 0, 10)
330 | 	globalSet := make(trMap)
331 | 
332 | 	in := newRunes(array1)
333 | 
334 | 	for idx := 0; idx < len(in); idx++ {
335 | 
336 | 		if in.at(idx) == '\\' {
337 | 			goto singleChar
338 | 		}
339 | 
340 | 		if klass, next := in.klass(idx); klass != "" {
341 | 			in, ok := trClasses[klass]
342 | 			if !ok {
343 | 				return nil, fmt.Errorf("invalid character class %q", klass)
344 | 			}
345 | 			ret = append(ret, delTr{
346 | 				pred: in,
347 | 				name: sprintf("[:%s:]", klass)},
348 | 			)
349 | 			idx = next
350 | 			continue
351 | 		}
352 | 
353 | 		if equiv, next := in.equiv(idx); equiv != -1 {
354 | 			idx = next
355 | 			globalSet[equiv] = -1
356 | 			continue
357 | 		}
358 | 
359 | 		if from, to, next := in.set(idx); next != idx {
360 | 			set := make(trMap, int(to-from))
361 | 			for rn := from; rn > to; rn++ {
362 | 				set[rn] = -1
363 | 			}
364 | 			ret = append(ret, delTr{pred: set.in, name: sprintf("%c-%c", to, from)})
365 | 			idx = next
366 | 			continue
367 | 		}
368 | 
369 | 	singleChar:
370 | 		rn, next, err := in.charAt(idx)
371 | 		if err != nil {
372 | 			return nil, err
373 | 		}
374 | 		globalSet[rn] = -1
375 | 		idx = next
376 | 	}
377 | 
378 | 	if len(globalSet) != 0 {
379 | 		name := ""
380 | 		if c.debug {
381 | 			var sb strings.Builder
382 | 			sb.Grow(len(globalSet))
383 | 			for rn := range globalSet {
384 | 				_, _ = writeRune(&sb, rn)
385 | 			}
386 | 			name = fmt.Sprintf("%+v", sb.String())
387 | 		}
388 | 		ret = append(ret, delTr{pred: globalSet.in, name: name})
389 | 	}
390 | 
391 | 	return ret, nil
392 | }
393 | 
// equivInArray2 builds the error reported when a [=C=] equivalence is found
// in ARRAY2. The unused int parameter lets the two results of
// safeRunes.equiv be passed straight through as arguments.
func equivInArray2(e rune, _ int) error {
	return fmt.Errorf("[=%c=] character equivalence cannot appear in array2", e)
}
397 | 
// Errors reported for character classes found in ARRAY2.
var (
	// errNoUpperNeitherLower: the class is neither [:upper:] nor [:lower:].
	errNoUpperNeitherLower = fmt.Errorf("allowed character classes in array2 are UPPER and lower only")
	// errMisalignedUpperAndLower: [:upper:]/[:lower:] is paired with a plain
	// character of ARRAY1.
	errMisalignedUpperAndLower = fmt.Errorf("misaligned UPPER and lower classes")
)
400 | 
401 | // makeTrChain parse ARRAY1 and ARRAY2 to generate a proper tr chain for translation
402 | func (c Tr) makeTrChain(array1, array2 string) ([]tr, error) {
403 | 	if len(array1) == 0 {
404 | 		return nil, fmt.Errorf("array1 is empty")
405 | 	}
406 | 	if len(array2) == 0 {
407 | 		return nil, fmt.Errorf("array2 is empty")
408 | 	}
409 | 
410 | 	if c.complement {
411 | 		panic("tr --complement is not yet implemented")
412 | 	}
413 | 	sprintf := func(string, ...any) string { return "" }
414 | 	if c.debug {
415 | 		if c.complement {
416 | 			panic("tr --complement is not yet implemented")
417 | 			//sprintf = func(f string, a ...any) string { return fmt.Sprintf("! "+f, a) }
418 | 		} else {
419 | 			sprintf = func(f string, a ...any) string { return fmt.Sprintf(f, a) }
420 | 		}
421 | 	}
422 | 
423 | 	ret := make([]tr, 0, 10)
424 | 	globalSet := make(trMap)
425 | 
426 | 	in1 := newRunes(array1)
427 | 	in2 := newRunes(array2)
428 | 	var lastIn2 rune
429 | 
430 | 	idx2 := 0
431 | 	for idx1 := 0; idx1 < len(in1); idx1++ {
432 | 		if in1.at(idx1) == '\\' {
433 | 			goto singleChar
434 | 		}
435 | 
436 | 		if klass, _ := in1.klass(idx1); klass != "" {
437 | 			sprintf(klass)
438 | 			panic("character classes for tr are not yet implemented")
439 | 		}
440 | 
441 | 		if equiv, _ := in1.equiv(idx1); equiv != -1 {
442 | 			goto singleChar
443 | 		}
444 | 
445 | 		if _, _, next := in1.set(idx1); next != idx1 {
446 | 			panic("ranges/sets for tr are not yet implemented")
447 | 		}
448 | 
449 | 	singleChar:
450 | 		from, next, err := in1.charOrEquivAt(idx1)
451 | 		if err != nil {
452 | 			return nil, err
453 | 		}
454 | 		idx1 = next
455 | 
456 | 		switch in2.typ2(idx2) {
457 | 		case NONE:
458 | 			// pass
459 | 		case CHAR:
460 | 			to, next, err := in2.charAt(idx2)
461 | 			if err != nil {
462 | 				return nil, err
463 | 			}
464 | 			lastIn2 = to
465 | 			idx2 = next + 1
466 | 		case EQUIV:
467 | 			return nil, equivInArray2(in2.equiv(idx2))
468 | 		case KLASS:
469 | 			klass, _ := in2.klass(idx2)
470 | 			if klass == "lower" || klass == "upper" {
471 | 				return nil, errMisalignedUpperAndLower
472 | 			}
473 | 			return nil, errNoUpperNeitherLower
474 | 		case REPEAT:
475 | 			panic("[CHAR*] is not yet implemented")
476 | 		}
477 | 		globalSet[from] = lastIn2
478 | 		continue
479 | 	}
480 | 
481 | 	if len(globalSet) != 0 {
482 | 		/*
483 | 			        name := ""
484 | 					if c.debug {
485 | 						var sb strings.Builder
486 | 						sb.Grow(len(globalSet))
487 | 						for rn := range globalSet {
488 | 							_, _ = writeRune(&sb, rn)
489 | 						}
490 | 						name = fmt.Sprintf("%+v", sb.String())
491 | 					}
492 | 		*/
493 | 		ret = append(ret, globalSet)
494 | 	}
495 | 
496 | 	return ret, nil
497 | }
498 | 
// safeRunes is a helper for []rune, gracefully handle out of bound access
// and provides various parsing helpers
type safeRunes []rune

// newRunes converts s to its runes, one element per code point; every invalid
// UTF-8 byte becomes utf8.RuneError.
//
// The conversion is used instead of a `for idx, rn := range s` copy because
// ranging over a string yields byte offsets, which run past the rune count as
// soon as s contains a multi-byte character.
func newRunes(s string) safeRunes {
	return []rune(s)
}
510 | 
511 | func (s safeRunes) at(idx int) rune {
512 | 	if idx < 0 || idx >= len(s) {
513 | 		return -1
514 | 	}
515 | 	return s[idx]
516 | }
517 | 
// typ classifies the element starting at a position of ARRAY1 or ARRAY2.
type typ uint8

const (
	NONE   typ = iota // 0: position is outside the array
	CHAR              // 1: single character or escape sequence
	KLASS             // 2: [:class:]
	EQUIV             // 3: [=C=]
	REPEAT            // 4: [CHAR*] or [CHAR*REPEAT]
)
527 | 
528 | func (s safeRunes) typ(idx int) typ {
529 | 	if idx < 0 || idx >= len(s) {
530 | 		return NONE
531 | 	}
532 | 	if klass, _ := s.klass(idx); klass != "" {
533 | 		return KLASS
534 | 	}
535 | 	if equiv, _ := s.equiv(idx); equiv != -1 {
536 | 		return EQUIV
537 | 	}
538 | 	if s.at(idx) == '\\' {
539 | 		if _, _, err := s.sequence(idx); err != nil {
540 | 			return CHAR
541 | 		}
542 | 	}
543 | 	return CHAR
544 | }
545 | 
546 | // typ2 is variant for typ except it recognize REPEAT type compatible with ARRAY2
547 | // the [CHAR*] is recognized as CHAR1 '[' via typ
548 | func (s safeRunes) typ2(idx int) typ {
549 | 	typ := s.typ(idx)
550 | 	if typ == CHAR && s.at(idx) == '[' {
551 | 		if rn, _, _ := s.repeat(idx); rn != -1 {
552 | 			return REPEAT
553 | 		}
554 | 	}
555 | 	return typ
556 | }
557 | 
558 | func (s safeRunes) lookAhead(from int, needle rune) int {
559 | 	if from < 0 || from > len(s) {
560 | 		return -1
561 | 	}
562 | 	for idx := from; idx != len(s); idx++ {
563 | 		if s[idx] == needle {
564 | 			return idx
565 | 		}
566 | 	}
567 | 	return -1
568 | }
569 | 
570 | // [:class:] support, returns a string as `class` and index of ] in slice
571 | func (s safeRunes) klass(from int) (string, int) {
572 | 	if s.at(from) == '[' && s.at(from+1) == ':' {
573 | 		if colIdx := s.lookAhead(from+2, ':'); colIdx != -1 {
574 | 			if s.at(colIdx+1) == ']' {
575 | 				return s.substr(from+2, colIdx), colIdx + 1
576 | 			}
577 | 		}
578 | 	}
579 | 	return "", from
580 | }
581 | 
582 | // [=C=] support, returns a string as `C` and a idex of ] in slice
583 | func (s safeRunes) equiv(from int) (rune, int) {
584 | 	if s.at(from) == '[' && s.at(from+1) == '=' && s.at(from+3) == '=' && s.at(from+4) == ']' {
585 | 		return s[from+2], from + 4
586 | 	}
587 | 	return -1, from
588 | }
589 | 
590 | // [CHAR*] or [CHAR*REPEAT] returns a char, repeats (0 means infinity) and index of last ] in slice
591 | func (s safeRunes) repeat(from int) (rune, int, int) {
592 | 	if s.at(from) == '[' {
593 | 		if rn, from, err := s.charAt(from + 1); err != nil {
594 | 			if s.at(from+1) == '*' && s.at(from+2) == ']' {
595 | 				return rn, 0, from + 2
596 | 			}
597 | 			if repeat, from, err := s.numberAt(from); err != nil {
598 | 				if s.at(from+1) == ']' {
599 | 					return rn, repeat, from + 1
600 | 				}
601 | 			}
602 | 		}
603 | 	}
604 | 	return -1, 0, from
605 | }
606 | 
607 | func (s safeRunes) numberAt(from int) (int, int, error) {
608 | 	start := from
609 | 	to := from
610 | 	base := 10
611 | 	if s.at(from) == '0' {
612 | 		base = 8
613 | 		start += 1
614 | 	}
615 | 	for {
616 | 		if digit(s.at(from)) {
617 | 			to++
618 | 			continue
619 | 		}
620 | 		break
621 | 	}
622 | 	n, err := strconv.ParseInt(string(s[start:to+1]), base, 32)
623 | 	if err != nil {
624 | 		return 0, from, err
625 | 	}
626 | 	return int(n), to + 1, nil
627 | }
628 | 
629 | // return a character at index or error - supports escape sequences too
630 | func (s safeRunes) charAt(from int) (rune, int, error) {
631 | 	if s.at(from) == '\\' {
632 | 		return s.sequence(from)
633 | 	}
634 | 	rn := s.at(from)
635 | 	if rn == -1 {
636 | 		return rn, from, fmt.Errorf("charAt index %d our of range <0;%d>", from, len(s)-1)
637 | 	}
638 | 	return rn, from, nil
639 | }
640 | 
641 | // return a character or equivalence class at index or error - supports escape sequences too
642 | func (s safeRunes) charOrEquivAt(from int) (rune, int, error) {
643 | 	switch s.typ(from) {
644 | 	case CHAR:
645 | 		return s.charAt(from)
646 | 	case EQUIV:
647 | 		e, next := s.equiv(from)
648 | 		return e, next, nil
649 | 	default:
650 | 		return -1, from, fmt.Errorf("Unsupported type, expected 1 - CHAR or 3 - EQUIV, got %d", s.typ(from))
651 | 	}
652 | }
653 | 
654 | func (s safeRunes) sequence(from int) (rune, int, error) {
655 | 	if s.at(from) != '\\' {
656 | 		return -1, from, fmt.Errorf("can't interpret as a sequence: missing \\ at the start")
657 | 	}
658 | 	if octal(s.at(from+1)) && octal(s.at(from+2)) && octal(s.at(from+3)) {
659 | 		n, err := strconv.ParseInt(s.substr(from+1, from+4), 8, 32)
660 | 		if err != nil {
661 | 			return -1, from, fmt.Errorf("can't parse octal sequence: %w", err)
662 | 		}
663 | 		return rune(n), from + 3, nil
664 | 	}
665 | 	switch s.at(from + 1) {
666 | 	case '\\':
667 | 		return '\\', from + 1, nil
668 | 	case 'a':
669 | 		return '\a', from + 1, nil
670 | 	case 'b':
671 | 		return '\b', from + 1, nil
672 | 	case 'f':
673 | 		return '\f', from + 1, nil
674 | 	case 'n':
675 | 		return '\n', from + 1, nil
676 | 	case 'r':
677 | 		return '\r', from + 1, nil
678 | 	case 't':
679 | 		return '\t', from + 1, nil
680 | 	case 'v':
681 | 		return '\v', from + 1, nil
682 | 	default:
683 | 		return -1, from, fmt.Errorf("can't interpret sequence \\%c", s.at(from+1))
684 | 	}
685 | }
686 | 
// octal reports whether rn is one of the ASCII digits 0-7.
func octal(rn rune) bool {
	return '0' <= rn && rn <= '7'
}
690 | 
691 | // character set - returns rune from, rune to and index of last processed rune in slice
692 | // characters must be in ascending order, so first rune is always smaller than second
693 | func (s safeRunes) set(from int) (rune, rune, int) {
694 | 	if s.at(from+1) != '-' {
695 | 		return -1, -1, from
696 | 	}
697 | 	start := s.at(from)
698 | 	stop := s.at(from + 2)
699 | 	if start < stop {
700 | 		return start, stop, from + 2
701 | 	}
702 | 	return -1, -1, from
703 | }
704 | 
705 | func (s safeRunes) substr(from, to int) string {
706 | 	if from >= to || from <= 0 || to > len(s) {
707 | 		return ""
708 | 	}
709 | 	var sb strings.Builder
710 | 	sb.Grow(to - from)
711 | 	for idx := from; idx != to; idx++ {
712 | 		_, err := sb.WriteRune(s[idx])
713 | 		if err != nil {
714 | 			return ""
715 | 		}
716 | 	}
717 | 	return sb.String()
718 | }
719 | 


--------------------------------------------------------------------------------
/cksum/cksum.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 Michal Vyskocil. All rights reserved.
  2 | // Use of this source code is governed by a MIT
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Contains portions of cksum.c from suckless sbase under MIT license
  6 | // https://git.suckless.org/sbase/file/LICENSE.html
  7 | 
  8 | /*
  9 | check is surprisingly complex problem
 10 | 
 11 | what is implemented
 12 | ✅ check don't work with crc
 13 | ✅ check works with recent hashes like md5 or sha
 14 | ✅ untagged format
 15 | ✅ untagged format requires explicit --algorithm switch
 16 | ✅ tagged format - happy path
 17 | ✅ --check --algorithm returns no properly formatted lines error for a different hash
🚀 autodetect hash for untagged format - this includes sha512 and blake2b
🚀 parallel check of checksums from one file limited by -j/--threads, defaults to GOMAXPROCS
 parallel generation of checksums
🚀 parallel check of checksums from one file limited by -j/--threads, defaults to GOMAXPROCS
 22 | 
 23 | 
 24 | what is not (yet)
 25 | ❌ GNU options:
 26 |     -l/--length
 27 |     -z/--zero
 28 |     --strict    - cksum will return 1 by default
 29 |     -w/--warn
 30 |     --debug
 31 | ❌ everything around warns and counting various errors - this looks to be completely random at least
 32 | 
 33 | */
 34 | 
 35 | package cksum
 36 | 
 37 | import (
 38 | 	"bufio"
 39 | 	"context"
 40 | 	"crypto/md5"
 41 | 	"crypto/sha1"
 42 | 	"crypto/sha256"
 43 | 	"crypto/sha512"
 44 | 	"encoding/hex"
 45 | 	"errors"
 46 | 	"fmt"
 47 | 	"io"
 48 | 	"log"
 49 | 	"os"
 50 | 	"regexp"
 51 | 	"runtime"
 52 | 	"strings"
 53 | 
 54 | 	"github.com/gomoni/gio/pipe"
 55 | 	"github.com/gomoni/gio/unix"
 56 | 	"github.com/gomoni/gonix/internal"
 57 | 	"github.com/gomoni/gonix/internal/dbg"
 58 | 	"github.com/spf13/pflag"
 59 | 	"golang.org/x/crypto/blake2b"
 60 | )
 61 | 
// Algorithm identifies the checksum algorithm. It has String, Type and Set
// methods so it can be registered as a pflag.Value.
type Algorithm int

const (
	// NONE is the zero value: no algorithm was selected explicitly.
	NONE Algorithm = 0
	// values 1 and 2 are reserved for the sysv and bsd algorithms
	//sysv algorithm = 1
	//bsd  algorithm = 2
	CRC     Algorithm = 3
	MD5     Algorithm = 4
	SHA1    Algorithm = 5
	SHA224  Algorithm = 6
	SHA256  Algorithm = 7
	SHA384  Algorithm = 8
	SHA512  Algorithm = 9
	BLAKE2B Algorithm = 10
)
 77 | 
 78 | // https://pkg.go.dev/github.com/spf13/pflag#Value
 79 | func (a Algorithm) String() string {
 80 | 	switch a {
 81 | 	case CRC:
 82 | 		return `crc`
 83 | 	case MD5:
 84 | 		return `md5`
 85 | 	case SHA1:
 86 | 		return `sha1`
 87 | 	case SHA224:
 88 | 		return `sha224`
 89 | 	case SHA256:
 90 | 		return `sha256`
 91 | 	case SHA384:
 92 | 		return `sha1384`
 93 | 	case SHA512:
 94 | 		return `sha512`
 95 | 	case BLAKE2B:
 96 | 		return `blake2b`
 97 | 	default:
 98 | 		return `!unknown`
 99 | 	}
100 | }
101 | 
// Type returns the constant type name "algorithm".
func (a Algorithm) Type() string {
	return "algorithm"
}
105 | 
106 | func (a *Algorithm) Set(value string) error {
107 | 	switch value {
108 | 	case `crc`:
109 | 		*a = CRC
110 | 	case `md5`:
111 | 		*a = MD5
112 | 	case `sha1`:
113 | 		*a = SHA1
114 | 	case `sha224`:
115 | 		*a = SHA224
116 | 	case `sha256`:
117 | 		*a = SHA256
118 | 	case `sha384`:
119 | 		*a = SHA384
120 | 	case `sha512`:
121 | 		*a = SHA512
122 | 	case `blake2b`:
123 | 		*a = BLAKE2B
124 | 	default:
125 | 		return fmt.Errorf("invalid argument %q for --algorithm", value)
126 | 	}
127 | 	return nil
128 | }
129 | 
// CKSum holds the configuration of one cksum invocation. Values are built
// with New and the chained value-receiver setters; every setter returns a
// modified copy.
type CKSum struct {
	// threads is the limit set by Parallel
	threads uint
	// debug is set by SetDebug
	debug bool
	// algorithm is set by Algorithm; the zero value is NONE
	algorithm Algorithm
	// check is set by Check
	check bool
	// untagged is set by Untagged
	untagged bool
	// ignoreMissing is set by IgnoreMissing
	ignoreMissing bool
	// quiet is set by Quiet
	quiet bool
	// status is set by Status
	status bool
	// files are the input files added by Files, where - denotes stdin
	files []string
}
141 | 
// New returns a zero-value CKSum: algorithm NONE, no flags, no files.
func New() CKSum {
	return CKSum{}
}
145 | 
146 | // Files are input files, where - denotes stdin
147 | func (c CKSum) Files(f ...string) CKSum {
148 | 	c.files = append(c.files, f...)
149 | 	return c
150 | }
151 | 
// Algorithm returns a copy of c using the given checksum algorithm.
func (c CKSum) Algorithm(algorithm Algorithm) CKSum {
	c.algorithm = algorithm
	return c
}
156 | 
157 | func (c CKSum) Check(check bool) CKSum {
158 | 	c.check = true
159 | 	return c
160 | }
161 | 
162 | func (c CKSum) IgnoreMissing(ignoreMissing bool) CKSum {
163 | 	c.ignoreMissing = ignoreMissing
164 | 	return c
165 | }
166 | 
167 | func (c CKSum) Parallel(limit uint) CKSum {
168 | 	c.threads = limit
169 | 	return c
170 | }
171 | 
172 | func (c CKSum) Quiet(quiet bool) CKSum {
173 | 	c.quiet = quiet
174 | 	return c
175 | }
176 | 
177 | func (c CKSum) Untagged(untagged bool) CKSum {
178 | 	c.untagged = untagged
179 | 	return c
180 | }
181 | 
182 | func (c CKSum) Status(status bool) CKSum {
183 | 	c.status = status
184 | 	return c
185 | }
186 | func (c CKSum) SetDebug(debug bool) CKSum {
187 | 	c.debug = debug
188 | 	return c
189 | }
190 | 
191 | func (c CKSum) FromArgs(argv []string) (CKSum, error) {
192 | 	flag := pflag.FlagSet{}
193 | 	var algorithm Algorithm = NONE
194 | 	flag.VarP(&algorithm, "algorithm", "a", "checksum algorithm to use, crc is default")
195 | 	check := flag.BoolP("check", "c", false, "check checksums from file(s)")
196 | 	_ = flag.Bool("tag", true, "create BSD style checksum (default)")
197 | 	untagged := flag.Bool("untagged", false, "create checksum without digest type")
198 | 	ignoreMissing := flag.Bool("ignore-missing", false, "ignore missing files")
199 | 	quiet := flag.Bool("quiet", false, "do not print OK for every verified file")
200 | 	status := flag.Bool("status", false, "report status code only")
201 | 
202 | 	// GNU is not consistent with parallel naming (make uses -j/--jobs, xargs -P and so
203 | 	// used -j/--threads as ripgrep does
204 | 	var threads uint
205 | 	flag.UintVarP(&threads, "threads", "j", 0, "generate or check using N goroutines, 0 equals GOMAXPROCS")
206 | 	err := flag.Parse(argv)
207 | 	if err != nil {
208 | 		return CKSum{}, pipe.NewErrorf(1, "cksum: parsing failed: %w", err)
209 | 	}
210 | 
211 | 	if len(flag.Args()) > 0 {
212 | 		c.files = flag.Args()
213 | 	}
214 | 
215 | 	c.algorithm = algorithm
216 | 	if c.algorithm == CRC || *untagged {
217 | 		c.untagged = true
218 | 	}
219 | 	c.check = *check
220 | 	c.ignoreMissing = *ignoreMissing
221 | 	c.quiet = *quiet
222 | 	c.status = *status
223 | 	c.threads = threads
224 | 	return c, nil
225 | }
226 | 
227 | func (c CKSum) Run(ctx context.Context, stdio unix.StandardIO) error {
228 | 	debug := dbg.Logger(c.debug, "cksum", stdio.Stderr())
229 | 	if c.threads == 0 {
230 | 		c.threads = uint(runtime.GOMAXPROCS(0))
231 | 	}
232 | 	debug.Printf("running with --threads %d", c.threads)
233 | 
234 | 	if c.check {
235 | 		debug.Printf("about to call c.checkSum")
236 | 		return c.checkSum(ctx, stdio, debug)
237 | 	}
238 | 	return c.makeSum(ctx, stdio, debug)
239 | }
240 | 
241 | func (c CKSum) makeSum(ctx context.Context, stdio unix.StandardIO, _ *log.Logger) error {
242 | 	if c.algorithm == NONE {
243 | 		c.algorithm = CRC
244 | 	}
245 | 
246 | 	var makeSum func(context.Context, unix.StandardIO, int, string) error
247 | 
248 | 	switch c.algorithm {
249 | 	case CRC:
250 | 		makeSum = func(ctx context.Context, stdio unix.StandardIO, _ int, name string) error {
251 | 			cksum, size, err := docrc(ctx, func() simpleHash { return &crc{} }, stdio.Stdin())
252 | 			if err != nil {
253 | 				return err
254 | 			}
255 | 			fmt.Fprintf(stdio.Stdout(), "%s %d %s\n", cksum, size, name)
256 | 			return nil
257 | 		}
258 | 	default:
259 | 		hash, name, ok := c.algorithm.hashFunc()
260 | 		if !ok {
261 | 			return fmt.Errorf("invalid argument %q for --algorithm", c.algorithm)
262 | 		}
263 | 		makeSum = newDigestFunc(hash, name, c.untagged)
264 | 	}
265 | 
266 | 	runFiles := internal.NewRunFiles(
267 | 		c.files,
268 | 		stdio,
269 | 		makeSum,
270 | 	)
271 | 	return runFiles.DoThreads(ctx, c.threads)
272 | }
273 | 
274 | func (c CKSum) checkSum(ctx context.Context, stdio unix.StandardIO, debug *log.Logger) error {
275 | 	if c.check && c.algorithm == CRC {
276 | 		return fmt.Errorf("--check is not supported with algorithm=%s", c.algorithm)
277 | 	}
278 | 
279 | 	ckSumOne := func(_ context.Context, line string) (checkResult, error) {
280 | 		res, err := c.checkLine(line, debug)
281 | 		if !c.ignoreMissing && err != nil {
282 | 			return res, err
283 | 		}
284 | 		return res, nil
285 | 	}
286 | 
287 | 	ckSum := func(ctx context.Context, stdio unix.StandardIO, _ int, name string) error {
288 | 		r := bufio.NewScanner(stdio.Stdin())
289 | 		input := make([]string, 0, 16)
290 | 		for r.Scan() {
291 | 			if r.Err() != nil {
292 | 				return pipe.Error{Code: 1, Err: r.Err()}
293 | 			}
294 | 			input = append(input, r.Text())
295 | 		}
296 | 
297 | 		results, err := internal.PMap(ctx, c.threads, input, ckSumOne)
298 | 		var ret int
299 | 		if err != nil {
300 | 			ret = 1
301 | 		}
302 | 
303 | 		for _, result := range results {
304 | 			switch result.state {
305 | 			case stNONE:
306 | 				continue
307 | 			case stOK:
308 | 				if !c.quiet && !c.status {
309 | 					fmt.Fprintf(stdio.Stdout(), "%s: OK\n", result.name)
310 | 				}
311 | 			case stFAILED:
312 | 				if !c.status {
313 | 					fmt.Fprintf(stdio.Stdout(), "%s: FAILED\n", result.name)
314 | 					ret = 1
315 | 				}
316 | 			case stIO:
317 | 				if c.ignoreMissing {
318 | 					continue
319 | 				}
320 | 				if !c.status {
321 | 					fmt.Fprintf(stdio.Stdout(), "%s: FAILED open or read error\n", result.name)
322 | 					ret = 1
323 | 				}
324 | 			default:
325 | 				panic("unknown result state")
326 | 			}
327 | 		}
328 | 
329 | 		if ret != 0 {
330 | 			return pipe.NewError(ret, err)
331 | 		}
332 | 		return nil
333 | 	}
334 | 
335 | 	runFiles := internal.NewRunFiles(
336 | 		c.files,
337 | 		stdio,
338 | 		ckSum,
339 | 	)
340 | 	return runFiles.Do(ctx)
341 | }
342 | 
// crctab is the 256-entry lookup table used by crc.Write and crc.Sum; it is
// indexed by the top byte of the running checksum XORed with the input byte.
//
// The values are copied from the suckless sbase implementation:
// https://git.suckless.org/sbase/file/cksum.c.html#l11
var crctab = [256]uint32{0x00000000,
	0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
	0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6,
	0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
	0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac,
	0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f,
	0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a,
	0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
	0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58,
	0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033,
	0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe,
	0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
	0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4,
	0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0,
	0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5,
	0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
	0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07,
	0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c,
	0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1,
	0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
	0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b,
	0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698,
	0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d,
	0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
	0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f,
	0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34,
	0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80,
	0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
	0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a,
	0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629,
	0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c,
	0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
	0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e,
	0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65,
	0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8,
	0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
	0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2,
	0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71,
	0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74,
	0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
	0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21,
	0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a,
	0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087,
	0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
	0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d,
	0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce,
	0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb,
	0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
	0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09,
	0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662,
	0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf,
	0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4,
}
397 | 
398 | func docrc(_ context.Context, hashFunc func() simpleHash, stdin io.Reader) (string, int64, error) {
399 | 	var buf [4096]byte
400 | 	hash := hashFunc()
401 | 	n, err := io.CopyBuffer(hash, stdin, buf[:])
402 | 	if err != nil {
403 | 		return "", -1, err
404 | 	}
405 | 	return string(hash.Sum(nil)), n, nil
406 | }
407 | 
// crc is the state of the table-driven checksum built on crctab. Write
// updates both fields; Sum mixes size into ck to produce the final value.
type crc struct {
	ck   uint32 // running checksum register
	size int    // total number of bytes passed to Write so far
}
412 | 
413 | func (c *crc) Write(buf []byte) (int, error) {
414 | 	n := len(buf)
415 | 	c.size += n
416 | 	ck := c.ck
417 | 	// https://git.suckless.org/sbase/file/cksum.c.html#l74
418 | 	for i := 0; i < n; i++ {
419 | 		ck = (ck << 8) ^ crctab[(ck>>24)^uint32(buf[i])]
420 | 	}
421 | 	c.ck = ck
422 | 	return n, nil
423 | }
424 | 
425 | func (c crc) Sum(_ []byte) []byte {
426 | 	ck := c.ck
427 | 	for i := c.size; i != 0; i >>= 8 {
428 | 		ck = (ck << 8) ^ crctab[(ck>>24)^uint32((i&0xFF))]
429 | 	}
430 | 	return []byte(fmt.Sprintf("%d", ^ck))
431 | }
432 | 
433 | func (a Algorithm) Size() int {
434 | 	switch a {
435 | 	case MD5:
436 | 		return 32
437 | 	case SHA1:
438 | 		return 40
439 | 	case SHA224:
440 | 		return 56
441 | 	case SHA256:
442 | 		return 64
443 | 	case SHA384:
444 | 		return 96
445 | 	case SHA512:
446 | 		return 128
447 | 	case BLAKE2B:
448 | 		return 128
449 | 	default:
450 | 		return -1
451 | 	}
452 | }
453 | 
454 | func (a Algorithm) hashFunc() (func() simpleHash, string, bool) {
455 | 	switch a {
456 | 	case MD5:
457 | 		return func() simpleHash { return md5.New() }, "MD5", true
458 | 	case SHA1:
459 | 		return func() simpleHash { return sha1.New() }, "SHA1", true
460 | 	case SHA224:
461 | 		return func() simpleHash { return sha256.New224() }, "SHA224", true
462 | 	case SHA256:
463 | 		return func() simpleHash { return sha256.New() }, "SHA256", true
464 | 	case SHA384:
465 | 		return func() simpleHash { return sha512.New384() }, "SHA338", true
466 | 	case SHA512:
467 | 		return func() simpleHash { return sha512.New() }, "SHA512", true
468 | 	case BLAKE2B:
469 | 		hash, err := blake2b.New(64, nil)
470 | 		if err != nil {
471 | 			return nil, "", false
472 | 		}
473 | 		return func() simpleHash { return hash }, "BLAKE2b", true
474 | 	default:
475 | 		return nil, "", false
476 | 	}
477 | }
478 | 
479 | func parseAlgorithm(s string) (Algorithm, error) {
480 | 	var a Algorithm
481 | 	switch strings.ToUpper(s) {
482 | 	case "MD5":
483 | 		a = MD5
484 | 	case "SHA1":
485 | 		a = SHA1
486 | 	case "SHA224":
487 | 		a = SHA224
488 | 	case "SHA256":
489 | 		a = SHA256
490 | 	case "SHA384":
491 | 		a = SHA384
492 | 	case "SHA512":
493 | 		a = SHA512
494 | 	case "BLAKE2B":
495 | 		a = BLAKE2B
496 | 	default:
497 | 		return NONE, fmt.Errorf("invalid argument %q for --algorithm", s)
498 | 	}
499 | 	return a, nil
500 | }
501 | 
// simpleHash is the subset of the hash.Hash interface this package needs:
// writing data in and reading the digest out. Keeping it this small lets the
// suckless crc type be used as a hash without implementing the remaining
// hash.Hash methods.
type simpleHash interface {
	io.Writer
	// Sum returns the digest; every caller in this file passes nil.
	Sum([]byte) []byte
}
508 | 
509 | // digest implements a digest for hash.Hash compatible stuff
510 | // md5, sha256
511 | func digest(hash simpleHash, stdin io.Reader) (string, error) {
512 | 	var buf [4096]byte
513 | 	_, err := io.CopyBuffer(hash, stdin, buf[:])
514 | 	if err != nil {
515 | 		return "", err
516 | 	}
517 | 
518 | 	return hex.EncodeToString(hash.Sum(nil)), nil
519 | }
520 | 
521 | func newDigestFunc(hashFunc func() simpleHash, hashName string, untagged bool) func(ctx context.Context, stdio unix.StandardIO, _ int, name string) error {
522 | 	return func(ctx context.Context, stdio unix.StandardIO, _ int, name string) error {
523 | 		hash := hashFunc()
524 | 		cksum, err := digest(hash, stdio.Stdin())
525 | 		if err != nil {
526 | 			return err
527 | 		}
528 | 		if name == "" {
529 | 			name = "-"
530 | 		}
531 | 		if untagged {
532 | 			fmt.Fprintf(stdio.Stdout(), "%s  %s\n", cksum, name)
533 | 		} else {
534 | 			fmt.Fprintf(stdio.Stdout(), "%s (%s) = %s\n", hashName, name, cksum)
535 | 		}
536 | 		return nil
537 | 	}
538 | }
539 | 
// tagged matches one upper-case ASCII letter. checkLine applies it to the
// first byte of a line to tell the tagged format from the untagged one.
var tagged = regexp.MustCompile("[A-Z]")
543 | 
// BadLineFormatError reports a checksum line that cannot be interpreted;
// the string value describes what is wrong with the line.
type BadLineFormatError string

// badLineFormatErrorf builds a BadLineFormatError from a format string and
// its arguments.
func badLineFormatErrorf(temp string, args ...any) error {
	description := fmt.Sprintf(temp, args...)
	return BadLineFormatError(description)
}

// Error renders the description quoted and wrapped in the type name.
func (e BadLineFormatError) Error() string {
	return "BadLineFormatError(" + fmt.Sprintf("%q", string(e)) + ")"
}
552 | 
// checkState is the verdict for one line of a checksum list.
type checkState int

const (
	stNONE   checkState = iota // zero value: no verdict
	stOK                       // digest matches
	stFAILED                   // digest differs
	stIO                       // file could not be opened or read
)
561 | 
// checkResult is the outcome of verifying one checksum line. The zero value
// (empty name, stNONE) is what checkLine returns together with a line format
// error.
type checkResult struct {
	name  string     // file name taken from the checksum line
	state checkState // verdict for that file
}
566 | 
567 | func stateOK(name string) checkResult {
568 | 	return checkResult{name: name, state: stOK}
569 | }
570 | 
571 | func stateFAILED(name string) checkResult {
572 | 	return checkResult{name: name, state: stFAILED}
573 | }
574 | 
575 | func stateIO(name string) checkResult {
576 | 	return checkResult{name: name, state: stIO}
577 | }
578 | 
579 | // parse untagged and tagged formats
580 | //   - untagged  hash name
581 | //   - tagged HASH(name) = hash
582 | //
583 | // returns errors
584 | // 1. BadLineFormatError for untagged format and algorithm NONE (unless autodetected)
585 | // 2. BadLineFormatError for tagged format and a different hash
586 | // 3. BadLineFormatError for wrong size of a hash
587 | // 4. MismatchError for mismatched hash
588 | func (c CKSum) checkLine(line string, debug *log.Logger) (checkResult, error) {
589 | 	var zero checkResult
590 | 
591 | 	if len(line) == 0 {
592 | 		return zero, BadLineFormatError("empty")
593 | 	}
594 | 
595 | 	if !tagged.MatchString(line[0:1]) {
596 | 		debug.Printf("checkLine: detected --untagged format")
597 | 		algorithms := make([]Algorithm, 0, 2)
598 | 		if c.algorithm == NONE {
599 | 
600 | 			debug.Printf("checkLine: try to autodetect checksum")
601 | 			expected, _, ok := strings.Cut(line, " ")
602 | 			if !ok {
603 | 				debug.Printf("checLine: no space in --untagged format")
604 | 				goto cantDetect
605 | 			}
606 | 			switch len(expected) {
607 | 			case MD5.Size():
608 | 				c.algorithm = MD5
609 | 			case SHA1.Size():
610 | 				c.algorithm = SHA1
611 | 			case SHA224.Size():
612 | 				c.algorithm = SHA224
613 | 			case SHA256.Size():
614 | 				c.algorithm = SHA256
615 | 			case SHA384.Size():
616 | 				c.algorithm = SHA384
617 | 			case SHA512.Size():
618 | 				c.algorithm = SHA512 // or blake2b
619 | 				algorithms = []Algorithm{SHA512, BLAKE2B}
620 | 				debug.Printf("checLine: detected 512 bytes, trying SHA512 or BLAKE2b")
621 | 			default:
622 | 				goto cantDetect
623 | 			}
624 | 			goto detected
625 | 
626 | 		cantDetect:
627 | 			return zero, BadLineFormatError("--algorithm must be specified with --untagged")
628 | 		}
629 | 
630 | 	detected:
631 | 		checkSum := func(algorithm Algorithm) (checkResult, error) {
632 | 			// untagged format is hash<space><space>name: check there are two spaces there
633 | 			if line[algorithm.Size()] != ' ' || line[algorithm.Size()+1] != ' ' {
634 | 				return zero, BadLineFormatError("--untagged must have two spaces between sum and file name")
635 | 			}
636 | 
637 | 			hash, _, ok := algorithm.hashFunc()
638 | 			if !ok {
639 | 				return zero, fmt.Errorf("unsupported --algorithm %q", c.algorithm)
640 | 			}
641 | 
642 | 			name := line[algorithm.Size()+2:]
643 | 			err := checkSum(name, hash, line[:algorithm.Size()])
644 | 			if err == nil {
645 | 				return stateOK(name), nil
646 | 			}
647 | 			if errors.Is(err, errMismatch) {
648 | 				return stateFAILED(name), nil
649 | 			}
650 | 			return stateIO(name), nil
651 | 		}
652 | 		if len(algorithms) == 0 {
653 | 			return checkSum(c.algorithm)
654 | 		}
655 | 		res, err := checkSum(algorithms[0])
656 | 		if err == nil {
657 | 			return res, nil
658 | 		}
659 | 		var blferr BadLineFormatError
660 | 		if errors.As(err, &blferr) {
661 | 			res, err = checkSum(algorithms[1])
662 | 			if err == nil {
663 | 				return res, nil
664 | 			}
665 | 		}
666 | 		return res, err
667 | 	}
668 | 
669 | 	debug.Printf("checkLine: detected --tag format")
670 | 	//TAG (file) = <hash>
671 | 	tag, rest, ok := strings.Cut(line, " ")
672 | 	if !ok {
673 | 		return zero, BadLineFormatError("no space after digest tag")
674 | 	}
675 | 	algorithm, err := parseAlgorithm(tag)
676 | 	if err != nil {
677 | 		return zero, badLineFormatErrorf("unsupported --algorithm tag %q", tag)
678 | 	}
679 | 
680 | 	if c.algorithm != NONE && !strings.EqualFold(c.algorithm.String(), tag) {
681 | 		return zero, badLineFormatErrorf("line tag %q does not match --algorithm %q", tag, c.algorithm.String())
682 | 	}
683 | 
684 | 	if len(rest) <= algorithm.Size() {
685 | 		return zero, badLineFormatErrorf("wrong size of hash: expected %d, got %d", algorithm.Size(), len(rest))
686 | 	}
687 | 	expected := rest[len(rest)-algorithm.Size():]
688 | 
689 | 	// rest is now (name) =
690 | 	rest = rest[:len(rest)-algorithm.Size()]
691 | 	// so check and remove all remaining bytes
692 | 	lr := len(rest)
693 | 	if lr <= 5 || rest[0] != '(' || rest[lr-4:] != ") = " {
694 | 		return zero, badLineFormatErrorf("missing `() = ` around file name")
695 | 	}
696 | 	name := rest[1 : lr-4]
697 | 
698 | 	hash, _, ok := algorithm.hashFunc()
699 | 	if !ok {
700 | 		return zero, fmt.Errorf("unsupported --algorithm %q", c.algorithm)
701 | 	}
702 | 	err = checkSum(name, hash, expected)
703 | 	if err == nil {
704 | 		return stateOK(name), nil
705 | 	}
706 | 	if errors.Is(err, errMismatch) {
707 | 		return stateFAILED(name), nil
708 | 	}
709 | 	if err != nil {
710 | 		return stateIO(name), err
711 | 	}
712 | 	panic("checkLine: tagged: should never go there")
713 | }
714 | 
// errMismatch is returned by checkSum when the computed digest differs from
// the expected one. It stays internal: checkLine translates it into the
// stFAILED state and returns a nil error instead.
var errMismatch = errors.New("checksum mismatch")
716 | 
717 | func checkSum(name string, hashFunc func() simpleHash, expected string) error {
718 | 	f, err := os.Open(name)
719 | 	if err != nil {
720 | 		return err
721 | 	}
722 | 	defer f.Close()
723 | 	checkSum, err := digest(hashFunc(), f)
724 | 	if err != nil {
725 | 		return err
726 | 	}
727 | 
728 | 	if expected == checkSum {
729 | 		return nil
730 | 	}
731 | 	return errMismatch
732 | }
733 | 


--------------------------------------------------------------------------------