├── go.mod ├── README.md ├── Makefile ├── main.go ├── rsort2b.go ├── rsort2a.go ├── randomdata.go └── rsort2a_test.go /go.mod: -------------------------------------------------------------------------------- 1 | module gordxsort 2 | 3 | go 1.22.3 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | This is a simple radix sort that is able to sort variable-length 3 | strings. 4 | 5 | Use the Makefile to format, vet, build, or test. 6 | 7 | The test file shows the performance of radix sort as compared to 8 | slices.Sort. 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := build 2 | 3 | .PHONY:fmt vet build 4 | 5 | fmt: 6 | go fmt *.go 7 | 8 | vet: fmt 9 | go vet *.go 10 | 11 | build: vet 12 | go build -o rdxsort *.go 13 | 14 | profile: 15 | go test -cpuprofile cpu.prof -memprofile mem.prof -bench . 
16 | 17 | test: 18 | go test 19 | 20 | clean: 21 | /bin/rm -f rdxsort gordxsort.test 22 | /bin/rm -f cpu.prof mem.prof profile001.callgraph.out 23 | 24 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | ) 10 | 11 | //type line []byte 12 | //type lines []line 13 | 14 | func main() { 15 | var fn string 16 | flag.StringVar(&fn, "file", "", "name of file to sort") 17 | flag.Parse() 18 | var lns lines 19 | 20 | var err error 21 | 22 | fp := os.Stdin 23 | if fn != "" { 24 | fp, err = os.Open(fn) 25 | if err != nil { 26 | log.Fatal(err) 27 | } 28 | defer fp.Close() 29 | } 30 | 31 | scanner := bufio.NewScanner(fp) 32 | // option, resize scanner's capacity for lines over 64K, see next example 33 | for scanner.Scan() { 34 | //fmt.Println(scanner.Text()) 35 | l := scanner.Text() 36 | bln := []byte(l) 37 | lns = append(lns, bln) 38 | } 39 | slns := rsort2a(lns, 0) 40 | for i, _ := range slns { 41 | fmt.Println(string(slns[i])) 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /rsort2b.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | ) 6 | 7 | func binsertionsort2b(lns []string) []string { 8 | n := len(lns) 9 | if n == 1 { 10 | return lns 11 | } 12 | for i := 0; i < n; i++ { 13 | for j := i; j > 0 && lns[j-1] > lns[j]; j-- { 14 | lns[j], lns[j-1] = lns[j-1], lns[j] 15 | } 16 | } 17 | return lns 18 | } 19 | 20 | // bostic 21 | func rsort2b(lns []string, recix int) []string { 22 | var piles = make([][]string, 256) 23 | var nc int 24 | nl := len(lns) 25 | 26 | if nl == 0 { 27 | log.Fatal("rsort2b: 0 len []string: ", recix) 28 | } 29 | if nl < THRESHOLD { 30 | return binsertionsort2b(lns) 31 | } 32 | 33 | // deal []string into piles 34 
| for i, _ := range lns { 35 | var c int 36 | 37 | if len(lns[i]) == 0 { 38 | log.Fatal("rsort2b 0 length string") 39 | } 40 | if recix >= len(lns[i]) { 41 | c = 0 42 | } else { 43 | c = int(lns[i][recix]) 44 | } 45 | piles[c] = append(piles[c], string(lns[i])) 46 | if len(piles[c]) == 1 { 47 | nc++ // number of piles so far 48 | } 49 | } 50 | 51 | // sort the piles 52 | if nc == 1 { 53 | return binsertionsort2b(lns) 54 | } 55 | for i, _ := range piles { 56 | if len(piles[i]) == 0 { 57 | continue 58 | } 59 | 60 | // sort pile 61 | if len(piles[i]) < THRESHOLD { 62 | piles[i] = binsertionsort2b(piles[i]) 63 | } else { 64 | piles[i] = rsort2b(piles[i], recix+1) 65 | } 66 | nc-- 67 | if nc == 0 { 68 | break 69 | } 70 | } 71 | 72 | // combine the sorted piles 73 | var slns []string 74 | for i, _ := range piles { 75 | for j, _ := range piles[i] { 76 | slns = append(slns, piles[i][j]) 77 | } 78 | } 79 | return slns 80 | } 81 | -------------------------------------------------------------------------------- /rsort2a.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "log" 6 | ) 7 | 8 | const THRESHOLD int = 1 << 5 9 | 10 | type line = []byte 11 | type lines = []line 12 | 13 | func binsertionsort(lns lines) lines { 14 | n := len(lns) 15 | if n == 1 { 16 | return lns 17 | } 18 | for i := 0; i < n; i++ { 19 | for j := i; j > 0 && bytes.Compare(lns[j-1], lns[j]) > 0; j-- { 20 | lns[j], lns[j-1] = lns[j-1], lns[j] 21 | } 22 | } 23 | return lns 24 | } 25 | 26 | // bostic 27 | func rsort2a(lns lines, recix int) lines { 28 | var piles = make([][]line, 256) 29 | var nc int 30 | nl := len(lns) 31 | 32 | if nl == 0 { 33 | log.Fatal("rsort2a: 0 len lines: ", recix) 34 | } 35 | if nl < THRESHOLD { 36 | return binsertionsort(lns) 37 | } 38 | 39 | // deal lines into piles 40 | for i, _ := range lns { 41 | var c int 42 | 43 | if len(lns[i]) == 0 { 44 | log.Fatal("rsort2a 0 length string") 45 | } 46 | if 
recix >= len(lns[i]) { 47 | c = 0 48 | } else { 49 | c = int(lns[i][recix]) 50 | } 51 | piles[c] = append(piles[c], line(lns[i])) 52 | if len(piles[c]) == 1 { 53 | nc++ // number of piles so far 54 | } 55 | } 56 | 57 | // sort the piles 58 | if nc == 1 { 59 | return binsertionsort(lns) 60 | } 61 | for i, _ := range piles { 62 | if len(piles[i]) == 0 { 63 | continue 64 | } 65 | 66 | // sort pile 67 | if len(piles[i]) < THRESHOLD { 68 | piles[i] = binsertionsort(piles[i]) 69 | } else { 70 | piles[i] = rsort2a(piles[i], recix+1) 71 | } 72 | nc-- 73 | if nc == 0 { 74 | break 75 | } 76 | } 77 | 78 | // combine the sorted piles 79 | var slns lines 80 | for i, _ := range piles { 81 | for j, _ := range piles[i] { 82 | slns = append(slns, piles[i][j]) 83 | } 84 | } 85 | return slns 86 | } 87 | -------------------------------------------------------------------------------- /randomdata.go: -------------------------------------------------------------------------------- 1 | // package goranddatagen generates random string, uint64, and datetime data 2 | package main 3 | 4 | //package goranddatagen 5 | 6 | import ( 7 | "fmt" 8 | "math/rand" 9 | "time" 10 | ) 11 | 12 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 13 | 14 | // randSeq(n int) 15 | // generate a random string length n with lower, upper case letters and digits 16 | func randSeq(n int, rlen bool) string { 17 | b := make([]rune, n) 18 | ll := len(letters) 19 | for i := range b { 20 | b[i] = letters[rand.Intn(ll)] 21 | } 22 | if rlen == true { 23 | rl := rand.Intn(n) 24 | if rl != 0 { 25 | b = b[:rl] 26 | } 27 | } 28 | return string(b) 29 | } 30 | 31 | // randomstrings(n int, slen int) 32 | // generate n random strings with length slen 33 | // return a slice containing the strings 34 | func randomstrings(n int, slen int, rlen bool) []string { 35 | ssl := make([]string, 0) 36 | for _ = range n { 37 | ssl = append(ssl, randSeq(slen, rlen)) 38 | } 39 | return ssl 40 | } 41 | 42 | // 
randomints(ņ int) 43 | // generate n random int64 values 44 | // return a slice containing the int64 values 45 | func randomuints(n int, rlen bool) []uint64 { 46 | usl := make([]uint64, 0) 47 | for _ = range n { 48 | fmt.Println(rand.Uint64()) 49 | usl = append(usl, rand.Uint64()) 50 | } 51 | if rlen == true { 52 | rl := rand.Intn(n) 53 | if rl != 0 { 54 | usl = usl[:rl] 55 | } 56 | } 57 | return usl 58 | } 59 | 60 | // randomdates(n int, format string) 61 | // generate n random dates with format 62 | // return a slice containing the random date strings 63 | func randomdates(n int, format string) []string { 64 | now := time.Now().Unix() 65 | var mod = int64(now) 66 | var s string 67 | dsl := make([]string, 0) 68 | for _ = range n { 69 | ri := rand.Int63() % mod 70 | tm := time.Unix(int64(ri), int64(0)) 71 | 72 | switch format { 73 | case "DateTime": 74 | s = fmt.Sprint(tm.Format(time.DateTime)) 75 | case "Layout": 76 | s = fmt.Sprint(tm.Format(time.Layout)) 77 | case "RubyDate": 78 | s = fmt.Sprint(tm.Format(time.RubyDate)) 79 | case "UnixDate": 80 | s = fmt.Sprint(tm.Format(time.UnixDate)) 81 | case "RFC3339": 82 | s = fmt.Sprint(tm.Format(time.RFC3339)) 83 | default: 84 | s = fmt.Sprint(tm) 85 | } 86 | dsl = append(dsl, s) 87 | } 88 | return dsl 89 | } 90 | -------------------------------------------------------------------------------- /rsort2a_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "runtime" 7 | "slices" 8 | "sort" 9 | "testing" 10 | ) 11 | 12 | func BenchmarkStrings(b *testing.B) { 13 | for _, n := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16, 1 << 20, 1 << 24} { 14 | b.Run(fmt.Sprint("n=", n), func(b *testing.B) { 15 | for _, s := range []int{8, 16, 32, 64, 128, 256, 512} { 16 | var rand, lines []string 17 | b.Run(fmt.Sprint("len=", s), func(b *testing.B) { 18 | if rand == nil { 19 | rand = randomstrings(n, s, true) 20 | lines = make([]string, n) 21 
| } 22 | b.Run("alg=radix", func(b *testing.B) { 23 | b.ReportAllocs() 24 | for i := 0; i < b.N; i++ { 25 | copy(lines, rand) 26 | out := rsort2b(lines, 0) 27 | runtime.KeepAlive(&out[0]) 28 | } 29 | }) 30 | b.Run("alg=sort.Strings", func(b *testing.B) { 31 | b.ReportAllocs() 32 | for i := 0; i < b.N; i++ { 33 | copy(lines, rand) 34 | sort.Strings(lines) 35 | } 36 | }) 37 | b.Run("alg=slices.Sort", func(b *testing.B) { 38 | b.ReportAllocs() 39 | for i := 0; i < b.N; i++ { 40 | copy(lines, rand) 41 | slices.Sort(lines) 42 | } 43 | }) 44 | }) 45 | } 46 | }) 47 | } 48 | } 49 | 50 | func BenchmarkBytes(b *testing.B) { 51 | for _, n := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16, 1 << 20, 1 << 24} { 52 | b.Run(fmt.Sprint("n=", n), func(b *testing.B) { 53 | for _, s := range []int{8, 16, 32, 64, 128, 256, 512} { 54 | var rand, lines [][]byte 55 | var randstr []string 56 | b.Run(fmt.Sprint("len=", s), func(b *testing.B) { 57 | if rand == nil { 58 | randstr = randomstrings(n, s, true) 59 | rand = make([][]byte, n) 60 | for i, s := range randstr { 61 | rand[i] = []byte(s) 62 | } 63 | lines = make([][]byte, n) 64 | b.ResetTimer() 65 | } 66 | b.Run("alg=radix", func(b *testing.B) { 67 | b.ReportAllocs() 68 | for i := 0; i < b.N; i++ { 69 | copy(lines, rand) 70 | out := rsort2a(rand, 0) 71 | runtime.KeepAlive(&out[0]) 72 | } 73 | }) 74 | b.Run("alg=slices.Sort", func(b *testing.B) { 75 | b.ReportAllocs() 76 | for i := 0; i < b.N; i++ { 77 | copy(lines, rand) 78 | slices.SortFunc(lines, bytes.Compare) 79 | } 80 | }) 81 | }) 82 | } 83 | }) 84 | } 85 | } 86 | --------------------------------------------------------------------------------