├── LICENSE └── freq.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * The names of its contributors may not be used to endorse or 14 | promote products derived from this software without specific prior 15 | written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /freq.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Freq (frequency) counts how many times each distinct 6 | // Unicode code point appears in the input. The -bytes 7 | // option counts bytes instead. The table is then printed 8 | // to standard output, one count per line. Nothing is 9 | // printed for a code point if its count is zero. 10 | package main // import "robpike.io/cmd/freq" 11 | 12 | import ( 13 | "bufio" 14 | "flag" 15 | "fmt" 16 | "io" 17 | "os" 18 | "strconv" 19 | ) 20 | 21 | var ( 22 | countBytes bool 23 | ) 24 | 25 | func init() { 26 | flag.BoolVar(&countBytes, "bytes", false, "count bytes (default is runes)") 27 | flag.BoolVar(&countBytes, "b", false, "alias for -bytes") 28 | } 29 | 30 | func main() { 31 | flag.Parse() 32 | if flag.NArg() == 0 { 33 | read("", os.Stdin) 34 | } 35 | for _, file := range flag.Args() { 36 | f, err := os.Open(file) 37 | if err != nil { 38 | fmt.Fprintln(os.Stderr, "freq:", err) 39 | os.Exit(1) 40 | } 41 | read(file, f) 42 | f.Close() 43 | } 44 | print() 45 | } 46 | 47 | // We lazily fill in the intermediate arrays, each 256 entries long. 48 | // Unicode is 22 bits, so we only need 3 levels max. 49 | // Indexing starts with the uppermost byte, so the innermost array 50 | // (of uint64 elements) represents 256 consecutive code points. 51 | type Counts [256]*[256]*[256]uint64 52 | 53 | var counts = new(Counts) // Allocate the top level; we know we'll need it unless the input is empty. 54 | var errors uint64 // Special count to distinguish FFFD from real errors. 55 | 56 | func (c *Counts) Inc(r rune) { 57 | b2 := (r >> 16) & 0xFF 58 | b1 := (r >> 8) & 0xFF 59 | b0 := (r >> 0) & 0xFF 60 | c2 := (*c)[b2] 61 | if c2 == nil { 62 | c2 = new([256]*[256]uint64) 63 | (*c)[b2] = c2 64 | } 65 | c1 := c2[b1] 66 | if c1 == nil { 67 | c1 = new([256]uint64) 68 | c2[b1] = c1 69 | } 70 | c1[b0]++ 71 | } 72 | 73 | func read(file string, f *os.File) { 74 | if countBytes { 75 | readBytes(file, f) 76 | } else { 77 | readRunes(file, f) 78 | } 79 | } 80 | 81 | func readBytes(file string, f *os.File) { 82 | buf := bufio.NewReader(f) 83 | for { 84 | byte, err := buf.ReadByte() 85 | if err != nil { 86 | if err == io.EOF { 87 | return 88 | } 89 | fmt.Fprintf(os.Stderr, "freq: %s: %s\n", file, err) 90 | os.Exit(1) 91 | } 92 | counts.Inc(rune(byte)) 93 | } 94 | } 95 | 96 | func readRunes(file string, f *os.File) { 97 | buf := bufio.NewReader(f) 98 | for { 99 | rune, width, err := buf.ReadRune() 100 | if err != nil { 101 | if err == io.EOF { 102 | return 103 | } 104 | fmt.Fprintf(os.Stderr, "freq: %s: %s\n", file, err) 105 | os.Exit(1) 106 | } 107 | if rune == 0xFFFD && width == 1 { 108 | errors++ 109 | } else { 110 | counts.Inc(rune) 111 | } 112 | } 113 | } 114 | 115 | func print() { 116 | if countBytes { 117 | printCounts("%.2x %c\t%d\n", "%.2x -\t%d\n") 118 | } else { 119 | printCounts("%.4x %c\t%d\n", "%.4x -\t%d\n") 120 | } 121 | } 122 | 123 | func printCounts(printable, unprintable string) { 124 | for b2 := range *counts { 125 | c2 := (*counts)[b2] 126 | if c2 == nil { 127 | continue 128 | } 129 | for b1 := range c2 { 130 | c1 := c2[b1] 131 | if c1 == nil { 132 | continue 133 | } 134 | for b0, count := range c1 { 135 | if count == 0 { 136 | continue 137 | } 138 | r := rune((b2 << 16) | (b1 << 8) | b0) 139 | if r != ' ' && strconv.IsPrint(r) { 140 | fmt.Printf(printable, r, r, count) 141 | } else { 142 | fmt.Printf(unprintable, r, count) 143 | } 144 | } 145 | } 146 | } 147 | if errors > 0 { 148 | fmt.Printf("error -\t%d\n", errors) 149 | } 150 | } 151 | --------------------------------------------------------------------------------