├── Makefile ├── .travis.yml ├── util_test.go ├── LICENSE ├── tables.go ├── runes_test.go ├── runes.go ├── cmd └── bootstrap │ └── main.go ├── testdata └── corpus.txt ├── bench_test.go ├── bench_std_test.go └── README.md /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: bench 2 | 3 | bench: 4 | @go test -v -bench=. -tags=std > old.txt 5 | @go test -v -bench=. > new.txt 6 | @benchcmp old.txt new.txt 7 | @rm -f old.txt new.txt 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - "1.2" 5 | - "1.3" 6 | - "1.4" 7 | - "1.5" 8 | - "1.6" 9 | - "1.7" 10 | - "1.8" 11 | - "1.9" 12 | - "1.10" 13 | - "tip" 14 | 15 | script: 16 | - go test 17 | -------------------------------------------------------------------------------- /util_test.go: -------------------------------------------------------------------------------- 1 | package runes 2 | 3 | import "unicode" 4 | 5 | func getTestCase(t uint) (rune, rune) { 6 | var pos, neg rune = -1, -1 7 | 8 | for r := unicode.MaxRune; r > -1; r-- { 9 | if pos == -1 && is(t, r) { 10 | pos = r 11 | } 12 | 13 | if neg == -1 && !is(t, r) { 14 | neg = r 15 | } 16 | 17 | if pos != -1 && neg != -1 { 18 | break 19 | } 20 | } 21 | 22 | return pos, neg 23 | } 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Martin Gallagher 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, 
and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tables.go: -------------------------------------------------------------------------------- 1 | package runes 2 | 3 | import ( 4 | "unicode" 5 | "unicode/utf8" 6 | ) 7 | 8 | const ( 9 | digit = 1 << iota 10 | graphic 11 | letter 12 | lower 13 | mark 14 | number 15 | print 16 | punct 17 | space 18 | symbol 19 | title 20 | upper 21 | ) 22 | 23 | type char struct { 24 | lower rune 25 | title rune 26 | upper rune 27 | folded rune 28 | flags uint 29 | } 30 | 31 | var properties [unicode.MaxRune + 1]char 32 | 33 | func init() { 34 | for r := rune(0); r <= unicode.MaxRune; r++ { 35 | if !utf8.ValidRune(r) { 36 | continue 37 | } 38 | 39 | var v char 40 | 41 | if unicode.IsDigit(r) { 42 | v.flags |= digit 43 | } 44 | 45 | if unicode.IsGraphic(r) { 46 | v.flags |= graphic 47 | } 48 | 49 | if unicode.IsLetter(r) { 50 | v.flags |= letter 51 | } 52 | 53 | if unicode.IsLower(r) { 54 | v.flags |= lower 55 | } 56 | 57 | if unicode.IsMark(r) { 58 | v.flags |= mark 59 | } 60 | 61 | if unicode.IsNumber(r) { 62 | v.flags |= number 63 | } 64 | 65 | if unicode.IsPrint(r) { 66 | v.flags |= print 67 | } 68 | 69 | if unicode.IsPunct(r) { 70 | v.flags |= punct 71 | } 
72 | 73 | if unicode.IsSpace(r) { 74 | v.flags |= space 75 | } 76 | 77 | if unicode.IsSymbol(r) { 78 | v.flags |= symbol 79 | } 80 | 81 | if unicode.IsTitle(r) { 82 | v.flags |= title 83 | } 84 | 85 | if unicode.IsUpper(r) { 86 | v.flags |= upper 87 | } 88 | 89 | v.lower = unicode.ToLower(r) 90 | v.title = unicode.ToTitle(r) 91 | v.upper = unicode.ToUpper(r) 92 | v.folded = unicode.SimpleFold(r) 93 | properties[r] = v 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /runes_test.go: -------------------------------------------------------------------------------- 1 | package runes 2 | 3 | import ( 4 | "io/ioutil" 5 | "strings" 6 | "testing" 7 | "unicode" 8 | ) 9 | 10 | var ( 11 | testString string 12 | testStringLen int64 13 | ) 14 | 15 | func init() { 16 | b, err := ioutil.ReadFile("./testdata/corpus.txt") 17 | 18 | if err != nil { 19 | panic(err) 20 | } 21 | 22 | testString = string(b) 23 | testStringLen = int64(len(testString)) 24 | } 25 | 26 | func TestFuncs(t *testing.T) { 27 | for r := rune(0); r <= unicode.MaxRune; r++ { 28 | if IsDigit(r) != unicode.IsDigit(r) { 29 | t.Fatalf("IsDigit failed for: %U, expected %v, got %v", 30 | r, unicode.IsDigit(r), IsDigit(r)) 31 | } 32 | 33 | if IsDigitUnsafe(r) != unicode.IsDigit(r) { 34 | t.Fatalf("IsDigitUnsafe failed for: %U, expected %v, got %v", 35 | r, unicode.IsDigit(r), IsDigitUnsafe(r)) 36 | } 37 | 38 | if IsGraphic(r) != unicode.IsGraphic(r) { 39 | t.Fatalf("IsGraphic failed for: %U, expected %v, got %v", 40 | r, unicode.IsGraphic(r), IsGraphic(r)) 41 | } 42 | 43 | if IsGraphicUnsafe(r) != unicode.IsGraphic(r) { 44 | t.Fatalf("IsGraphicUnsafe failed for: %U, expected %v, got %v", 45 | r, unicode.IsGraphic(r), IsGraphicUnsafe(r)) 46 | } 47 | 48 | if IsLetter(r) != unicode.IsLetter(r) { 49 | t.Fatalf("IsLetter failed for: %U, expected %v, got %v", 50 | r, unicode.IsLetter(r), IsLetter(r)) 51 | } 52 | 53 | if IsLetterUnsafe(r) != unicode.IsLetter(r) { 54 | 
t.Fatalf("IsLetterUnsafe failed for: %U, expected %v, got %v", 55 | r, unicode.IsLetter(r), IsLetterUnsafe(r)) 56 | } 57 | 58 | if IsLower(r) != unicode.IsLower(r) { 59 | t.Fatalf("IsLower failed for: %U, expected %v, got %v", 60 | r, unicode.IsLower(r), IsLower(r)) 61 | } 62 | 63 | if IsLowerUnsafe(r) != unicode.IsLower(r) { 64 | t.Fatalf("IsLowerUnsafe failed for: %U, expected %v, got %v", 65 | r, unicode.IsLower(r), IsLowerUnsafe(r)) 66 | } 67 | 68 | if IsMark(r) != unicode.IsMark(r) { 69 | t.Fatalf("IsMark failed for: %U, expected %v, got %v", 70 | r, unicode.IsMark(r), IsMark(r)) 71 | } 72 | 73 | if IsMarkUnsafe(r) != unicode.IsMark(r) { 74 | t.Fatalf("IsMarkUnsafe failed for: %U, expected %v, got %v", 75 | r, unicode.IsMark(r), IsMarkUnsafe(r)) 76 | } 77 | 78 | if IsNumber(r) != unicode.IsNumber(r) { 79 | t.Fatalf("IsNumber failed for: %U, expected %v, got %v", 80 | r, unicode.IsNumber(r), IsNumber(r)) 81 | } 82 | 83 | if IsNumberUnsafe(r) != unicode.IsNumber(r) { 84 | t.Fatalf("IsNumberUnsafe failed for: %U, expected %v, got %v", 85 | r, unicode.IsNumber(r), IsNumberUnsafe(r)) 86 | } 87 | 88 | if IsPrint(r) != unicode.IsPrint(r) { 89 | t.Fatalf("IsPrint failed for: %U, expected %v, got %v", 90 | r, unicode.IsPrint(r), IsPrint(r)) 91 | } 92 | 93 | if IsPrintUnsafe(r) != unicode.IsPrint(r) { 94 | t.Fatalf("IsPrintUnsafe failed for: %U, expected %v, got %v", 95 | r, unicode.IsPrint(r), IsPrintUnsafe(r)) 96 | } 97 | 98 | if IsPunct(r) != unicode.IsPunct(r) { 99 | t.Fatalf("IsPunct failed for: %U, expected %v, got %v", 100 | r, unicode.IsPunct(r), IsPunct(r)) 101 | } 102 | 103 | if IsPunctUnsafe(r) != unicode.IsPunct(r) { 104 | t.Fatalf("IsPunctUnsafe failed for: %U, expected %v, got %v", 105 | r, unicode.IsPunct(r), IsPunctUnsafe(r)) 106 | } 107 | 108 | if IsSpace(r) != unicode.IsSpace(r) { 109 | t.Fatalf("IsSpace failed for: %U, expected %v, got %v", 110 | r, unicode.IsSpace(r), IsSpace(r)) 111 | } 112 | 113 | if IsSpaceUnsafe(r) != unicode.IsSpace(r) { 114 | 
t.Fatalf("IsSpaceUnsafe failed for: %U, expected %v, got %v", 115 | r, unicode.IsSpace(r), IsSpaceUnsafe(r)) 116 | } 117 | 118 | if IsSymbol(r) != unicode.IsSymbol(r) { 119 | t.Fatalf("IsSymbol failed for: %U, expected %v, got %v", 120 | r, unicode.IsSymbol(r), IsSymbol(r)) 121 | } 122 | 123 | if IsSymbolUnsafe(r) != unicode.IsSymbol(r) { 124 | t.Fatalf("IsSymbolUnsafe failed for: %U, expected %v, got %v", 125 | r, unicode.IsSymbol(r), IsSymbolUnsafe(r)) 126 | } 127 | 128 | if IsTitle(r) != unicode.IsTitle(r) { 129 | t.Fatalf("IsTitle failed for: %U, expected %v, got %v", 130 | r, unicode.IsTitle(r), IsTitle(r)) 131 | } 132 | 133 | if IsTitleUnsafe(r) != unicode.IsTitle(r) { 134 | t.Fatalf("IsTitleUnsafe failed for: %U, expected %v, got %v", 135 | r, unicode.IsTitle(r), IsTitleUnsafe(r)) 136 | } 137 | 138 | if IsUpper(r) != unicode.IsUpper(r) { 139 | t.Fatalf("IsUpper failed for: %U, expected %v, got %v", 140 | r, unicode.IsUpper(r), IsUpper(r)) 141 | } 142 | 143 | if IsUpperUnsafe(r) != unicode.IsUpper(r) { 144 | t.Fatalf("IsUpperUnsafe failed for: %U, expected %v, got %v", 145 | r, unicode.IsUpper(r), IsUpperUnsafe(r)) 146 | } 147 | 148 | if SimpleFold(r) != unicode.SimpleFold(r) { 149 | t.Fatalf("SimpleFold failed for: %U, expected %v, got %v", 150 | r, unicode.SimpleFold(r), SimpleFold(r)) 151 | } 152 | 153 | if ToLower(r) != unicode.ToLower(r) { 154 | t.Fatalf("Lower failed for: %U, expected %U, got %U", 155 | r, unicode.ToLower(r), ToLower(r)) 156 | } 157 | 158 | if To(unicode.LowerCase, r) != unicode.To(unicode.LowerCase, r) { 159 | t.Fatalf("To failed for: %U, expected %U, got %U", 160 | r, unicode.To(unicode.LowerCase, r), To(unicode.LowerCase, r)) 161 | } 162 | 163 | if ToTitle(r) != unicode.ToTitle(r) { 164 | t.Fatalf("Title failed for: %U, expected %U, got %U", 165 | r, unicode.ToTitle(r), ToTitle(r)) 166 | } 167 | 168 | if To(unicode.TitleCase, r) != unicode.To(unicode.TitleCase, r) { 169 | t.Fatalf("To failed for: %U, expected %U, got %U", 170 | r, 
unicode.To(unicode.TitleCase, r), To(unicode.TitleCase, r)) 171 | } 172 | 173 | if ToUpper(r) != unicode.ToUpper(r) { 174 | t.Fatalf("Upper failed for: %U, expected %U, got %U", 175 | r, unicode.ToUpper(r), ToUpper(r)) 176 | } 177 | 178 | if To(unicode.UpperCase, r) != unicode.To(unicode.UpperCase, r) { 179 | t.Fatalf("To failed for: %U, expected %U, got %U", 180 | r, unicode.To(unicode.UpperCase, r), To(unicode.UpperCase, r)) 181 | } 182 | } 183 | 184 | const ( 185 | _case = 99 186 | r = 'A' 187 | ) 188 | 189 | if To(_case, r) != unicode.To(_case, r) { 190 | t.Fatalf("To failed for: %U, expected %U, got %U", 191 | r, unicode.To(_case, r), To(_case, r)) 192 | } 193 | 194 | if strings.Map(ToLower, testString) != strings.Map(unicode.ToLower, testString) { 195 | t.Fatal("ToLower failed ") 196 | } 197 | 198 | if strings.Map(ToTitle, testString) != strings.Map(unicode.ToTitle, testString) { 199 | t.Fatal("ToTitle failed ") 200 | } 201 | 202 | if strings.Map(ToUpper, testString) != strings.Map(unicode.ToUpper, testString) { 203 | t.Fatal("ToUpper failed ") 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /runes.go: -------------------------------------------------------------------------------- 1 | //go:generate go run cmd/bootstrap/main.go 2 | 3 | // Package runes provides functions to test some properties of 4 | // Unicode code points. 5 | package runes 6 | 7 | import ( 8 | "unicode" 9 | "unicode/utf8" 10 | ) 11 | 12 | // IsControl reports whether the rune is a control character. 13 | // The C (Other) Unicode category includes more code points 14 | // such as surrogates; use unicode.Is(unicode.C, r) to test for them. 15 | func IsControl(r rune) bool { 16 | // Included for symmetry 17 | return unicode.IsControl(r) 18 | } 19 | 20 | // IsDigit reports whether the rune is a decimal digit. 21 | func IsDigit(r rune) bool { 22 | return is(digit, r) 23 | } 24 | 25 | // IsDigitUnsafe is the unsafe version of IsDigit.
26 | func IsDigitUnsafe(r rune) bool { 27 | return isUnsafe(digit, r) 28 | } 29 | 30 | // IsGraphic reports whether the rune is defined as a Graphic by Unicode. 31 | // Such characters include letters, marks, numbers, punctuation, symbols, and 32 | // spaces, from categories L, M, N, P, S, Zs. 33 | func IsGraphic(r rune) bool { 34 | return is(graphic, r) 35 | } 36 | 37 | // IsGraphicUnsafe is the unsafe version of IsGraphic. 38 | func IsGraphicUnsafe(r rune) bool { 39 | return isUnsafe(graphic, r) 40 | } 41 | 42 | // IsLetter reports whether the rune is a letter (category L). 43 | func IsLetter(r rune) bool { 44 | return is(letter, r) 45 | } 46 | 47 | // IsLetterUnsafe is the unsafe version of IsLetter. 48 | func IsLetterUnsafe(r rune) bool { 49 | return isUnsafe(letter, r) 50 | } 51 | 52 | // IsLower reports whether the rune is a lower case letter. 53 | func IsLower(r rune) bool { 54 | return is(lower, r) 55 | } 56 | 57 | // IsLowerUnsafe is the unsafe version of IsLower. 58 | func IsLowerUnsafe(r rune) bool { 59 | return isUnsafe(lower, r) 60 | } 61 | 62 | // IsMark reports whether the rune is a mark character (category M). 63 | func IsMark(r rune) bool { 64 | return is(mark, r) 65 | } 66 | 67 | // IsMarkUnsafe is the unsafe version of IsMark. 68 | func IsMarkUnsafe(r rune) bool { 69 | return isUnsafe(mark, r) 70 | } 71 | 72 | // IsNumber reports whether the rune is a number (category N). 73 | func IsNumber(r rune) bool { 74 | return is(number, r) 75 | } 76 | 77 | // IsNumberUnsafe is the unsafe version of IsNumber. 78 | func IsNumberUnsafe(r rune) bool { 79 | return isUnsafe(number, r) 80 | } 81 | 82 | // IsPrint reports whether the rune is defined as printable by Go. Such 83 | // characters include letters, marks, numbers, punctuation, symbols, and the 84 | // ASCII space character, from categories L, M, N, P, S and the ASCII space 85 | // character. 
This categorization is the same as IsGraphic except that the 86 | // only spacing character is ASCII space, U+0020. 87 | func IsPrint(r rune) bool { 88 | return is(print, r) 89 | } 90 | 91 | // IsPrintUnsafe is the unsafe version of IsPrint. 92 | func IsPrintUnsafe(r rune) bool { 93 | return isUnsafe(print, r) 94 | } 95 | 96 | // IsPunct reports whether the rune is a Unicode punctuation character 97 | // (category P). 98 | func IsPunct(r rune) bool { 99 | return is(punct, r) 100 | } 101 | 102 | // IsPunctUnsafe is the unsafe version of IsPunct. 103 | func IsPunctUnsafe(r rune) bool { 104 | return isUnsafe(punct, r) 105 | } 106 | 107 | // IsSpace reports whether the rune is a space character as defined 108 | // by Unicode's White Space property; in the Latin-1 space 109 | // this is 110 | // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). 111 | // Other definitions of spacing characters are set by category 112 | // Z and property Pattern_White_Space. 113 | func IsSpace(r rune) bool { 114 | return is(space, r) 115 | } 116 | 117 | // IsSpaceUnsafe is the unsafe version of IsSpace. 118 | func IsSpaceUnsafe(r rune) bool { 119 | return isUnsafe(space, r) 120 | } 121 | 122 | // IsSymbol reports whether the rune is a symbolic character. 123 | func IsSymbol(r rune) bool { 124 | return is(symbol, r) 125 | } 126 | 127 | // IsSymbolUnsafe is the unsafe version of IsSymbol. 128 | func IsSymbolUnsafe(r rune) bool { 129 | return isUnsafe(symbol, r) 130 | } 131 | 132 | // IsTitle reports whether the rune is a title case letter. 133 | func IsTitle(r rune) bool { 134 | return is(title, r) 135 | } 136 | 137 | // IsTitleUnsafe is the unsafe version of IsTitle. 138 | func IsTitleUnsafe(r rune) bool { 139 | return isUnsafe(title, r) 140 | } 141 | 142 | // IsUpper reports whether the rune is an upper case letter. 143 | func IsUpper(r rune) bool { 144 | return is(upper, r) 145 | } 146 | 147 | // IsUpperUnsafe is the unsafe version of IsUpper. 
148 | func IsUpperUnsafe(r rune) bool { 149 | return isUnsafe(upper, r) 150 | } 151 | 152 | // SimpleFold iterates over Unicode code points equivalent under 153 | // the Unicode-defined simple case folding. Among the code points 154 | // equivalent to rune (including rune itself), SimpleFold returns the 155 | // smallest rune > r if one exists, or else the smallest rune >= 0. 156 | // If r is not a valid Unicode code point, SimpleFold(r) returns r. 157 | // 158 | // For example: 159 | // SimpleFold('A') = 'a' 160 | // SimpleFold('a') = 'A' 161 | // 162 | // SimpleFold('K') = 'k' 163 | // SimpleFold('k') = '\u212A' (Kelvin symbol, K) 164 | // SimpleFold('\u212A') = 'K' 165 | // 166 | // SimpleFold('1') = '1' 167 | // 168 | // SimpleFold(-2) = -2 169 | // 170 | func SimpleFold(r rune) rune { 171 | if !utf8.ValidRune(r) { 172 | return r 173 | } 174 | 175 | return properties[r].folded 176 | } 177 | 178 | // SimpleFoldUnsafe is the unsafe version of SimpleFold. 179 | func SimpleFoldUnsafe(r rune) rune { 180 | return properties[r].folded 181 | } 182 | 183 | func isUnsafe(t uint, r rune) bool { 184 | return properties[r].flags&t != 0 185 | } 186 | 187 | func is(t uint, r rune) bool { 188 | return r <= unicode.MaxRune && properties[r].flags&t != 0 189 | } 190 | 191 | // To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase. 192 | func To(_case int, r rune) rune { 193 | switch _case { 194 | case unicode.LowerCase: 195 | return ToLower(r) 196 | case unicode.TitleCase: 197 | return ToTitle(r) 198 | case unicode.UpperCase: 199 | return ToUpper(r) 200 | } 201 | 202 | // Emulate standard library behaviour 203 | return unicode.ReplacementChar 204 | } 205 | 206 | // ToUnsafe is the unsafe version of To. 
207 | func ToUnsafe(_case int, r rune) rune { 208 | switch _case { 209 | case unicode.LowerCase: 210 | return ToLowerUnsafe(r) 211 | case unicode.TitleCase: 212 | return ToTitleUnsafe(r) 213 | case unicode.UpperCase: 214 | return ToUpperUnsafe(r) 215 | } 216 | 217 | // Emulate standard library behaviour 218 | return unicode.ReplacementChar 219 | } 220 | 221 | // ToLower maps the rune to lower case. 222 | func ToLower(r rune) rune { 223 | if !utf8.ValidRune(r) { 224 | return r 225 | } 226 | 227 | return properties[r].lower 228 | } 229 | 230 | // ToLowerUnsafe is the unsafe version of ToLower. 231 | func ToLowerUnsafe(r rune) rune { 232 | return properties[r].lower 233 | } 234 | 235 | // ToTitle maps the rune to title case. 236 | func ToTitle(r rune) rune { 237 | if !utf8.ValidRune(r) { 238 | return r 239 | } 240 | 241 | return properties[r].title 242 | } 243 | 244 | // ToTitleUnsafe is the unsafe version of ToTitle. 245 | func ToTitleUnsafe(r rune) rune { 246 | return properties[r].title 247 | } 248 | 249 | // ToUpper maps the rune to upper case. 250 | func ToUpper(r rune) rune { 251 | if !utf8.ValidRune(r) { 252 | return r 253 | } 254 | 255 | return properties[r].upper 256 | } 257 | 258 | // ToUpperUnsafe is the unsafe version of ToUpper. 
259 | func ToUpperUnsafe(r rune) rune { 260 | return properties[r].upper 261 | } 262 | -------------------------------------------------------------------------------- /cmd/bootstrap/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "go/format" 7 | "io/ioutil" 8 | "strings" 9 | ) 10 | 11 | var ( 12 | suffixes = []string{"", "Unsafe"} 13 | cases = []string{"Lower", "Title", "Upper"} 14 | funcs = []string{ 15 | "IsDigit", "IsGraphic", "IsLetter", "IsLower", "IsMark", "IsNumber", 16 | "IsPrint", "IsPunct", "IsSpace", "IsSymbol", "IsTitle", "IsUpper", 17 | "SimpleFold", 18 | } 19 | ) 20 | 21 | func main() { 22 | errs := make(chan error, 3) 23 | 24 | go func() { 25 | errs <- generateTables() 26 | }() 27 | 28 | go func() { 29 | errs <- generateTests() 30 | }() 31 | 32 | go func() { 33 | errs <- generateBenchmarks() 34 | }() 35 | 36 | for i := 0; i < 3; i++ { 37 | if err := <-errs; err != nil { 38 | panic(err) 39 | } 40 | } 41 | } 42 | 43 | func generateTables() error { 44 | buf := bytes.NewBufferString(` 45 | package runes 46 | 47 | import ( 48 | "unicode" 49 | "unicode/utf8" 50 | ) 51 | 52 | const (`) 53 | 54 | for i, v := range funcs { 55 | if v == "SimpleFold" { 56 | continue 57 | } 58 | 59 | buf.WriteByte('\n') 60 | buf.WriteString(strings.ToLower(v[2:])) 61 | 62 | if i == 0 { 63 | buf.WriteString(" = 1 << iota") 64 | } 65 | } 66 | 67 | buf.WriteString(` 68 | ) 69 | 70 | type char struct { 71 | lower rune 72 | title rune 73 | upper rune 74 | folded rune 75 | flags uint 76 | } 77 | 78 | var properties [unicode.MaxRune + 1]char 79 | 80 | func init() { 81 | for r := rune(0); r <= unicode.MaxRune; r++ { 82 | if !utf8.ValidRune(r) { 83 | continue 84 | } 85 | 86 | var v char 87 | `) 88 | 89 | for _, v := range funcs { 90 | if v == "SimpleFold" { 91 | continue 92 | } 93 | 94 | fmt.Fprintf(buf, ` 95 | 96 | if unicode.%s(r) { 97 | v.flags |= %s 98 | }`, v, 
strings.ToLower(v[2:])) 99 | } 100 | 101 | buf.WriteString(` 102 | 103 | v.lower = unicode.ToLower(r) 104 | v.title = unicode.ToTitle(r) 105 | v.upper = unicode.ToUpper(r) 106 | v.folded = unicode.SimpleFold(r) 107 | properties[r] = v 108 | } 109 | }`) 110 | 111 | b, err := format.Source(buf.Bytes()) 112 | 113 | if err != nil { 114 | return err 115 | } 116 | 117 | return ioutil.WriteFile("tables.go", b, 0644) 118 | } 119 | 120 | func generateTests() error { 121 | buf := bytes.NewBufferString(` 122 | package runes 123 | 124 | import ( 125 | "io/ioutil" 126 | "strings" 127 | "testing" 128 | "unicode" 129 | ) 130 | 131 | var ( 132 | testString string 133 | testStringLen int64 134 | ) 135 | 136 | func init() { 137 | b, err := ioutil.ReadFile("./testdata/corpus.txt") 138 | 139 | if err != nil { 140 | panic(err) 141 | } 142 | 143 | testString = string(b) 144 | testStringLen = int64(len(testString)) 145 | } 146 | 147 | func TestFuncs(t *testing.T) { 148 | for r := rune(0); r <= unicode.MaxRune; r++ {`) 149 | 150 | for i, v := range funcs { 151 | if i > 0 { 152 | buf.WriteString("\n") 153 | } 154 | 155 | for j, suffix := range suffixes { 156 | if v == "SimpleFold" && suffix == "Unsafe" { 157 | continue 158 | } 159 | 160 | if j > 0 { 161 | buf.WriteString("\n") 162 | } 163 | 164 | fmt.Fprintf(buf, ` 165 | if %[2]s%[1]s(r) != unicode.%[2]s(r) { 166 | t.Fatalf("%[2]s%[1]s failed for: %%U, expected %%v, got %%v", 167 | r, unicode.%[2]s(r), %[2]s%[1]s(r)) 168 | }`, suffix, v) 169 | } 170 | } 171 | 172 | for _, v := range cases { 173 | fmt.Fprintf(buf, ` 174 | 175 | if To%[1]s(r) != unicode.To%[1]s(r) { 176 | t.Fatalf("%[1]s failed for: %%U, expected %%U, got %%U", 177 | r, unicode.To%[1]s(r), To%[1]s(r)) 178 | } 179 | 180 | if To(unicode.%[1]sCase, r) != unicode.To(unicode.%[1]sCase, r) { 181 | t.Fatalf("To failed for: %%U, expected %%U, got %%U", 182 | r, unicode.To(unicode.%[1]sCase, r), To(unicode.%[1]sCase, r)) 183 | }`, v) 184 | } 185 | 186 | buf.WriteString(` 187 | } 188 
| 189 | const ( 190 | _case = 99 191 | r = 'A' 192 | ) 193 | 194 | if To(_case, r) != unicode.To(_case, r) { 195 | t.Fatalf("To failed for: %U, expected %U, got %U", 196 | r, unicode.To(_case, r), To(_case, r)) 197 | }`) 198 | 199 | for i, v := range cases { 200 | if i > 0 { 201 | buf.WriteByte('\n') 202 | } 203 | 204 | fmt.Fprintf(buf, ` 205 | 206 | if strings.Map(To%[1]s, testString) != strings.Map(unicode.To%[1]s, testString) { 207 | t.Fatal("To%[1]s failed ") 208 | }`, v) 209 | } 210 | 211 | buf.WriteString("\n}") 212 | 213 | b, err := format.Source(buf.Bytes()) 214 | 215 | if err != nil { 216 | return err 217 | } 218 | 219 | return ioutil.WriteFile("runes_test.go", b, 0644) 220 | } 221 | 222 | func generateBenchmarks() error { 223 | for _, prefix := range []string{"", "unicode."} { 224 | buf := bytes.NewBufferString("// +build ") 225 | 226 | if prefix == "" { 227 | buf.WriteByte('!') 228 | } 229 | 230 | buf.WriteString(`std 231 | 232 | package runes 233 | 234 | import ( 235 | "strings" 236 | "testing" 237 | "unicode" 238 | "unicode/utf8" 239 | ) 240 | `) 241 | 242 | for _, v := range funcs { 243 | if v == "SimpleFold" { 244 | continue 245 | } 246 | 247 | for _, suffix := range suffixes { 248 | title := suffix 249 | 250 | if prefix != "" { 251 | suffix = "" 252 | } 253 | 254 | fmt.Fprintf(buf, ` 255 | 256 | func Benchmark%[1]s%[5]s(b *testing.B) { 257 | pos, neg := getTestCase(%[2]s) 258 | 259 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 260 | b.ResetTimer() 261 | 262 | for i := 0; i <= b.N; i++ { 263 | %[3]s%[1]s%[4]s(pos) 264 | %[3]s%[1]s%[4]s(neg) 265 | } 266 | }`, v, strings.ToLower(v[2:]), prefix, suffix, title) 267 | } 268 | } 269 | 270 | for _, suffix := range suffixes { 271 | title := suffix 272 | 273 | if prefix != "" { 274 | suffix = "" 275 | } 276 | 277 | fmt.Fprintf(buf, ` 278 | 279 | func BenchmarkSimpleFold%[3]s(b *testing.B) { 280 | pos, neg := '\u212A', 'a' 281 | 282 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 283 | 
b.ResetTimer() 284 | 285 | for i := 0; i <= b.N; i++ { 286 | %[1]sSimpleFold%[2]s(pos) 287 | %[1]sSimpleFold%[2]s(neg) 288 | } 289 | }`, prefix, suffix, title) 290 | } 291 | 292 | for _, v := range cases { 293 | for _, suffix := range suffixes { 294 | title := suffix 295 | 296 | if prefix != "" { 297 | suffix = "" 298 | } 299 | 300 | fmt.Fprintf(buf, ` 301 | 302 | func BenchmarkTo%[1]s%[4]s(b *testing.B) { 303 | b.SetBytes(testStringLen) 304 | b.ResetTimer() 305 | 306 | for i := 0; i <= b.N; i++ { 307 | strings.Map(%[2]sTo%[1]s%[3]s, testString) 308 | } 309 | }`, v, prefix, suffix, title) 310 | } 311 | } 312 | 313 | for _, v := range cases { 314 | for _, suffix := range suffixes { 315 | title := suffix 316 | 317 | if prefix != "" { 318 | suffix = "" 319 | } 320 | 321 | fmt.Fprintf(buf, ` 322 | 323 | func BenchmarkStringTo%[1]sCase%[4]s(b *testing.B) { 324 | f := func (r rune) rune { 325 | return %[2]sTo%[3]s(unicode.%[1]sCase, r) 326 | } 327 | 328 | b.SetBytes(testStringLen) 329 | b.ResetTimer() 330 | 331 | for i := 0; i <= b.N; i++ { 332 | strings.Map(f, testString) 333 | } 334 | }`, v, prefix, suffix, title) 335 | } 336 | } 337 | 338 | b, err := format.Source(buf.Bytes()) 339 | 340 | if err != nil { 341 | return err 342 | } 343 | 344 | file := "bench_" 345 | 346 | if prefix != "" { 347 | file += "std_" 348 | } 349 | 350 | if err = ioutil.WriteFile(file+"test.go", b, 0644); err != nil { 351 | return err 352 | } 353 | } 354 | 355 | return nil 356 | } 357 | -------------------------------------------------------------------------------- /testdata/corpus.txt: -------------------------------------------------------------------------------- 1 | Charles Robert Darwin, FRS FRGS FLS FZS (/ˈdɑːrwɪn/; 12 February 1809 – 19 April 1882) was an English naturalist, geologist and biologist, best known for his contributions to the science of evolution.[I] He established that all species of life have descended over time from common ancestors and, in a joint publication with Alfred 
Russel Wallace, introduced his scientific theory that this branching pattern of evolution resulted from a process that he called natural selection, in which the struggle for existence has a similar effect to the artificial selection involved in selective breeding. 2 | 3 | Darwin published his theory of evolution with compelling evidence in his 1859 book On the Origin of Species, overcoming scientific rejection of earlier concepts of transmutation of species. By the 1870s, the scientific community and much of the general public had accepted evolution as a fact. However, many favoured competing explanations and it was not until the emergence of the modern evolutionary synthesis from the 1930s to the 1950s that a broad consensus developed in which natural selection was the basic mechanism of evolution. Darwin's scientific discovery is the unifying theory of the life sciences, explaining the diversity of life. 4 | 5 | Darwin's early interest in nature led him to neglect his medical education at the University of Edinburgh; instead, he helped to investigate marine invertebrates. Studies at the University of Cambridge (Christ's College) encouraged his passion for natural science. His five-year voyage on HMS Beagle established him as an eminent geologist whose observations and theories supported Charles Lyell's uniformitarian ideas, and publication of his journal of the voyage made him famous as a popular author. 6 | 7 | Puzzled by the geographical distribution of wildlife and fossils he collected on the voyage, Darwin began detailed investigations, and in 1838 conceived his theory of natural selection. Although he discussed his ideas with several naturalists, he needed time for extensive research and his geological work had priority. He was writing up his theory in 1858 when Alfred Russel Wallace sent him an essay that described the same idea, prompting immediate joint publication of both of their theories. 
Darwin's work established evolutionary descent with modification as the dominant scientific explanation of diversification in nature. In 1871 he examined human evolution and sexual selection in The Descent of Man, and Selection in Relation to Sex, followed by The Expression of the Emotions in Man and Animals (1872). His research on plants was published in a series of books, and in his final book, The Formation of Vegetable Mould, through the Actions of Worms (1881), he examined earthworms and their effect on soil. 8 | 9 | Darwin has been described as one of the most influential figures in human history, and he was honoured by burial in Westminster Abbey. 10 | 11 | شارلز روبرت داروين (بالإنجليزية: Charles Robert Darwin) عالم تاريخ طبيعي وجيولوجي بريطاني ولد في إنجلترا في 12 فبراير 1809 في شرو سبوري لعائلة إنجليزية علمية وتوفي في 19 أبريل 1882. والده هو الدكتور روبرت وارنج داروين، وكان جده "ارازموس داروين" عالماً ومؤلفاً بدوره. 12 | 13 | اكتسب داروين شهرته كمؤسس لنظرية التطور والتي تنص على أن كل الكائنات الحية على مر الزمان تنحدر من أسلاف مشتركة ، وقام باقتراح نظرية تتضمن أن هذه الأنماط المتفرعة من عملية التطور ناتجة لعملية وصفها بالانتقاء (الانتخاب) الطبيعي، وكذلك الصراع من أجل البقاء له نفس تأثير الاختيار الصناعي المساهم في التكاثر الانتقائي للكائنات الحية. ومن خلال ملاحظاته للأحياء قام داروين بدراسة التحول في الكائنات الحية عن طريق الطفرات وطوّر نظريته الشهيرة في الانتخاب الطبيعي عام 1838 م. ومع إداركه لردّة الفعل التي يمكن أن تحدثها هذه النظرية، لم يصرّح داروين بنظريته في البداية إلا إلى أصدقائه المقربين في حين تابع أبحاثه ليحضّر نفسه للإجابة على الاعتراضات التي كان يتوقعها على نظريته. وفي عام 1858 م بلغ داروين أن هنالك رجل آخر، وهو ألفريد رسل ووليس، يعمل على نظرية مشابهة لنظريته مما أجبر داروين على نشر نتائج بحثه. 14 | 15 | يعد داروين من أشهر علماء علم الأحياء. 
ألف عدة كتب في ما يخص هذا الميدان لكن نظريته الشهيرة واجهت انتقاد كبير وخصوصاً من طرف رجال الدين في جميع أنحاء العالم، دارون نفسه ظل حائراً في ما عرف بما سماه الحلقة المفقودة، التي تتوسط الانتقال من طبيعة القردة للإنسان الحديث. في عام 1859 م، قام داروين بنشر نظرية التطور مع أدلة دامغة في كتاب (أصل الأنواع) متغلباً على الرفض الذي تلقاه مسبقاً من المجتمع العلمي على نظرية تحول المخلوقات. في 1870 م، تقبل المجتمع العلمي والمجتمع عامة نظرية التطور كحقيقة.مع ذلك كان الكثير يفضلون التفسيرات الأخرى، واستمر ذلك حتى نشوء التوليفة التطويرية الحديثة، (1930 م - 1950 م) حيث أصبح هناك إجماع واسع على أن الاستمرار الطبيعي كان المحرك الأساسي للتطور. وبصياغة أخرى فإن اكتشاف داروين العلمي هو نظرية موحدة لكل علوم الأحياء وموضحة للتنوع فيها. 16 | 17 | قاده اهتمامه المبكر بالطبيعة إلى إهمال تعليمه الطبي في جامعة أدنبرة؛ فبدلاً من دراسة الطب قام بالمساعدة بالدراسات التي تجريها جامعة كامبريج بالتحقيق عن اللافقريات البحرية. وهذا عزز حبه للعلوم الطبيعية وجعلته رحلته على (سفينة بيجل التابعة للملكية البريطانية HMS Beagle) ذات الـخمس سنوات عالم جيولوجيا بارز حيث دعمت ملاحظاته ونظرياته أفكار العالم تشارليز ليل (Charles Lyell)وكذلك نشره لمذكرات رحلته جعل منه كاتباً مشهوراً. كان مأخوذاً بالتوزيع الجغرافي للحياة البرية والأحافير التي جمعها أثناء رحلته. وفي عام 1838 م، بدأ داروين بتحقيقات دقيقة رسخت نظريته في الانتقاء الطبيعي. وعلى الرغم من مناقشة أفكاره مع العديد من علماء الطبيعة، إلا أنه احتاج إلى مزيد من الوقت ليقوم ببحث مستفيض، وكان لعمله الجيولوجي الأولوية. 18 | 19 | في عام 1859 م، عندما كان يكتب نظريته قام العالم ألفريد راسيل والس (Alfred Russel Wallace) بإرسال مقالاً إليه شارحاً به نفس الفكرة مما دفعهم لنشر منشور مشترك يضم كلا النظريتين نظرية داروين عن أصل التطور قامت بالشرح والتفسير العلمي للتنوع في الطبيعة. في عام 1871 م، دَرسَ تطور الإنسان والانتقاء الجنسي في كتاب (علاقة أصل الإنسان والاختيار بالجنس)، يتبعه بـ (التعبير عن العواطف عند الإنسان والحيوان). وقد نَشرت أبحاثه عن النباتات في سلسلة من الكتب، وفي كتابه الأخير، قام بفحص ديدان الأرض وتأثيرها على التربة. 
وتقديراً لتفوقه كعالم كُرِّم داروين بجنازة رسمية وتم دفنه في كنيسة وستمنستر (Westminster Abbey) بالقرب من جون هرشل وإسحاق نيوتن وقد وّصف دارون كواحد من أكثر الشخصيات المؤثرة في العالم. 20 | 21 | चार्ल्स डार्विन (१२ फरवरी, १८०९ – १९ अप्रैल १८८२) ने क्रमविकास (evolution) के सिद्धान्त का प्रतिपादन किया। उनका शोध आंशिक रूप से १८३१ से १८३६ में एचएमएस बीगल पर उनकी समुद्र यात्रा के संग्रहों पर आधारित था। इनमें से कई संग्रह इस संग्रहालय में अभी भी उपस्थित हैं। डार्विन महान वैज्ञानिक थे - आज जो हम सजीव चीजें देखते हैं, उनकी उत्पत्ति तथा विविधता को समझने के लिए उनका विकास का सिद्धान्त सर्वश्रेष्ठ माध्यम बन चुका है। 22 | 23 | संचार डार्विन के शोध का केन्द्र-बिन्दु था। उनकी सर्वाधिक प्रसिद्ध पुस्तक जीवजाति का उद्भव (Origin of Species (हिंदी में - 'ऑरिजिन ऑफ स्पीसीज़')) प्रजातियों की उत्पत्ति सामान्य पाठकों पर केंद्रित थी। डार्विन चाहते थे कि उनका सिद्धान्त यथासम्भव व्यापक रूप से प्रसारित हो। 24 | 25 | डार्विन के विकास के सिद्धान्त से हमें यह समझने में मदद मिलती है कि किस प्रकार विभिन्न प्रजातियां एक दूसरे के साथ जुङी हुई हैं। उदाहरणतः वैज्ञानिक यह समझने का प्रयास कर रहे हैं कि रूस की बैकाल झील में प्रजातियों की विविधता कैसे विकसित हुई। 26 | 27 | Чарлз Ро́берт Да́рвин (англ. Charles Robert Darwin (tʃɑrlz 'dɑː.wɪn); 12 февраля 1809 — 19 апреля 1882) — английский натуралист и путешественник, одним из первых пришедший к выводу, и обосновавший идею о том, что все виды живых организмов эволюционируют во времени, и происходят от общих предков. В своей теории, развёрнутое изложение которой было опубликовано в 1859 году в книге «Происхождение видов», основным механизмом эволюции видов Дарвин назвал естественный отбор. Позднее развивал теорию полового отбора. Ему также принадлежит одно из первых обобщающих исследований о происхождении человека. 28 | 29 | Дарвин опубликовал одну из первых работ по этологии «Выражение эмоций у человека и животных». Другими направлениями его исследований были создание модели возникновения коралловых рифов и определение законов наследственности. 
По итогам селекционных экспериментов Дарвин выдвинул гипотезу наследственности (пангенезис), которая так и не получила подтверждения. 30 | 31 | Происхождение биологического разнообразия в результате эволюции было признано большинством биологов ещё при жизни Дарвина, в то время как его теория естественного отбора как основного механизма эволюции стала общепризнанной только в 50-х годах XX столетия с появлением синтетической теории эволюции. Идеи и открытия Дарвина в переработанном виде формируют фундамент современной синтетической теории эволюции и составляют основу биологии как обеспечивающие объяснение биоразнообразия. Термин «дарвинизм» используют для обозначения эволюционных моделей, восходящих в основе к идеям Дарвина, а в обыденной речи «дарвинизмом» часто называют эволюционную теорию и современный научный взгляд на эволюцию в целом. 32 | 33 | 查尔斯·罗伯特·达尔文,FRS(英语:Charles Robert Darwin,1809年2月12日-1882年4月19日)又譯達爾溫,英国博物学家、生物學家,達爾文早期因為地質學研究而著名,而後又提出科學證據,證明所有生物物種是由少數共同祖先,經過長時間的自然選擇過程後演化而成。到了1930年代,達爾文的理論成為對演化機制的主要詮釋,並成為現代演化思想的基礎,在科學上可對生物多樣性進行一致且合理的解釋,是現今生物學的基石。 34 | 35 | 在愛丁堡大學研讀醫學期間,達爾文對自然史逐漸產生興趣。而他後來又到劍橋大學學習神學。達爾文在參與小獵犬號的五年航行後,成為一位地質學家。他進行觀察並提出理論來支持查理斯·萊爾的均變思想。回英國後所出版的《小獵犬號航行之旅》,使其成為著名作家。由於在航行期間對所見生物與化石的地理分佈感到困惑,達爾文開始對物種轉變進行研究,並且在1838年得出了他的自然選擇理論。由於這類思想在當時被視為異端,因此達爾文剛開始只對親近的朋友透露這些想法,並持續進行進一步的研究,以應付可能遭遇的反對。到了1858年,華萊士寄給他一篇含有相似理論的論文,促使達爾文決定與其共同發表這項理論。 36 | 37 | 1859年出版的《物種起源》,使起源於共同祖先的演化,成為對自然界多樣性的一項重要科學解釋。之後達爾文《人類與動物的情感表達》以及《人類由來與性擇》中,闡釋人類的演化與性選擇的作用。他也針對植物研究發表了一系列的書籍,在最後一本著作中,達爾文討論了蚯蚓對土壤的影響。為了表彰他傑出成就,達爾文死後安葬於牛頓與約翰·赫歇爾的墓旁,地點就在英國倫敦的西敏寺。 38 | -------------------------------------------------------------------------------- /bench_test.go: -------------------------------------------------------------------------------- 1 | // +build !std 2 | 3 | package runes 4 | 5 | import ( 6 | "strings" 7 | "testing" 8 | "unicode" 9 | "unicode/utf8" 10 | ) 11 | 12 | func BenchmarkIsDigit(b *testing.B) { 13 | pos, neg := getTestCase(digit) 14 | 15 | b.SetBytes(int64(utf8.RuneLen(pos) 
+ utf8.RuneLen(neg))) 16 | b.ResetTimer() 17 | 18 | for i := 0; i <= b.N; i++ { 19 | IsDigit(pos) 20 | IsDigit(neg) 21 | } 22 | } 23 | 24 | func BenchmarkIsDigitUnsafe(b *testing.B) { 25 | pos, neg := getTestCase(digit) 26 | 27 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 28 | b.ResetTimer() 29 | 30 | for i := 0; i <= b.N; i++ { 31 | IsDigitUnsafe(pos) 32 | IsDigitUnsafe(neg) 33 | } 34 | } 35 | 36 | func BenchmarkIsGraphic(b *testing.B) { 37 | pos, neg := getTestCase(graphic) 38 | 39 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 40 | b.ResetTimer() 41 | 42 | for i := 0; i <= b.N; i++ { 43 | IsGraphic(pos) 44 | IsGraphic(neg) 45 | } 46 | } 47 | 48 | func BenchmarkIsGraphicUnsafe(b *testing.B) { 49 | pos, neg := getTestCase(graphic) 50 | 51 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 52 | b.ResetTimer() 53 | 54 | for i := 0; i <= b.N; i++ { 55 | IsGraphicUnsafe(pos) 56 | IsGraphicUnsafe(neg) 57 | } 58 | } 59 | 60 | func BenchmarkIsLetter(b *testing.B) { 61 | pos, neg := getTestCase(letter) 62 | 63 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 64 | b.ResetTimer() 65 | 66 | for i := 0; i <= b.N; i++ { 67 | IsLetter(pos) 68 | IsLetter(neg) 69 | } 70 | } 71 | 72 | func BenchmarkIsLetterUnsafe(b *testing.B) { 73 | pos, neg := getTestCase(letter) 74 | 75 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 76 | b.ResetTimer() 77 | 78 | for i := 0; i <= b.N; i++ { 79 | IsLetterUnsafe(pos) 80 | IsLetterUnsafe(neg) 81 | } 82 | } 83 | 84 | func BenchmarkIsLower(b *testing.B) { 85 | pos, neg := getTestCase(lower) 86 | 87 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 88 | b.ResetTimer() 89 | 90 | for i := 0; i <= b.N; i++ { 91 | IsLower(pos) 92 | IsLower(neg) 93 | } 94 | } 95 | 96 | func BenchmarkIsLowerUnsafe(b *testing.B) { 97 | pos, neg := getTestCase(lower) 98 | 99 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 100 | b.ResetTimer() 101 | 102 | for i := 0; i <= b.N; i++ { 103 | 
IsLowerUnsafe(pos) 104 | IsLowerUnsafe(neg) 105 | } 106 | } 107 | 108 | func BenchmarkIsMark(b *testing.B) { 109 | pos, neg := getTestCase(mark) 110 | 111 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 112 | b.ResetTimer() 113 | 114 | for i := 0; i <= b.N; i++ { 115 | IsMark(pos) 116 | IsMark(neg) 117 | } 118 | } 119 | 120 | func BenchmarkIsMarkUnsafe(b *testing.B) { 121 | pos, neg := getTestCase(mark) 122 | 123 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 124 | b.ResetTimer() 125 | 126 | for i := 0; i <= b.N; i++ { 127 | IsMarkUnsafe(pos) 128 | IsMarkUnsafe(neg) 129 | } 130 | } 131 | 132 | func BenchmarkIsNumber(b *testing.B) { 133 | pos, neg := getTestCase(number) 134 | 135 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 136 | b.ResetTimer() 137 | 138 | for i := 0; i <= b.N; i++ { 139 | IsNumber(pos) 140 | IsNumber(neg) 141 | } 142 | } 143 | 144 | func BenchmarkIsNumberUnsafe(b *testing.B) { 145 | pos, neg := getTestCase(number) 146 | 147 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 148 | b.ResetTimer() 149 | 150 | for i := 0; i <= b.N; i++ { 151 | IsNumberUnsafe(pos) 152 | IsNumberUnsafe(neg) 153 | } 154 | } 155 | 156 | func BenchmarkIsPrint(b *testing.B) { 157 | pos, neg := getTestCase(print) 158 | 159 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 160 | b.ResetTimer() 161 | 162 | for i := 0; i <= b.N; i++ { 163 | IsPrint(pos) 164 | IsPrint(neg) 165 | } 166 | } 167 | 168 | func BenchmarkIsPrintUnsafe(b *testing.B) { 169 | pos, neg := getTestCase(print) 170 | 171 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 172 | b.ResetTimer() 173 | 174 | for i := 0; i <= b.N; i++ { 175 | IsPrintUnsafe(pos) 176 | IsPrintUnsafe(neg) 177 | } 178 | } 179 | 180 | func BenchmarkIsPunct(b *testing.B) { 181 | pos, neg := getTestCase(punct) 182 | 183 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 184 | b.ResetTimer() 185 | 186 | for i := 0; i <= b.N; i++ { 187 | IsPunct(pos) 188 | IsPunct(neg) 189 
| } 190 | } 191 | 192 | func BenchmarkIsPunctUnsafe(b *testing.B) { 193 | pos, neg := getTestCase(punct) 194 | 195 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 196 | b.ResetTimer() 197 | 198 | for i := 0; i <= b.N; i++ { 199 | IsPunctUnsafe(pos) 200 | IsPunctUnsafe(neg) 201 | } 202 | } 203 | 204 | func BenchmarkIsSpace(b *testing.B) { 205 | pos, neg := getTestCase(space) 206 | 207 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 208 | b.ResetTimer() 209 | 210 | for i := 0; i <= b.N; i++ { 211 | IsSpace(pos) 212 | IsSpace(neg) 213 | } 214 | } 215 | 216 | func BenchmarkIsSpaceUnsafe(b *testing.B) { 217 | pos, neg := getTestCase(space) 218 | 219 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 220 | b.ResetTimer() 221 | 222 | for i := 0; i <= b.N; i++ { 223 | IsSpaceUnsafe(pos) 224 | IsSpaceUnsafe(neg) 225 | } 226 | } 227 | 228 | func BenchmarkIsSymbol(b *testing.B) { 229 | pos, neg := getTestCase(symbol) 230 | 231 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 232 | b.ResetTimer() 233 | 234 | for i := 0; i <= b.N; i++ { 235 | IsSymbol(pos) 236 | IsSymbol(neg) 237 | } 238 | } 239 | 240 | func BenchmarkIsSymbolUnsafe(b *testing.B) { 241 | pos, neg := getTestCase(symbol) 242 | 243 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 244 | b.ResetTimer() 245 | 246 | for i := 0; i <= b.N; i++ { 247 | IsSymbolUnsafe(pos) 248 | IsSymbolUnsafe(neg) 249 | } 250 | } 251 | 252 | func BenchmarkIsTitle(b *testing.B) { 253 | pos, neg := getTestCase(title) 254 | 255 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 256 | b.ResetTimer() 257 | 258 | for i := 0; i <= b.N; i++ { 259 | IsTitle(pos) 260 | IsTitle(neg) 261 | } 262 | } 263 | 264 | func BenchmarkIsTitleUnsafe(b *testing.B) { 265 | pos, neg := getTestCase(title) 266 | 267 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 268 | b.ResetTimer() 269 | 270 | for i := 0; i <= b.N; i++ { 271 | IsTitleUnsafe(pos) 272 | IsTitleUnsafe(neg) 273 | } 274 | } 275 | 276 
| func BenchmarkIsUpper(b *testing.B) { 277 | pos, neg := getTestCase(upper) 278 | 279 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 280 | b.ResetTimer() 281 | 282 | for i := 0; i <= b.N; i++ { 283 | IsUpper(pos) 284 | IsUpper(neg) 285 | } 286 | } 287 | 288 | func BenchmarkIsUpperUnsafe(b *testing.B) { 289 | pos, neg := getTestCase(upper) 290 | 291 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 292 | b.ResetTimer() 293 | 294 | for i := 0; i <= b.N; i++ { 295 | IsUpperUnsafe(pos) 296 | IsUpperUnsafe(neg) 297 | } 298 | } 299 | 300 | func BenchmarkSimpleFold(b *testing.B) { 301 | pos, neg := '\u212A', 'a' 302 | 303 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 304 | b.ResetTimer() 305 | 306 | for i := 0; i <= b.N; i++ { 307 | SimpleFold(pos) 308 | SimpleFold(neg) 309 | } 310 | } 311 | 312 | func BenchmarkSimpleFoldUnsafe(b *testing.B) { 313 | pos, neg := '\u212A', 'a' 314 | 315 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 316 | b.ResetTimer() 317 | 318 | for i := 0; i <= b.N; i++ { 319 | SimpleFoldUnsafe(pos) 320 | SimpleFoldUnsafe(neg) 321 | } 322 | } 323 | 324 | func BenchmarkToLower(b *testing.B) { 325 | b.SetBytes(testStringLen) 326 | b.ResetTimer() 327 | 328 | for i := 0; i <= b.N; i++ { 329 | strings.Map(ToLower, testString) 330 | } 331 | } 332 | 333 | func BenchmarkToLowerUnsafe(b *testing.B) { 334 | b.SetBytes(testStringLen) 335 | b.ResetTimer() 336 | 337 | for i := 0; i <= b.N; i++ { 338 | strings.Map(ToLowerUnsafe, testString) 339 | } 340 | } 341 | 342 | func BenchmarkToTitle(b *testing.B) { 343 | b.SetBytes(testStringLen) 344 | b.ResetTimer() 345 | 346 | for i := 0; i <= b.N; i++ { 347 | strings.Map(ToTitle, testString) 348 | } 349 | } 350 | 351 | func BenchmarkToTitleUnsafe(b *testing.B) { 352 | b.SetBytes(testStringLen) 353 | b.ResetTimer() 354 | 355 | for i := 0; i <= b.N; i++ { 356 | strings.Map(ToTitleUnsafe, testString) 357 | } 358 | } 359 | 360 | func BenchmarkToUpper(b *testing.B) { 361 | 
b.SetBytes(testStringLen) 362 | b.ResetTimer() 363 | 364 | for i := 0; i <= b.N; i++ { 365 | strings.Map(ToUpper, testString) 366 | } 367 | } 368 | 369 | func BenchmarkToUpperUnsafe(b *testing.B) { 370 | b.SetBytes(testStringLen) 371 | b.ResetTimer() 372 | 373 | for i := 0; i <= b.N; i++ { 374 | strings.Map(ToUpperUnsafe, testString) 375 | } 376 | } 377 | 378 | func BenchmarkStringToLowerCase(b *testing.B) { 379 | f := func(r rune) rune { 380 | return To(unicode.LowerCase, r) 381 | } 382 | 383 | b.SetBytes(testStringLen) 384 | b.ResetTimer() 385 | 386 | for i := 0; i <= b.N; i++ { 387 | strings.Map(f, testString) 388 | } 389 | } 390 | 391 | func BenchmarkStringToLowerCaseUnsafe(b *testing.B) { 392 | f := func(r rune) rune { 393 | return ToUnsafe(unicode.LowerCase, r) 394 | } 395 | 396 | b.SetBytes(testStringLen) 397 | b.ResetTimer() 398 | 399 | for i := 0; i <= b.N; i++ { 400 | strings.Map(f, testString) 401 | } 402 | } 403 | 404 | func BenchmarkStringToTitleCase(b *testing.B) { 405 | f := func(r rune) rune { 406 | return To(unicode.TitleCase, r) 407 | } 408 | 409 | b.SetBytes(testStringLen) 410 | b.ResetTimer() 411 | 412 | for i := 0; i <= b.N; i++ { 413 | strings.Map(f, testString) 414 | } 415 | } 416 | 417 | func BenchmarkStringToTitleCaseUnsafe(b *testing.B) { 418 | f := func(r rune) rune { 419 | return ToUnsafe(unicode.TitleCase, r) 420 | } 421 | 422 | b.SetBytes(testStringLen) 423 | b.ResetTimer() 424 | 425 | for i := 0; i <= b.N; i++ { 426 | strings.Map(f, testString) 427 | } 428 | } 429 | 430 | func BenchmarkStringToUpperCase(b *testing.B) { 431 | f := func(r rune) rune { 432 | return To(unicode.UpperCase, r) 433 | } 434 | 435 | b.SetBytes(testStringLen) 436 | b.ResetTimer() 437 | 438 | for i := 0; i <= b.N; i++ { 439 | strings.Map(f, testString) 440 | } 441 | } 442 | 443 | func BenchmarkStringToUpperCaseUnsafe(b *testing.B) { 444 | f := func(r rune) rune { 445 | return ToUnsafe(unicode.UpperCase, r) 446 | } 447 | 448 | b.SetBytes(testStringLen) 449 | 
b.ResetTimer() 450 | 451 | for i := 0; i <= b.N; i++ { 452 | strings.Map(f, testString) 453 | } 454 | } 455 | -------------------------------------------------------------------------------- /bench_std_test.go: -------------------------------------------------------------------------------- 1 | // +build std 2 | 3 | package runes 4 | 5 | import ( 6 | "strings" 7 | "testing" 8 | "unicode" 9 | "unicode/utf8" 10 | ) 11 | 12 | func BenchmarkIsDigit(b *testing.B) { 13 | pos, neg := getTestCase(digit) 14 | 15 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 16 | b.ResetTimer() 17 | 18 | for i := 0; i <= b.N; i++ { 19 | unicode.IsDigit(pos) 20 | unicode.IsDigit(neg) 21 | } 22 | } 23 | 24 | func BenchmarkIsDigitUnsafe(b *testing.B) { 25 | pos, neg := getTestCase(digit) 26 | 27 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 28 | b.ResetTimer() 29 | 30 | for i := 0; i <= b.N; i++ { 31 | unicode.IsDigit(pos) 32 | unicode.IsDigit(neg) 33 | } 34 | } 35 | 36 | func BenchmarkIsGraphic(b *testing.B) { 37 | pos, neg := getTestCase(graphic) 38 | 39 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 40 | b.ResetTimer() 41 | 42 | for i := 0; i <= b.N; i++ { 43 | unicode.IsGraphic(pos) 44 | unicode.IsGraphic(neg) 45 | } 46 | } 47 | 48 | func BenchmarkIsGraphicUnsafe(b *testing.B) { 49 | pos, neg := getTestCase(graphic) 50 | 51 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 52 | b.ResetTimer() 53 | 54 | for i := 0; i <= b.N; i++ { 55 | unicode.IsGraphic(pos) 56 | unicode.IsGraphic(neg) 57 | } 58 | } 59 | 60 | func BenchmarkIsLetter(b *testing.B) { 61 | pos, neg := getTestCase(letter) 62 | 63 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 64 | b.ResetTimer() 65 | 66 | for i := 0; i <= b.N; i++ { 67 | unicode.IsLetter(pos) 68 | unicode.IsLetter(neg) 69 | } 70 | } 71 | 72 | func BenchmarkIsLetterUnsafe(b *testing.B) { 73 | pos, neg := getTestCase(letter) 74 | 75 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 76 | 
b.ResetTimer() 77 | 78 | for i := 0; i <= b.N; i++ { 79 | unicode.IsLetter(pos) 80 | unicode.IsLetter(neg) 81 | } 82 | } 83 | 84 | func BenchmarkIsLower(b *testing.B) { 85 | pos, neg := getTestCase(lower) 86 | 87 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 88 | b.ResetTimer() 89 | 90 | for i := 0; i <= b.N; i++ { 91 | unicode.IsLower(pos) 92 | unicode.IsLower(neg) 93 | } 94 | } 95 | 96 | func BenchmarkIsLowerUnsafe(b *testing.B) { 97 | pos, neg := getTestCase(lower) 98 | 99 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 100 | b.ResetTimer() 101 | 102 | for i := 0; i <= b.N; i++ { 103 | unicode.IsLower(pos) 104 | unicode.IsLower(neg) 105 | } 106 | } 107 | 108 | func BenchmarkIsMark(b *testing.B) { 109 | pos, neg := getTestCase(mark) 110 | 111 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 112 | b.ResetTimer() 113 | 114 | for i := 0; i <= b.N; i++ { 115 | unicode.IsMark(pos) 116 | unicode.IsMark(neg) 117 | } 118 | } 119 | 120 | func BenchmarkIsMarkUnsafe(b *testing.B) { 121 | pos, neg := getTestCase(mark) 122 | 123 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 124 | b.ResetTimer() 125 | 126 | for i := 0; i <= b.N; i++ { 127 | unicode.IsMark(pos) 128 | unicode.IsMark(neg) 129 | } 130 | } 131 | 132 | func BenchmarkIsNumber(b *testing.B) { 133 | pos, neg := getTestCase(number) 134 | 135 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 136 | b.ResetTimer() 137 | 138 | for i := 0; i <= b.N; i++ { 139 | unicode.IsNumber(pos) 140 | unicode.IsNumber(neg) 141 | } 142 | } 143 | 144 | func BenchmarkIsNumberUnsafe(b *testing.B) { 145 | pos, neg := getTestCase(number) 146 | 147 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 148 | b.ResetTimer() 149 | 150 | for i := 0; i <= b.N; i++ { 151 | unicode.IsNumber(pos) 152 | unicode.IsNumber(neg) 153 | } 154 | } 155 | 156 | func BenchmarkIsPrint(b *testing.B) { 157 | pos, neg := getTestCase(print) 158 | 159 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 
160 | b.ResetTimer() 161 | 162 | for i := 0; i <= b.N; i++ { 163 | unicode.IsPrint(pos) 164 | unicode.IsPrint(neg) 165 | } 166 | } 167 | 168 | func BenchmarkIsPrintUnsafe(b *testing.B) { 169 | pos, neg := getTestCase(print) 170 | 171 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 172 | b.ResetTimer() 173 | 174 | for i := 0; i <= b.N; i++ { 175 | unicode.IsPrint(pos) 176 | unicode.IsPrint(neg) 177 | } 178 | } 179 | 180 | func BenchmarkIsPunct(b *testing.B) { 181 | pos, neg := getTestCase(punct) 182 | 183 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 184 | b.ResetTimer() 185 | 186 | for i := 0; i <= b.N; i++ { 187 | unicode.IsPunct(pos) 188 | unicode.IsPunct(neg) 189 | } 190 | } 191 | 192 | func BenchmarkIsPunctUnsafe(b *testing.B) { 193 | pos, neg := getTestCase(punct) 194 | 195 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 196 | b.ResetTimer() 197 | 198 | for i := 0; i <= b.N; i++ { 199 | unicode.IsPunct(pos) 200 | unicode.IsPunct(neg) 201 | } 202 | } 203 | 204 | func BenchmarkIsSpace(b *testing.B) { 205 | pos, neg := getTestCase(space) 206 | 207 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 208 | b.ResetTimer() 209 | 210 | for i := 0; i <= b.N; i++ { 211 | unicode.IsSpace(pos) 212 | unicode.IsSpace(neg) 213 | } 214 | } 215 | 216 | func BenchmarkIsSpaceUnsafe(b *testing.B) { 217 | pos, neg := getTestCase(space) 218 | 219 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 220 | b.ResetTimer() 221 | 222 | for i := 0; i <= b.N; i++ { 223 | unicode.IsSpace(pos) 224 | unicode.IsSpace(neg) 225 | } 226 | } 227 | 228 | func BenchmarkIsSymbol(b *testing.B) { 229 | pos, neg := getTestCase(symbol) 230 | 231 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 232 | b.ResetTimer() 233 | 234 | for i := 0; i <= b.N; i++ { 235 | unicode.IsSymbol(pos) 236 | unicode.IsSymbol(neg) 237 | } 238 | } 239 | 240 | func BenchmarkIsSymbolUnsafe(b *testing.B) { 241 | pos, neg := getTestCase(symbol) 242 | 243 | 
b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 244 | b.ResetTimer() 245 | 246 | for i := 0; i <= b.N; i++ { 247 | unicode.IsSymbol(pos) 248 | unicode.IsSymbol(neg) 249 | } 250 | } 251 | 252 | func BenchmarkIsTitle(b *testing.B) { 253 | pos, neg := getTestCase(title) 254 | 255 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 256 | b.ResetTimer() 257 | 258 | for i := 0; i <= b.N; i++ { 259 | unicode.IsTitle(pos) 260 | unicode.IsTitle(neg) 261 | } 262 | } 263 | 264 | func BenchmarkIsTitleUnsafe(b *testing.B) { 265 | pos, neg := getTestCase(title) 266 | 267 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 268 | b.ResetTimer() 269 | 270 | for i := 0; i <= b.N; i++ { 271 | unicode.IsTitle(pos) 272 | unicode.IsTitle(neg) 273 | } 274 | } 275 | 276 | func BenchmarkIsUpper(b *testing.B) { 277 | pos, neg := getTestCase(upper) 278 | 279 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 280 | b.ResetTimer() 281 | 282 | for i := 0; i <= b.N; i++ { 283 | unicode.IsUpper(pos) 284 | unicode.IsUpper(neg) 285 | } 286 | } 287 | 288 | func BenchmarkIsUpperUnsafe(b *testing.B) { 289 | pos, neg := getTestCase(upper) 290 | 291 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 292 | b.ResetTimer() 293 | 294 | for i := 0; i <= b.N; i++ { 295 | unicode.IsUpper(pos) 296 | unicode.IsUpper(neg) 297 | } 298 | } 299 | 300 | func BenchmarkSimpleFold(b *testing.B) { 301 | pos, neg := '\u212A', 'a' 302 | 303 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 304 | b.ResetTimer() 305 | 306 | for i := 0; i <= b.N; i++ { 307 | unicode.SimpleFold(pos) 308 | unicode.SimpleFold(neg) 309 | } 310 | } 311 | 312 | func BenchmarkSimpleFoldUnsafe(b *testing.B) { 313 | pos, neg := '\u212A', 'a' 314 | 315 | b.SetBytes(int64(utf8.RuneLen(pos) + utf8.RuneLen(neg))) 316 | b.ResetTimer() 317 | 318 | for i := 0; i <= b.N; i++ { 319 | unicode.SimpleFold(pos) 320 | unicode.SimpleFold(neg) 321 | } 322 | } 323 | 324 | func BenchmarkToLower(b *testing.B) { 325 | 
b.SetBytes(testStringLen) 326 | b.ResetTimer() 327 | 328 | for i := 0; i <= b.N; i++ { 329 | strings.Map(unicode.ToLower, testString) 330 | } 331 | } 332 | 333 | func BenchmarkToLowerUnsafe(b *testing.B) { 334 | b.SetBytes(testStringLen) 335 | b.ResetTimer() 336 | 337 | for i := 0; i <= b.N; i++ { 338 | strings.Map(unicode.ToLower, testString) 339 | } 340 | } 341 | 342 | func BenchmarkToTitle(b *testing.B) { 343 | b.SetBytes(testStringLen) 344 | b.ResetTimer() 345 | 346 | for i := 0; i <= b.N; i++ { 347 | strings.Map(unicode.ToTitle, testString) 348 | } 349 | } 350 | 351 | func BenchmarkToTitleUnsafe(b *testing.B) { 352 | b.SetBytes(testStringLen) 353 | b.ResetTimer() 354 | 355 | for i := 0; i <= b.N; i++ { 356 | strings.Map(unicode.ToTitle, testString) 357 | } 358 | } 359 | 360 | func BenchmarkToUpper(b *testing.B) { 361 | b.SetBytes(testStringLen) 362 | b.ResetTimer() 363 | 364 | for i := 0; i <= b.N; i++ { 365 | strings.Map(unicode.ToUpper, testString) 366 | } 367 | } 368 | 369 | func BenchmarkToUpperUnsafe(b *testing.B) { 370 | b.SetBytes(testStringLen) 371 | b.ResetTimer() 372 | 373 | for i := 0; i <= b.N; i++ { 374 | strings.Map(unicode.ToUpper, testString) 375 | } 376 | } 377 | 378 | func BenchmarkStringToLowerCase(b *testing.B) { 379 | f := func(r rune) rune { 380 | return unicode.To(unicode.LowerCase, r) 381 | } 382 | 383 | b.SetBytes(testStringLen) 384 | b.ResetTimer() 385 | 386 | for i := 0; i <= b.N; i++ { 387 | strings.Map(f, testString) 388 | } 389 | } 390 | 391 | func BenchmarkStringToLowerCaseUnsafe(b *testing.B) { 392 | f := func(r rune) rune { 393 | return unicode.To(unicode.LowerCase, r) 394 | } 395 | 396 | b.SetBytes(testStringLen) 397 | b.ResetTimer() 398 | 399 | for i := 0; i <= b.N; i++ { 400 | strings.Map(f, testString) 401 | } 402 | } 403 | 404 | func BenchmarkStringToTitleCase(b *testing.B) { 405 | f := func(r rune) rune { 406 | return unicode.To(unicode.TitleCase, r) 407 | } 408 | 409 | b.SetBytes(testStringLen) 410 | b.ResetTimer() 411 | 
412 | for i := 0; i <= b.N; i++ { 413 | strings.Map(f, testString) 414 | } 415 | } 416 | 417 | func BenchmarkStringToTitleCaseUnsafe(b *testing.B) { 418 | f := func(r rune) rune { 419 | return unicode.To(unicode.TitleCase, r) 420 | } 421 | 422 | b.SetBytes(testStringLen) 423 | b.ResetTimer() 424 | 425 | for i := 0; i <= b.N; i++ { 426 | strings.Map(f, testString) 427 | } 428 | } 429 | 430 | func BenchmarkStringToUpperCase(b *testing.B) { 431 | f := func(r rune) rune { 432 | return unicode.To(unicode.UpperCase, r) 433 | } 434 | 435 | b.SetBytes(testStringLen) 436 | b.ResetTimer() 437 | 438 | for i := 0; i <= b.N; i++ { 439 | strings.Map(f, testString) 440 | } 441 | } 442 | 443 | func BenchmarkStringToUpperCaseUnsafe(b *testing.B) { 444 | f := func(r rune) rune { 445 | return unicode.To(unicode.UpperCase, r) 446 | } 447 | 448 | b.SetBytes(testStringLen) 449 | b.ResetTimer() 450 | 451 | for i := 0; i <= b.N; i++ { 452 | strings.Map(f, testString) 453 | } 454 | } 455 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Runes 2 | 3 | A drop-in replacement for some features of Go's standard `unicode` package. 4 | 5 | [![Build Status](https://travis-ci.org/martingallagher/runes.svg)](https://travis-ci.org/martingallagher/runes) [![GoDoc](https://godoc.org/github.com/martingallagher/runes?status.svg)](https://godoc.org/github.com/martingallagher/runes) [![Go Report Card](https://goreportcard.com/badge/github.com/martingallagher/runes)](https://goreportcard.com/report/github.com/martingallagher/runes) [![license](https://img.shields.io/github/license/martingallagher/runes.svg)](https://github.com/martingallagher/runes/blob/master/LICENSE) 6 | 7 | This package works by computing an array jump table for fast Unicode property lookup. 
8 | 9 | Drawbacks include: 10 | 11 | - The startup cost of computing the array of properties 12 | - Memory consumption of ~25MB on x86-64 13 | 14 | This package isn't a complete replacement for the standard `unicode` package; it is recommended that you trial it when performance is a priority, e.g. mass-text normalisation and segmentation. For input containing highly randomised data across the Unicode spectrum it will perform worse than the standard `unicode` package due to poor cache locality. As always: measure. 15 | 16 | ## Benchmarks 17 | 18 | Use `make bench` to run the benchmarks. 19 | 20 | - [Intel (x86-64)](#intel) 21 | - [Raspberry Pi 2 Model B (ARMv7)](#raspberry-pi-2) 22 | 23 | ### Intel 24 | 25 | Dell XPS 13 9370 (Intel i7-8550U) / Ubuntu 16.04, CPU in performance mode: 26 | 27 | ``` 28 | benchmark old ns/op new ns/op delta 29 | BenchmarkIsDigit-8 34.1 1.10 -96.77% 30 | BenchmarkIsDigitUnsafe-8 33.8 0.55 -98.37% 31 | BenchmarkIsGraphic-8 127 1.11 -99.13% 32 | BenchmarkIsGraphicUnsafe-8 128 0.83 -99.35% 33 | BenchmarkIsLetter-8 40.9 1.10 -97.31% 34 | BenchmarkIsLetterUnsafe-8 40.7 0.55 -98.65% 35 | BenchmarkIsLower-8 30.2 1.12 -96.29% 36 | BenchmarkIsLowerUnsafe-8 30.2 0.83 -97.25% 37 | BenchmarkIsMark-8 35.0 1.10 -96.86% 38 | BenchmarkIsMarkUnsafe-8 35.1 0.55 -98.43% 39 | BenchmarkIsNumber-8 30.1 1.10 -96.35% 40 | BenchmarkIsNumberUnsafe-8 30.3 0.83 -97.26% 41 | BenchmarkIsPrint-8 125 1.10 -99.12% 42 | BenchmarkIsPrintUnsafe-8 125 0.54 -99.57% 43 | BenchmarkIsPunct-8 30.1 1.11 -96.31% 44 | BenchmarkIsPunctUnsafe-8 29.9 0.82 -97.26% 45 | BenchmarkIsSpace-8 14.2 1.10 -92.25% 46 | BenchmarkIsSpaceUnsafe-8 14.3 0.55 -96.15% 47 | BenchmarkIsSymbol-8 30.5 1.11 -96.36% 48 | BenchmarkIsSymbolUnsafe-8 30.2 0.83 -97.25% 49 | BenchmarkIsTitle-8 14.8 1.10 -92.57% 50 | BenchmarkIsTitleUnsafe-8 14.7 0.55 -96.26% 51 | BenchmarkIsUpper-8 30.0 1.11 -96.30% 52 | BenchmarkIsUpperUnsafe-8 30.2 0.82 -97.28% 53 | BenchmarkSimpleFold-8 15.3 0.28 -98.17% 54 | 
BenchmarkSimpleFoldUnsafe-8 15.1 0.55 -96.36% 55 | BenchmarkToLower-8 209746 90991 -56.62% 56 | BenchmarkToLowerUnsafe-8 208996 91548 -56.20% 57 | BenchmarkToTitle-8 222480 101441 -54.40% 58 | BenchmarkToTitleUnsafe-8 219774 102589 -53.32% 59 | BenchmarkToUpper-8 223247 103880 -53.47% 60 | BenchmarkToUpperUnsafe-8 221264 104025 -52.99% 61 | BenchmarkStringToLowerCase-8 308085 108448 -64.80% 62 | BenchmarkStringToLowerCaseUnsafe-8 308406 91254 -70.41% 63 | BenchmarkStringToTitleCase-8 318338 120817 -62.05% 64 | BenchmarkStringToTitleCaseUnsafe-8 321452 103651 -67.76% 65 | BenchmarkStringToUpperCase-8 325173 120402 -62.97% 66 | BenchmarkStringToUpperCaseUnsafe-8 326598 104489 -68.01% 67 | 68 | benchmark old MB/s new MB/s speedup 69 | BenchmarkIsDigit-8 234.40 7267.85 31.01x 70 | BenchmarkIsDigitUnsafe-8 236.47 14623.67 61.84x 71 | BenchmarkIsGraphic-8 62.55 7229.24 115.58x 72 | BenchmarkIsGraphicUnsafe-8 62.04 9668.54 155.84x 73 | BenchmarkIsLetter-8 195.50 7264.06 37.16x 74 | BenchmarkIsLetterUnsafe-8 196.44 14480.12 73.71x 75 | BenchmarkIsLower-8 264.99 7117.67 26.86x 76 | BenchmarkIsLowerUnsafe-8 264.74 9621.64 36.34x 77 | BenchmarkIsMark-8 228.38 7278.42 31.87x 78 | BenchmarkIsMarkUnsafe-8 227.88 14459.79 63.45x 79 | BenchmarkIsNumber-8 266.12 7285.09 27.38x 80 | BenchmarkIsNumberUnsafe-8 264.07 9664.48 36.60x 81 | BenchmarkIsPrint-8 63.93 7242.14 113.28x 82 | BenchmarkIsPrintUnsafe-8 63.95 14881.46 232.70x 83 | BenchmarkIsPunct-8 266.03 7217.83 27.13x 84 | BenchmarkIsPunctUnsafe-8 267.55 9734.01 36.38x 85 | BenchmarkIsSpace-8 493.22 6351.52 12.88x 86 | BenchmarkIsSpaceUnsafe-8 490.55 12647.13 25.78x 87 | BenchmarkIsSymbol-8 262.54 7225.87 27.52x 88 | BenchmarkIsSymbolUnsafe-8 265.11 9598.59 36.21x 89 | BenchmarkIsTitle-8 471.93 6381.03 13.52x 90 | BenchmarkIsTitleUnsafe-8 475.34 12706.11 26.73x 91 | BenchmarkIsUpper-8 266.55 7236.34 27.15x 92 | BenchmarkIsUpperUnsafe-8 264.68 9711.01 36.69x 93 | BenchmarkSimpleFold-8 261.92 14454.61 55.19x 94 | 
BenchmarkSimpleFoldUnsafe-8 264.39 7266.35 27.48x 95 | BenchmarkToLower-8 76.34 175.98 2.31x 96 | BenchmarkToLowerUnsafe-8 76.62 174.91 2.28x 97 | BenchmarkToTitle-8 71.97 157.85 2.19x 98 | BenchmarkToTitleUnsafe-8 72.86 156.09 2.14x 99 | BenchmarkToUpper-8 71.73 154.15 2.15x 100 | BenchmarkToUpperUnsafe-8 72.37 153.93 2.13x 101 | BenchmarkStringToLowerCase-8 51.98 147.66 2.84x 102 | BenchmarkStringToLowerCaseUnsafe-8 51.92 175.48 3.38x 103 | BenchmarkStringToTitleCase-8 50.30 132.54 2.63x 104 | BenchmarkStringToTitleCaseUnsafe-8 49.81 154.49 3.10x 105 | BenchmarkStringToUpperCase-8 49.24 133.00 2.70x 106 | BenchmarkStringToUpperCaseUnsafe-8 49.03 153.25 3.13x 107 | ``` 108 | 109 | ### Raspberry Pi 2 110 | 111 | Raspberry Pi 2 Model B / Ubuntu 16.04: 112 | 113 | ``` 114 | benchmark old ns/op new ns/op delta 115 | BenchmarkIsDigit-4 712 14.7 -97.94% 116 | BenchmarkIsDigitUnsafe-4 716 6.69 -99.07% 117 | BenchmarkIsGraphic-4 2051 13.4 -99.35% 118 | BenchmarkIsGraphicUnsafe-4 2046 6.72 -99.67% 119 | BenchmarkIsLetter-4 601 13.4 -97.77% 120 | BenchmarkIsLetterUnsafe-4 600 6.73 -98.88% 121 | BenchmarkIsLower-4 494 13.4 -97.29% 122 | BenchmarkIsLowerUnsafe-4 495 6.70 -98.65% 123 | BenchmarkIsMark-4 542 13.4 -97.53% 124 | BenchmarkIsMarkUnsafe-4 542 6.73 -98.76% 125 | BenchmarkIsNumber-4 492 14.5 -97.05% 126 | BenchmarkIsNumberUnsafe-4 490 6.69 -98.63% 127 | BenchmarkIsPrint-4 1985 13.4 -99.32% 128 | BenchmarkIsPrintUnsafe-4 1980 6.72 -99.66% 129 | BenchmarkIsPunct-4 499 13.4 -97.31% 130 | BenchmarkIsPunctUnsafe-4 501 6.72 -98.66% 131 | BenchmarkIsSpace-4 315 14.4 -95.43% 132 | BenchmarkIsSpaceUnsafe-4 315 6.69 -97.88% 133 | BenchmarkIsSymbol-4 492 13.4 -97.28% 134 | BenchmarkIsSymbolUnsafe-4 493 6.71 -98.64% 135 | BenchmarkIsTitle-4 334 13.5 -95.96% 136 | BenchmarkIsTitleUnsafe-4 333 6.72 -97.98% 137 | BenchmarkIsUpper-4 495 14.7 -97.03% 138 | BenchmarkIsUpperUnsafe-4 495 6.69 -98.65% 139 | BenchmarkSimpleFold-4 213 4.48 -97.90% 140 | BenchmarkSimpleFoldUnsafe-4 213 4.46 
-97.91% 141 | BenchmarkToLower-4 159897 58494 -63.42% 142 | BenchmarkToLowerUnsafe-4 159290 62179 -60.96% 143 | BenchmarkToTitle-4 158902 61126 -61.53% 144 | BenchmarkToTitleUnsafe-4 159651 65347 -59.07% 145 | BenchmarkToUpper-4 159147 61988 -61.05% 146 | BenchmarkToUpperUnsafe-4 159190 55153 -65.35% 147 | BenchmarkStringToLowerCase-4 245836 67834 -72.41% 148 | BenchmarkStringToLowerCaseUnsafe-4 245460 66175 -73.04% 149 | BenchmarkStringToTitleCase-4 245313 81020 -66.97% 150 | BenchmarkStringToTitleCaseUnsafe-4 246914 61785 -74.98% 151 | BenchmarkStringToUpperCase-4 246976 69070 -72.03% 152 | BenchmarkStringToUpperCaseUnsafe-4 246685 55126 -77.65% 153 | 154 | benchmark old MB/s new MB/s speedup 155 | BenchmarkIsDigit-4 11.24 545.27 48.51x 156 | BenchmarkIsDigitUnsafe-4 11.17 1195.03 106.99x 157 | BenchmarkIsGraphic-4 3.90 597.51 153.21x 158 | BenchmarkIsGraphicUnsafe-4 3.91 1190.33 304.43x 159 | BenchmarkIsLetter-4 13.31 595.63 44.75x 160 | BenchmarkIsLetterUnsafe-4 13.33 1188.44 89.16x 161 | BenchmarkIsLower-4 16.18 595.76 36.82x 162 | BenchmarkIsLowerUnsafe-4 16.16 1194.75 73.93x 163 | BenchmarkIsMark-4 14.74 595.65 40.41x 164 | BenchmarkIsMarkUnsafe-4 14.74 1188.45 80.63x 165 | BenchmarkIsNumber-4 16.26 550.34 33.85x 166 | BenchmarkIsNumberUnsafe-4 16.29 1195.30 73.38x 167 | BenchmarkIsPrint-4 4.03 595.63 147.80x 168 | BenchmarkIsPrintUnsafe-4 4.04 1191.12 294.83x 169 | BenchmarkIsPunct-4 16.00 595.16 37.20x 170 | BenchmarkIsPunctUnsafe-4 15.96 1191.26 74.64x 171 | BenchmarkIsSpace-4 22.21 485.16 21.84x 172 | BenchmarkIsSpaceUnsafe-4 22.20 1045.68 47.10x 173 | BenchmarkIsSymbol-4 16.24 595.77 36.69x 174 | BenchmarkIsSymbolUnsafe-4 16.22 1191.50 73.46x 175 | BenchmarkIsTitle-4 20.94 519.86 24.83x 176 | BenchmarkIsTitleUnsafe-4 20.97 1042.31 49.70x 177 | BenchmarkIsUpper-4 16.16 545.92 33.78x 178 | BenchmarkIsUpperUnsafe-4 16.16 1195.17 73.96x 179 | BenchmarkSimpleFold-4 18.73 893.80 47.72x 180 | BenchmarkSimpleFoldUnsafe-4 18.73 896.38 47.86x 181 | 
BenchmarkToLower-4 4.43 12.10 2.73x 182 | BenchmarkToLowerUnsafe-4 4.44 11.39 2.57x 183 | BenchmarkToTitle-4 4.46 11.58 2.60x 184 | BenchmarkToTitleUnsafe-4 4.43 10.83 2.44x 185 | BenchmarkToUpper-4 4.45 11.42 2.57x 186 | BenchmarkToUpperUnsafe-4 4.45 12.84 2.89x 187 | BenchmarkStringToLowerCase-4 2.88 10.44 3.62x 188 | BenchmarkStringToLowerCaseUnsafe-4 2.88 10.70 3.72x 189 | BenchmarkStringToTitleCase-4 2.89 8.74 3.02x 190 | BenchmarkStringToTitleCaseUnsafe-4 2.87 11.46 3.99x 191 | BenchmarkStringToUpperCase-4 2.87 10.25 3.57x 192 | BenchmarkStringToUpperCaseUnsafe-4 2.87 12.84 4.47x 193 | ``` 194 | --------------------------------------------------------------------------------