├── .github └── workflows │ └── go.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── common.go ├── convert.go ├── convert_test.go ├── count.go ├── count_test.go ├── doc.go ├── format.go ├── format_test.go ├── go.mod ├── manipulate.go ├── manipulate_test.go ├── stringbuilder.go ├── stringbuilder_go110.go ├── translate.go ├── translate_test.go └── util_test.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | build: 11 | name: Build 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Set up Go 1.x 15 | uses: actions/setup-go@v2 16 | with: 17 | go-version: 1.17 18 | 19 | - name: Check out code into the Go module directory 20 | uses: actions/checkout@v2 21 | 22 | - name: Get dependencies 23 | run: | 24 | go mod download 25 | go get 26 | 27 | - name: Test 28 | run: go test -v -coverprofile=covprofile.cov ./... 
29 | 30 | - name: Send coverage 31 | env: 32 | COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | run: | 34 | go get github.com/mattn/goveralls 35 | go install github.com/mattn/goveralls 36 | goveralls -coverprofile=covprofile.cov -service=github 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing # 2 | 3 | Thanks for your contribution in advance. No matter what you will contribute to this project, pull request or bug report or feature discussion, it's always highly appreciated. 4 | 5 | ## New API or feature ## 6 | 7 | I want to speak more about how to add new functions to this package. 8 | 9 | Package `xstrings` is a collection of useful string functions which should be implemented in Go. It's a bit subjective to say which function should be included and which should not. I set up the following rules in order to make it clear and as objective as possible. 10 | 11 | * Rule 1: Only string algorithm, which takes string as input, can be included. 12 | * Rule 2: If a function has been implemented in package `strings`, it must not be included. 13 | * Rule 3: If a function is not language neutral, it must not be included. 14 | * Rule 4: If a function is a part of standard library in other languages, it can be included. 
15 | * Rule 5: If a function is quite useful in some famous framework or library, it can be included. 16 | 17 | New function must be discussed in project issues before submitting any code. If a pull request with new functions is sent without any ref issue, it will be rejected. 18 | 19 | ## Pull request ## 20 | 21 | Pull request is always welcome. Just make sure you have run `go fmt` and all test cases passed before submit. 22 | 23 | If the pull request is to add a new API or feature, don't forget to update README.md and add new API in function list. 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Huan Du 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xstrings 2 | 3 | [![Build Status](https://github.com/huandu/xstrings/workflows/Go/badge.svg)](https://github.com/huandu/xstrings/actions) 4 | [![Go Doc](https://godoc.org/github.com/huandu/xstrings?status.svg)](https://pkg.go.dev/github.com/huandu/xstrings) 5 | [![Go Report](https://goreportcard.com/badge/github.com/huandu/xstrings)](https://goreportcard.com/report/github.com/huandu/xstrings) 6 | [![Coverage Status](https://coveralls.io/repos/github/huandu/xstrings/badge.svg?branch=master)](https://coveralls.io/github/huandu/xstrings?branch=master) 7 | 8 | Go package [xstrings](https://godoc.org/github.com/huandu/xstrings) is a collection of string functions, which are widely used in other languages but absent in Go package [strings](http://golang.org/pkg/strings). 9 | 10 | All functions are well tested and carefully tuned for performance. 11 | 12 | ## Propose a new function 13 | 14 | Please review [contributing guideline](CONTRIBUTING.md) and [create new issue](https://github.com/huandu/xstrings/issues) to state why it should be included. 15 | 16 | ## Install 17 | 18 | Use `go get` to install this library. 19 | 20 | go get github.com/huandu/xstrings 21 | 22 | ## API document 23 | 24 | See [GoDoc](https://godoc.org/github.com/huandu/xstrings) for full document. 25 | 26 | ## Function list 27 | 28 | Go functions have a unique naming style. One, who has experience in other language but new in Go, may have difficulties to find out right string function to use. 29 | 30 | Here is a list of functions in [strings](http://golang.org/pkg/strings) and [xstrings](https://godoc.org/github.com/huandu/xstrings) with enough extra information about how to map these functions to their friends in other languages. Hope this list could be helpful for fresh gophers. 
31 | 32 | ### Package `xstrings` functions 33 | 34 | _Keep this table sorted by Function in ascending order._ 35 | 36 | | Function | Friends | # | 37 | | --------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | --------------------------------------------------- | 38 | | [Center](https://godoc.org/github.com/huandu/xstrings#Center) | `str.center` in Python; `String#center` in Ruby | [#30](https://github.com/huandu/xstrings/issues/30) | 39 | | [Count](https://godoc.org/github.com/huandu/xstrings#Count) | `String#count` in Ruby | [#16](https://github.com/huandu/xstrings/issues/16) | 40 | | [Delete](https://godoc.org/github.com/huandu/xstrings#Delete) | `String#delete` in Ruby | [#17](https://github.com/huandu/xstrings/issues/17) | 41 | | [ExpandTabs](https://godoc.org/github.com/huandu/xstrings#ExpandTabs) | `str.expandtabs` in Python | [#27](https://github.com/huandu/xstrings/issues/27) | 42 | | [FirstRuneToLower](https://godoc.org/github.com/huandu/xstrings#FirstRuneToLower) | `lcfirst` in PHP or Perl | [#15](https://github.com/huandu/xstrings/issues/15) | 43 | | [FirstRuneToUpper](https://godoc.org/github.com/huandu/xstrings#FirstRuneToUpper) | `String#capitalize` in Ruby; `ucfirst` in PHP or Perl | [#15](https://github.com/huandu/xstrings/issues/15) | 44 | | [Insert](https://godoc.org/github.com/huandu/xstrings#Insert) | `String#insert` in Ruby | [#18](https://github.com/huandu/xstrings/issues/18) | 45 | | [LastPartition](https://godoc.org/github.com/huandu/xstrings#LastPartition) | `str.rpartition` in Python; `String#rpartition` in Ruby | [#19](https://github.com/huandu/xstrings/issues/19) | 46 | | [LeftJustify](https://godoc.org/github.com/huandu/xstrings#LeftJustify) | `str.ljust` in Python; `String#ljust` in Ruby | [#28](https://github.com/huandu/xstrings/issues/28) | 47 | | [Len](https://godoc.org/github.com/huandu/xstrings#Len) | `mb_strlen` in PHP | 
[#23](https://github.com/huandu/xstrings/issues/23) | 48 | | [Partition](https://godoc.org/github.com/huandu/xstrings#Partition) | `str.partition` in Python; `String#partition` in Ruby | [#10](https://github.com/huandu/xstrings/issues/10) | 49 | | [Reverse](https://godoc.org/github.com/huandu/xstrings#Reverse) | `String#reverse` in Ruby; `strrev` in PHP; `reverse` in Perl | [#7](https://github.com/huandu/xstrings/issues/7) | 50 | | [RightJustify](https://godoc.org/github.com/huandu/xstrings#RightJustify) | `str.rjust` in Python; `String#rjust` in Ruby | [#29](https://github.com/huandu/xstrings/issues/29) | 51 | | [RuneWidth](https://godoc.org/github.com/huandu/xstrings#RuneWidth) | - | [#27](https://github.com/huandu/xstrings/issues/27) | 52 | | [Scrub](https://godoc.org/github.com/huandu/xstrings#Scrub) | `String#scrub` in Ruby | [#20](https://github.com/huandu/xstrings/issues/20) | 53 | | [Shuffle](https://godoc.org/github.com/huandu/xstrings#Shuffle) | `str_shuffle` in PHP | [#13](https://github.com/huandu/xstrings/issues/13) | 54 | | [ShuffleSource](https://godoc.org/github.com/huandu/xstrings#ShuffleSource) | `str_shuffle` in PHP | [#13](https://github.com/huandu/xstrings/issues/13) | 55 | | [Slice](https://godoc.org/github.com/huandu/xstrings#Slice) | `mb_substr` in PHP | [#9](https://github.com/huandu/xstrings/issues/9) | 56 | | [Squeeze](https://godoc.org/github.com/huandu/xstrings#Squeeze) | `String#squeeze` in Ruby | [#11](https://github.com/huandu/xstrings/issues/11) | 57 | | [Successor](https://godoc.org/github.com/huandu/xstrings#Successor) | `String#succ` or `String#next` in Ruby | [#22](https://github.com/huandu/xstrings/issues/22) | 58 | | [SwapCase](https://godoc.org/github.com/huandu/xstrings#SwapCase) | `str.swapcase` in Python; `String#swapcase` in Ruby | [#12](https://github.com/huandu/xstrings/issues/12) | 59 | | [ToCamelCase](https://godoc.org/github.com/huandu/xstrings#ToCamelCase) | `String#camelize` in RoR | 
[#1](https://github.com/huandu/xstrings/issues/1) | 60 | | [ToKebabCase](https://godoc.org/github.com/huandu/xstrings#ToKebabCase) | - | [#41](https://github.com/huandu/xstrings/issues/41) | 61 | | [ToPascalCase](https://godoc.org/github.com/huandu/xstrings#ToPascalCase) | - | [#1](https://github.com/huandu/xstrings/issues/1) | 62 | | [ToSnakeCase](https://godoc.org/github.com/huandu/xstrings#ToSnakeCase) | `String#underscore` in RoR | [#1](https://github.com/huandu/xstrings/issues/1) | 63 | | [Translate](https://godoc.org/github.com/huandu/xstrings#Translate) | `str.translate` in Python; `String#tr` in Ruby; `strtr` in PHP; `tr///` in Perl | [#21](https://github.com/huandu/xstrings/issues/21) | 64 | | [Width](https://godoc.org/github.com/huandu/xstrings#Width) | `mb_strwidth` in PHP | [#26](https://github.com/huandu/xstrings/issues/26) | 65 | | [WordCount](https://godoc.org/github.com/huandu/xstrings#WordCount) | `str_word_count` in PHP | [#14](https://github.com/huandu/xstrings/issues/14) | 66 | | [WordSplit](https://godoc.org/github.com/huandu/xstrings#WordSplit) | - | [#14](https://github.com/huandu/xstrings/issues/14) | 67 | 68 | ### Package `strings` functions 69 | 70 | _Keep this table sorted by Function in ascending order._ 71 | 72 | | Function | Friends | 73 | | --------------------------------------------------------------- | ----------------------------------------------------------------------------------- | 74 | | [Contains](http://golang.org/pkg/strings/#Contains) | `String#include?` in Ruby | 75 | | [ContainsAny](http://golang.org/pkg/strings/#ContainsAny) | - | 76 | | [ContainsRune](http://golang.org/pkg/strings/#ContainsRune) | - | 77 | | [Count](http://golang.org/pkg/strings/#Count) | `str.count` in Python; `substr_count` in PHP | 78 | | [EqualFold](http://golang.org/pkg/strings/#EqualFold) | `stricmp` in PHP; `String#casecmp` in Ruby | 79 | | [Fields](http://golang.org/pkg/strings/#Fields) | `str.split` in Python; `split` in Perl; `String#split` in 
Ruby | 80 | | [FieldsFunc](http://golang.org/pkg/strings/#FieldsFunc) | - | 81 | | [HasPrefix](http://golang.org/pkg/strings/#HasPrefix) | `str.startswith` in Python; `String#start_with?` in Ruby | 82 | | [HasSuffix](http://golang.org/pkg/strings/#HasSuffix) | `str.endswith` in Python; `String#end_with?` in Ruby | 83 | | [Index](http://golang.org/pkg/strings/#Index) | `str.index` in Python; `String#index` in Ruby; `strpos` in PHP; `index` in Perl | 84 | | [IndexAny](http://golang.org/pkg/strings/#IndexAny) | - | 85 | | [IndexByte](http://golang.org/pkg/strings/#IndexByte) | - | 86 | | [IndexFunc](http://golang.org/pkg/strings/#IndexFunc) | - | 87 | | [IndexRune](http://golang.org/pkg/strings/#IndexRune) | - | 88 | | [Join](http://golang.org/pkg/strings/#Join) | `str.join` in Python; `Array#join` in Ruby; `implode` in PHP; `join` in Perl | 89 | | [LastIndex](http://golang.org/pkg/strings/#LastIndex) | `str.rindex` in Python; `String#rindex`; `strrpos` in PHP; `rindex` in Perl | 90 | | [LastIndexAny](http://golang.org/pkg/strings/#LastIndexAny) | - | 91 | | [LastIndexFunc](http://golang.org/pkg/strings/#LastIndexFunc) | - | 92 | | [Map](http://golang.org/pkg/strings/#Map) | `String#each_codepoint` in Ruby | 93 | | [Repeat](http://golang.org/pkg/strings/#Repeat) | operator `*` in Python and Ruby; `str_repeat` in PHP | 94 | | [Replace](http://golang.org/pkg/strings/#Replace) | `str.replace` in Python; `String#sub` in Ruby; `str_replace` in PHP | 95 | | [Split](http://golang.org/pkg/strings/#Split) | `str.split` in Python; `String#split` in Ruby; `explode` in PHP; `split` in Perl | 96 | | [SplitAfter](http://golang.org/pkg/strings/#SplitAfter) | - | 97 | | [SplitAfterN](http://golang.org/pkg/strings/#SplitAfterN) | - | 98 | | [SplitN](http://golang.org/pkg/strings/#SplitN) | `str.split` in Python; `String#split` in Ruby; `explode` in PHP; `split` in Perl | 99 | | [Title](http://golang.org/pkg/strings/#Title) | `str.title` in Python | 100 | | 
[ToLower](http://golang.org/pkg/strings/#ToLower) | `str.lower` in Python; `String#downcase` in Ruby; `strtolower` in PHP; `lc` in Perl | 101 | | [ToLowerSpecial](http://golang.org/pkg/strings/#ToLowerSpecial) | - | 102 | | [ToTitle](http://golang.org/pkg/strings/#ToTitle) | - | 103 | | [ToTitleSpecial](http://golang.org/pkg/strings/#ToTitleSpecial) | - | 104 | | [ToUpper](http://golang.org/pkg/strings/#ToUpper) | `str.upper` in Python; `String#upcase` in Ruby; `strtoupper` in PHP; `uc` in Perl | 105 | | [ToUpperSpecial](http://golang.org/pkg/strings/#ToUpperSpecial) | - | 106 | | [Trim](http://golang.org/pkg/strings/#Trim) | `str.strip` in Python; `String#strip` in Ruby; `trim` in PHP | 107 | | [TrimFunc](http://golang.org/pkg/strings/#TrimFunc) | - | 108 | | [TrimLeft](http://golang.org/pkg/strings/#TrimLeft) | `str.lstrip` in Python; `String#lstrip` in Ruby; `ltrim` in PHP | 109 | | [TrimLeftFunc](http://golang.org/pkg/strings/#TrimLeftFunc) | - | 110 | | [TrimPrefix](http://golang.org/pkg/strings/#TrimPrefix) | - | 111 | | [TrimRight](http://golang.org/pkg/strings/#TrimRight) | `str.rstrip` in Python; `String#rstrip` in Ruby; `rtrim` in PHP | 112 | | [TrimRightFunc](http://golang.org/pkg/strings/#TrimRightFunc) | - | 113 | | [TrimSpace](http://golang.org/pkg/strings/#TrimSpace) | `str.strip` in Python; `String#strip` in Ruby; `trim` in PHP | 114 | | [TrimSuffix](http://golang.org/pkg/strings/#TrimSuffix) | `String#chomp` in Ruby; `chomp` in Perl | 115 | 116 | ## License 117 | 118 | This library is licensed under MIT license. See LICENSE for details. 119 | -------------------------------------------------------------------------------- /common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 3 | 4 | package xstrings 5 | 6 | const bufferMaxInitGrowSize = 2048 7 | 8 | // Lazy initialize a buffer. 
9 | func allocBuffer(orig, cur string) *stringBuilder { 10 | output := &stringBuilder{} 11 | maxSize := len(orig) * 4 12 | 13 | // Avoid to reserve too much memory at once. 14 | if maxSize > bufferMaxInitGrowSize { 15 | maxSize = bufferMaxInitGrowSize 16 | } 17 | 18 | output.Grow(maxSize) 19 | output.WriteString(orig[:len(orig)-len(cur)]) 20 | return output 21 | } 22 | -------------------------------------------------------------------------------- /convert.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 3 | 4 | package xstrings 5 | 6 | import ( 7 | "math/rand" 8 | "unicode" 9 | "unicode/utf8" 10 | ) 11 | 12 | // ToCamelCase is to convert words separated by space, underscore and hyphen to camel case. 13 | // 14 | // Some samples. 15 | // 16 | // "some_words" => "someWords" 17 | // "http_server" => "httpServer" 18 | // "no_https" => "noHttps" 19 | // "_complex__case_" => "_complex_Case_" 20 | // "some words" => "someWords" 21 | // "GOLANG_IS_GREAT" => "golangIsGreat" 22 | func ToCamelCase(str string) string { 23 | return toCamelCase(str, false) 24 | } 25 | 26 | // ToPascalCase is to convert words separated by space, underscore and hyphen to pascal case. 27 | // 28 | // Some samples. 29 | // 30 | // "some_words" => "SomeWords" 31 | // "http_server" => "HttpServer" 32 | // "no_https" => "NoHttps" 33 | // "_complex__case_" => "_Complex_Case_" 34 | // "some words" => "SomeWords" 35 | // "GOLANG_IS_GREAT" => "GolangIsGreat" 36 | func ToPascalCase(str string) string { 37 | return toCamelCase(str, true) 38 | } 39 | 40 | func toCamelCase(str string, isBig bool) string { 41 | if len(str) == 0 { 42 | return "" 43 | } 44 | 45 | buf := &stringBuilder{} 46 | var isFirstRuneUpper bool 47 | var r0, r1 rune 48 | var size int 49 | 50 | // leading connector will appear in output. 
51 | for len(str) > 0 { 52 | r0, size = utf8.DecodeRuneInString(str) 53 | str = str[size:] 54 | 55 | if !isConnector(r0) { 56 | isFirstRuneUpper = unicode.IsUpper(r0) 57 | 58 | if isBig { 59 | r0 = unicode.ToUpper(r0) 60 | } else { 61 | r0 = unicode.ToLower(r0) 62 | } 63 | 64 | break 65 | } 66 | 67 | buf.WriteRune(r0) 68 | } 69 | 70 | if len(str) == 0 { 71 | // A special case for a string contains only 1 rune. 72 | if size != 0 { 73 | buf.WriteRune(r0) 74 | } 75 | 76 | return buf.String() 77 | } 78 | 79 | for len(str) > 0 { 80 | r1 = r0 81 | r0, size = utf8.DecodeRuneInString(str) 82 | str = str[size:] 83 | 84 | if isConnector(r0) && isConnector(r1) { 85 | buf.WriteRune(r1) 86 | continue 87 | } 88 | 89 | if isConnector(r1) { 90 | isFirstRuneUpper = unicode.IsUpper(r0) 91 | r0 = unicode.ToUpper(r0) 92 | } else { 93 | if isFirstRuneUpper { 94 | if unicode.IsUpper(r0) { 95 | r0 = unicode.ToLower(r0) 96 | } else { 97 | isFirstRuneUpper = false 98 | } 99 | } 100 | 101 | buf.WriteRune(r1) 102 | } 103 | } 104 | 105 | if isFirstRuneUpper && !isBig { 106 | r0 = unicode.ToLower(r0) 107 | } 108 | 109 | buf.WriteRune(r0) 110 | return buf.String() 111 | } 112 | 113 | // ToSnakeCase can convert all upper case characters in a string to 114 | // snake case format. 115 | // 116 | // Some samples. 117 | // 118 | // "FirstName" => "first_name" 119 | // "HTTPServer" => "http_server" 120 | // "NoHTTPS" => "no_https" 121 | // "GO_PATH" => "go_path" 122 | // "GO PATH" => "go_path" // space is converted to underscore. 123 | // "GO-PATH" => "go_path" // hyphen is converted to underscore. 124 | // "http2xx" => "http_2xx" // insert an underscore before a number and after an alphabet. 
// ToKebabCase can convert all upper case characters in a string to
// kebab case format.
//
// Some samples.
//
//	"FirstName"    => "first-name"
//	"HTTPServer"   => "http-server"
//	"NoHTTPS"      => "no-https"
//	"GO_PATH"      => "go-path"        // underscore is converted to hyphen.
//	"GO PATH"      => "go-path"        // space is converted to hyphen.
//	"GO-PATH"      => "go-path"        // hyphen is kept.
//	"http2xx"      => "http-2xx"       // a hyphen is inserted between the letters and the number.
//	"HTTP20xOK"    => "http-20x-ok"
//	"Duration2m3s" => "duration-2m3s"
//	"Bld4Floor3rd" => "bld4-floor-3rd"
func ToKebabCase(str string) string {
	return camelCaseToLowerCase(str, '-')
}
// isConnector reports whether r separates words: a hyphen, an underscore
// or any rune that unicode.IsSpace treats as white space.
func isConnector(r rune) bool {
	switch r {
	case '-', '_':
		return true
	}

	return unicode.IsSpace(r)
}
| 272 | for len(remaining) > 0 { 273 | r, size = nextValidRune(remaining, r) 274 | 275 | if !isConnector(r) { 276 | break 277 | } 278 | 279 | offset += size 280 | remaining = remaining[size:] 281 | } 282 | 283 | case unicode.IsPunct(r): 284 | wt = punctWord 285 | remaining = remaining[size:] 286 | 287 | for len(remaining) > 0 { 288 | r, size = nextValidRune(remaining, r) 289 | 290 | if !unicode.IsPunct(r) { 291 | break 292 | } 293 | 294 | offset += size 295 | remaining = remaining[size:] 296 | } 297 | 298 | case unicode.IsUpper(r): 299 | wt = upperCaseWord 300 | remaining = remaining[size:] 301 | 302 | if len(remaining) == 0 { 303 | break 304 | } 305 | 306 | r, size = nextValidRune(remaining, r) 307 | 308 | switch { 309 | case unicode.IsUpper(r): 310 | prevSize := size 311 | offset += size 312 | remaining = remaining[size:] 313 | 314 | for len(remaining) > 0 { 315 | r, size = nextValidRune(remaining, r) 316 | 317 | if !unicode.IsUpper(r) { 318 | break 319 | } 320 | 321 | prevSize = size 322 | offset += size 323 | remaining = remaining[size:] 324 | } 325 | 326 | // it's a bit complex when dealing with a case like "HTTPStatus". 327 | // it's expected to be splitted into "HTTP" and "Status". 328 | // Therefore "S" should be in remaining instead of word. 
329 | if len(remaining) > 0 && isAlphabet(r) { 330 | offset -= prevSize 331 | remaining = str[offset:] 332 | } 333 | 334 | case isAlphabet(r): 335 | offset += size 336 | remaining = remaining[size:] 337 | 338 | for len(remaining) > 0 { 339 | r, size = nextValidRune(remaining, r) 340 | 341 | if !isAlphabet(r) || unicode.IsUpper(r) { 342 | break 343 | } 344 | 345 | offset += size 346 | remaining = remaining[size:] 347 | } 348 | } 349 | 350 | case isAlphabet(r): 351 | wt = alphabetWord 352 | remaining = remaining[size:] 353 | 354 | for len(remaining) > 0 { 355 | r, size = nextValidRune(remaining, r) 356 | 357 | if !isAlphabet(r) || unicode.IsUpper(r) { 358 | break 359 | } 360 | 361 | offset += size 362 | remaining = remaining[size:] 363 | } 364 | 365 | case unicode.IsNumber(r): 366 | wt = numberWord 367 | remaining = remaining[size:] 368 | 369 | for len(remaining) > 0 { 370 | r, size = nextValidRune(remaining, r) 371 | 372 | if !unicode.IsNumber(r) { 373 | break 374 | } 375 | 376 | offset += size 377 | remaining = remaining[size:] 378 | } 379 | 380 | default: 381 | wt = otherWord 382 | remaining = remaining[size:] 383 | 384 | for len(remaining) > 0 { 385 | r, size = nextValidRune(remaining, r) 386 | 387 | if size == 0 || isConnector(r) || isAlphabet(r) || unicode.IsNumber(r) || unicode.IsPunct(r) { 388 | break 389 | } 390 | 391 | offset += size 392 | remaining = remaining[size:] 393 | } 394 | } 395 | 396 | word = str[:offset] 397 | return 398 | } 399 | 400 | func nextValidRune(str string, prev rune) (r rune, size int) { 401 | var sz int 402 | 403 | for len(str) > 0 { 404 | r, sz = utf8.DecodeRuneInString(str) 405 | size += sz 406 | 407 | if r != utf8.RuneError { 408 | return 409 | } 410 | 411 | str = str[sz:] 412 | } 413 | 414 | r = prev 415 | return 416 | } 417 | 418 | func toLower(buf *stringBuilder, wt wordType, str string, connector rune) { 419 | buf.Grow(buf.Len() + len(str)) 420 | 421 | if wt != upperCaseWord && wt != connectorWord { 422 | buf.WriteString(str) 
423 | return 424 | } 425 | 426 | for len(str) > 0 { 427 | r, size := utf8.DecodeRuneInString(str) 428 | str = str[size:] 429 | 430 | if isConnector(r) { 431 | buf.WriteRune(connector) 432 | } else if unicode.IsUpper(r) { 433 | buf.WriteRune(unicode.ToLower(r)) 434 | } else { 435 | buf.WriteRune(r) 436 | } 437 | } 438 | } 439 | 440 | // SwapCase will swap characters case from upper to lower or lower to upper. 441 | func SwapCase(str string) string { 442 | var r rune 443 | var size int 444 | 445 | buf := &stringBuilder{} 446 | 447 | for len(str) > 0 { 448 | r, size = utf8.DecodeRuneInString(str) 449 | 450 | switch { 451 | case unicode.IsUpper(r): 452 | buf.WriteRune(unicode.ToLower(r)) 453 | 454 | case unicode.IsLower(r): 455 | buf.WriteRune(unicode.ToUpper(r)) 456 | 457 | default: 458 | buf.WriteRune(r) 459 | } 460 | 461 | str = str[size:] 462 | } 463 | 464 | return buf.String() 465 | } 466 | 467 | // FirstRuneToUpper converts first rune to upper case if necessary. 468 | func FirstRuneToUpper(str string) string { 469 | if str == "" { 470 | return str 471 | } 472 | 473 | r, size := utf8.DecodeRuneInString(str) 474 | 475 | if !unicode.IsLower(r) { 476 | return str 477 | } 478 | 479 | buf := &stringBuilder{} 480 | buf.WriteRune(unicode.ToUpper(r)) 481 | buf.WriteString(str[size:]) 482 | return buf.String() 483 | } 484 | 485 | // FirstRuneToLower converts first rune to lower case if necessary. 486 | func FirstRuneToLower(str string) string { 487 | if str == "" { 488 | return str 489 | } 490 | 491 | r, size := utf8.DecodeRuneInString(str) 492 | 493 | if !unicode.IsUpper(r) { 494 | return str 495 | } 496 | 497 | buf := &stringBuilder{} 498 | buf.WriteRune(unicode.ToLower(r)) 499 | buf.WriteString(str[size:]) 500 | return buf.String() 501 | } 502 | 503 | // Shuffle randomizes runes in a string and returns the result. 504 | // It uses default random source in `math/rand`. 
// Successor returns the successor to str.
//
// The rightmost alphanumeric rune in str is increased by 1. If the increment
// generates a "carry", the alphanumeric rune to the left of it is incremented.
// This process repeats until there is no carry, adding an additional rune if
// necessary: 'a' or 'A' when a letter overflows and '1' when a digit overflows.
//
// If there is no alphanumeric rune, the rightmost rune will be increased by 1
// regardless whether the result is a valid rune or not.
//
// Only following characters are alphanumeric.
//   - a - z
//   - A - Z
//   - 0 - 9
//
// Samples (borrowed from ruby's String#succ document):
//
//	"abcd"      => "abce"
//	"THX1138"   => "THX1139"
//	"<<koala>>" => "<<koalb>>"
//	"1999zzz"   => "2000aaa"
//	"ZZZ9999"   => "AAAA0000"
//	"99"        => "100"
//	"***"       => "**+"
func Successor(str string) string {
	if str == "" {
		return str
	}

	var r rune
	var i int
	carry := ' ' // ' ' means that no carry is pending.
	runes := []rune(str)
	l := len(runes)
	lastAlphanumeric := l // index in runes of the leftmost alphanumeric rune visited so far.

	for i = l - 1; i >= 0; i-- {
		r = runes[i]

		// The rune can be incremented without overflow: done.
		if ('a' <= r && r <= 'y') ||
			('A' <= r && r <= 'Y') ||
			('0' <= r && r <= '8') {
			runes[i]++
			carry = ' '
			lastAlphanumeric = i
			break
		}

		// The rune overflows: wrap it around and remember which rune has to
		// be inserted if no alphanumeric rune is left to absorb the carry.
		// Runes that are not alphanumeric are skipped.
		switch r {
		case 'z':
			runes[i] = 'a'
			carry = 'a'
			lastAlphanumeric = i

		case 'Z':
			runes[i] = 'A'
			carry = 'A'
			lastAlphanumeric = i

		case '9':
			runes[i] = '0'
			carry = '1' // "99" is followed by "100", not "000".
			lastAlphanumeric = i
		}
	}

	// Needs to add one character for carry. It is inserted right before the
	// leftmost alphanumeric rune. lastAlphanumeric is an index in runes, not
	// a byte offset in str, so the prefix must be taken from runes.
	if i < 0 && carry != ' ' {
		out := make([]rune, 0, l+1)
		out = append(out, runes[:lastAlphanumeric]...)
		out = append(out, carry)
		out = append(out, runes[lastAlphanumeric:]...)
		return string(out)
	}

	// No alphanumeric character. Simply increase last rune's value.
	if lastAlphanumeric == l {
		runes[l-1]++
	}

	return string(runes)
}
3 | 4 | package xstrings 5 | 6 | import ( 7 | "sort" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func TestToSnakeCaseAndToKebabCase(t *testing.T) { 13 | cases := _M{ 14 | "HTTPServer": "http_server", 15 | "_camelCase": "_camel_case", 16 | "NoHTTPS": "no_https", 17 | "Wi_thF": "wi_th_f", 18 | "_AnotherTES_TCaseP": "_another_tes_t_case_p", 19 | "ALL": "all", 20 | "_HELLO_WORLD_": "_hello_world_", 21 | "HELLO_WORLD": "hello_world", 22 | "HELLO____WORLD": "hello____world", 23 | "TW": "tw", 24 | "_C": "_c", 25 | "http2xx": "http_2xx", 26 | "HTTP2XX": "http2_xx", 27 | "HTTP20xOK": "http_20x_ok", 28 | "HTTP20xStatus": "http_20x_status", 29 | "HTTP-20xStatus": "http_20x_status", 30 | "a": "a", 31 | "Duration2m3s": "duration_2m3s", 32 | "Bld4Floor3rd": "bld4_floor_3rd", 33 | " _-_ ": "_____", 34 | "a1b2c3d": "a_1b2c3d", 35 | "A//B%%2c": "a//b%%2c", 36 | 37 | "HTTP状态码404/502Error": "http_状态码404/502_error", 38 | "中文(字符)": "中文(字符)", 39 | "混合ABCWords与123数字456": "混合_abc_words_与123_数字456", 40 | 41 | " sentence case ": "__sentence_case__", 42 | " Mixed-hyphen case _and SENTENCE_case and UPPER-case": "_mixed_hyphen_case__and_sentence_case_and_upper_case", 43 | "FROM CamelCase to snake/kebab-case": "from_camel_case_to_snake/kebab_case", 44 | 45 | "": "", 46 | "Abc\uFFFDE\uFFFDf\uFFFDd\uFFFD2\uFFFD00z\uFFFDZZ\uFFFDZZ": "abc_\uFFFDe\uFFFDf\uFFFDd_\uFFFD2\uFFFD00z_\uFFFDzz\uFFFDzz", 47 | "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD": "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD", 48 | 49 | "abc_123_def": "abc_123_def", 50 | } 51 | 52 | runTestCases(t, ToSnakeCase, cases) 53 | 54 | for k, v := range cases { 55 | cases[k] = strings.Replace(v, "_", "-", -1) 56 | } 57 | 58 | runTestCases(t, ToKebabCase, cases) 59 | } 60 | 61 | func TestToCamelCase(t *testing.T) { 62 | runTestCases(t, ToCamelCase, _M{ 63 | "http_server": "httpServer", 64 | "_camel_case": "_camelCase", 65 | "no_https": "noHttps", 66 | "_complex__case_": "_complex_Case_", 67 | " complex -case ": " complex Case ", 68 | "all": "all", 69 | 
"GOLANG_IS_GREAT": "golangIsGreat", 70 | "GOLANG": "golang", 71 | "a": "a", 72 | "好": "好", 73 | 74 | "FROM CamelCase to snake/kebab-case": "fromCamelCaseToSnake/kebabCase", 75 | 76 | "": "", 77 | }) 78 | } 79 | 80 | func TestToPascalCase(t *testing.T) { 81 | runTestCases(t, ToPascalCase, _M{ 82 | "http_server": "HttpServer", 83 | "_camel_case": "_CamelCase", 84 | "no_https": "NoHttps", 85 | "_complex__case_": "_Complex_Case_", 86 | " complex -case ": " Complex Case ", 87 | "all": "All", 88 | "GOLANG_IS_GREAT": "GolangIsGreat", 89 | "GOLANG": "Golang", 90 | "a": "A", 91 | "好": "好", 92 | 93 | "FROM CamelCase to snake/kebab-case": "FromCamelCaseToSnake/kebabCase", 94 | 95 | "": "", 96 | }) 97 | } 98 | 99 | func TestSwapCase(t *testing.T) { 100 | runTestCases(t, SwapCase, _M{ 101 | "swapCase": "SWAPcASE", 102 | "Θ~λa云Ξπ": "θ~ΛA云ξΠ", 103 | "a": "A", 104 | 105 | "": "", 106 | }) 107 | } 108 | 109 | func TestFirstRuneToUpper(t *testing.T) { 110 | runTestCases(t, FirstRuneToUpper, _M{ 111 | "hello, world!": "Hello, world!", 112 | "Hello, world!": "Hello, world!", 113 | "你好,世界!": "你好,世界!", 114 | "a": "A", 115 | 116 | "": "", 117 | }) 118 | } 119 | 120 | func TestFirstRuneToLower(t *testing.T) { 121 | runTestCases(t, FirstRuneToLower, _M{ 122 | "hello, world!": "hello, world!", 123 | "Hello, world!": "hello, world!", 124 | "你好,世界!": "你好,世界!", 125 | "a": "a", 126 | "A": "a", 127 | 128 | "": "", 129 | }) 130 | } 131 | 132 | func TestShuffle(t *testing.T) { 133 | // It seems there is no reliable way to test shuffled string. 134 | // Runner just make sure shuffled string has the same runes as origin string. 
135 | runner := func(str string) string { 136 | s := Shuffle(str) 137 | slice := sort.StringSlice(strings.Split(s, "")) 138 | slice.Sort() 139 | return strings.Join(slice, "") 140 | } 141 | 142 | runTestCases(t, runner, _M{ 143 | "": "", 144 | "facgbheidjk": "abcdefghijk", 145 | "尝试中文": "中尝文试", 146 | "zh英文hun排": "hhnuz排文英", 147 | }) 148 | } 149 | 150 | type testShuffleSource int 151 | 152 | // A generated random number sequance just for testing. 153 | var testShuffleTable = []int64{ 154 | 1874068156324778273, 155 | 3328451335138149956, 156 | 5263531936693774911, 157 | 7955079406183515637, 158 | 2703501726821866378, 159 | 2740103009342231109, 160 | 6941261091797652072, 161 | 1905388747193831650, 162 | 7981306761429961588, 163 | 6426100070888298971, 164 | 4831389563158288344, 165 | 261049867304784443, 166 | 1460320609597786623, 167 | 5600924393587988459, 168 | 8995016276575641803, 169 | 732830328053361739, 170 | 5486140987150761883, 171 | 545291762129038907, 172 | 6382800227808658932, 173 | 2781055864473387780, 174 | 1598098976185383115, 175 | 4990765271833742716, 176 | 5018949295715050020, 177 | 2568779411109623071, 178 | 3902890183311134652, 179 | 4893789450120281907, 180 | 2338498362660772719, 181 | 2601737961087659062, 182 | 7273596521315663110, 183 | 3337066551442961397, 184 | 8121576815539813105, 185 | 2740376916591569721, 186 | 8249030965139585917, 187 | 898860202204764712, 188 | 9010467728050264449, 189 | 685213522303989579, 190 | 2050257992909156333, 191 | 6281838661429879825, 192 | 2227583514184312746, 193 | 2873287401706343734, 194 | } 195 | 196 | func (src *testShuffleSource) Int63() int64 { 197 | n := testShuffleTable[int(*src)%len(testShuffleTable)] 198 | (*src)++ 199 | return n 200 | } 201 | 202 | func (*testShuffleSource) Seed(int64) {} 203 | 204 | func TestShuffleSource(t *testing.T) { 205 | runner := func(str string) string { 206 | var src testShuffleSource 207 | return ShuffleSource(str, &src) 208 | } 209 | 210 | runTestCases(t, runner, _M{ 211 | 
"": "", 212 | "facgbheidjk": "bkgfijached", 213 | "尝试中文怎么样": "怎试么中样尝文", 214 | "zh英文hun排": "zuhh文n英排", 215 | }) 216 | } 217 | 218 | func TestSuccessor(t *testing.T) { 219 | runTestCases(t, Successor, _M{ 220 | "": "", 221 | "abcd": "abce", 222 | "THX1138": "THX1139", 223 | "<>": "<>", 224 | "1999zzz": "2000aaa", 225 | "ZZZ9999": "AAAA0000", 226 | "***": "**+", 227 | 228 | "来点中文试试": "来点中文试诖", 229 | "中cZ英ZZ文zZ混9zZ9杂99进z位": "中dA英AA文aA混0aA0杂00进a位", 230 | }) 231 | } 232 | -------------------------------------------------------------------------------- /count.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 3 | 4 | package xstrings 5 | 6 | import ( 7 | "unicode" 8 | "unicode/utf8" 9 | ) 10 | 11 | // Len returns str's utf8 rune length. 12 | func Len(str string) int { 13 | return utf8.RuneCountInString(str) 14 | } 15 | 16 | // WordCount returns number of words in a string. 17 | // 18 | // Word is defined as a locale dependent string containing alphabetic characters, 19 | // which may also contain but not start with `'` and `-` characters. 20 | func WordCount(str string) int { 21 | var r rune 22 | var size, n int 23 | 24 | inWord := false 25 | 26 | for len(str) > 0 { 27 | r, size = utf8.DecodeRuneInString(str) 28 | 29 | switch { 30 | case isAlphabet(r): 31 | if !inWord { 32 | inWord = true 33 | n++ 34 | } 35 | 36 | case inWord && (r == '\'' || r == '-'): 37 | // Still in word. 38 | 39 | default: 40 | inWord = false 41 | } 42 | 43 | str = str[size:] 44 | } 45 | 46 | return n 47 | } 48 | 49 | const minCJKCharacter = '\u3400' 50 | 51 | // Checks r is a letter but not CJK character. 52 | func isAlphabet(r rune) bool { 53 | if !unicode.IsLetter(r) { 54 | return false 55 | } 56 | 57 | switch { 58 | // Quick check for non-CJK character. 59 | case r < minCJKCharacter: 60 | return true 61 | 62 | // Common CJK characters. 
63 | case r >= '\u4E00' && r <= '\u9FCC': 64 | return false 65 | 66 | // Rare CJK characters. 67 | case r >= '\u3400' && r <= '\u4D85': 68 | return false 69 | 70 | // Rare and historic CJK characters. 71 | case r >= '\U00020000' && r <= '\U0002B81D': 72 | return false 73 | } 74 | 75 | return true 76 | } 77 | 78 | // Width returns string width in monotype font. 79 | // Multi-byte characters are usually twice the width of single byte characters. 80 | // 81 | // Algorithm comes from `mb_strwidth` in PHP. 82 | // http://php.net/manual/en/function.mb-strwidth.php 83 | func Width(str string) int { 84 | var r rune 85 | var size, n int 86 | 87 | for len(str) > 0 { 88 | r, size = utf8.DecodeRuneInString(str) 89 | n += RuneWidth(r) 90 | str = str[size:] 91 | } 92 | 93 | return n 94 | } 95 | 96 | // RuneWidth returns character width in monotype font. 97 | // Multi-byte characters are usually twice the width of single byte characters. 98 | // 99 | // Algorithm comes from `mb_strwidth` in PHP. 100 | // http://php.net/manual/en/function.mb-strwidth.php 101 | func RuneWidth(r rune) int { 102 | switch { 103 | case r == utf8.RuneError || r < '\x20': 104 | return 0 105 | 106 | case '\x20' <= r && r < '\u2000': 107 | return 1 108 | 109 | case '\u2000' <= r && r < '\uFF61': 110 | return 2 111 | 112 | case '\uFF61' <= r && r < '\uFFA0': 113 | return 1 114 | 115 | case '\uFFA0' <= r: 116 | return 2 117 | } 118 | 119 | return 0 120 | } 121 | -------------------------------------------------------------------------------- /count_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 
3 | 4 | package xstrings 5 | 6 | import ( 7 | "fmt" 8 | "testing" 9 | ) 10 | 11 | func TestLen(t *testing.T) { 12 | runner := func(str string) string { 13 | return fmt.Sprint(Len(str)) 14 | } 15 | 16 | runTestCases(t, runner, _M{ 17 | "abcdef": "6", 18 | "中文": "2", 19 | "中yin文hun排": "9", 20 | "": "0", 21 | }) 22 | } 23 | 24 | func TestWordCount(t *testing.T) { 25 | runner := func(str string) string { 26 | return fmt.Sprint(WordCount(str)) 27 | } 28 | 29 | runTestCases(t, runner, _M{ 30 | "one word: λ": "3", 31 | "中文": "0", 32 | "你好,sekai!": "1", 33 | "oh, it's super-fancy!!a": "4", 34 | "": "0", 35 | "-": "0", 36 | "it's-'s": "1", 37 | }) 38 | } 39 | 40 | func TestWidth(t *testing.T) { 41 | runner := func(str string) string { 42 | return fmt.Sprint(Width(str)) 43 | } 44 | 45 | runTestCases(t, runner, _M{ 46 | "abcd\t0123\n7890": "12", 47 | "中zh英eng文混排": "15", 48 | "": "0", 49 | }) 50 | } 51 | 52 | func TestRuneWidth(t *testing.T) { 53 | runner := func(str string) string { 54 | return fmt.Sprint(RuneWidth([]rune(str)[0])) 55 | } 56 | 57 | runTestCases(t, runner, _M{ 58 | "a": "1", 59 | "中": "2", 60 | "\x11": "0", 61 | }) 62 | } 63 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 3 | 4 | // Package xstrings is to provide string algorithms which are useful but not included in `strings` package. 5 | // See project home page for details. https://github.com/huandu/xstrings 6 | // 7 | // Package xstrings assumes all strings are encoded in utf8. 8 | package xstrings 9 | -------------------------------------------------------------------------------- /format.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 
2 | // Licensed under the MIT license that can be found in the LICENSE file. 3 | 4 | package xstrings 5 | 6 | import ( 7 | "unicode/utf8" 8 | ) 9 | 10 | // ExpandTabs can expand tabs ('\t') rune in str to one or more spaces dpending on 11 | // current column and tabSize. 12 | // The column number is reset to zero after each newline ('\n') occurring in the str. 13 | // 14 | // ExpandTabs uses RuneWidth to decide rune's width. 15 | // For example, CJK characters will be treated as two characters. 16 | // 17 | // If tabSize <= 0, ExpandTabs panics with error. 18 | // 19 | // Samples: 20 | // 21 | // ExpandTabs("a\tbc\tdef\tghij\tk", 4) => "a bc def ghij k" 22 | // ExpandTabs("abcdefg\thij\nk\tl", 4) => "abcdefg hij\nk l" 23 | // ExpandTabs("z中\t文\tw", 4) => "z中 文 w" 24 | func ExpandTabs(str string, tabSize int) string { 25 | if tabSize <= 0 { 26 | panic("tab size must be positive") 27 | } 28 | 29 | var r rune 30 | var i, size, column, expand int 31 | var output *stringBuilder 32 | 33 | orig := str 34 | 35 | for len(str) > 0 { 36 | r, size = utf8.DecodeRuneInString(str) 37 | 38 | if r == '\t' { 39 | expand = tabSize - column%tabSize 40 | 41 | if output == nil { 42 | output = allocBuffer(orig, str) 43 | } 44 | 45 | for i = 0; i < expand; i++ { 46 | output.WriteRune(' ') 47 | } 48 | 49 | column += expand 50 | } else { 51 | if r == '\n' { 52 | column = 0 53 | } else { 54 | column += RuneWidth(r) 55 | } 56 | 57 | if output != nil { 58 | output.WriteRune(r) 59 | } 60 | } 61 | 62 | str = str[size:] 63 | } 64 | 65 | if output == nil { 66 | return orig 67 | } 68 | 69 | return output.String() 70 | } 71 | 72 | // LeftJustify returns a string with pad string at right side if str's rune length is smaller than length. 73 | // If str's rune length is larger than length, str itself will be returned. 74 | // 75 | // If pad is an empty string, str will be returned. 
76 | // 77 | // Samples: 78 | // 79 | // LeftJustify("hello", 4, " ") => "hello" 80 | // LeftJustify("hello", 10, " ") => "hello " 81 | // LeftJustify("hello", 10, "123") => "hello12312" 82 | func LeftJustify(str string, length int, pad string) string { 83 | l := Len(str) 84 | 85 | if l >= length || pad == "" { 86 | return str 87 | } 88 | 89 | remains := length - l 90 | padLen := Len(pad) 91 | 92 | output := &stringBuilder{} 93 | output.Grow(len(str) + (remains/padLen+1)*len(pad)) 94 | output.WriteString(str) 95 | writePadString(output, pad, padLen, remains) 96 | return output.String() 97 | } 98 | 99 | // RightJustify returns a string with pad string at left side if str's rune length is smaller than length. 100 | // If str's rune length is larger than length, str itself will be returned. 101 | // 102 | // If pad is an empty string, str will be returned. 103 | // 104 | // Samples: 105 | // 106 | // RightJustify("hello", 4, " ") => "hello" 107 | // RightJustify("hello", 10, " ") => " hello" 108 | // RightJustify("hello", 10, "123") => "12312hello" 109 | func RightJustify(str string, length int, pad string) string { 110 | l := Len(str) 111 | 112 | if l >= length || pad == "" { 113 | return str 114 | } 115 | 116 | remains := length - l 117 | padLen := Len(pad) 118 | 119 | output := &stringBuilder{} 120 | output.Grow(len(str) + (remains/padLen+1)*len(pad)) 121 | writePadString(output, pad, padLen, remains) 122 | output.WriteString(str) 123 | return output.String() 124 | } 125 | 126 | // Center returns a string with pad string at both side if str's rune length is smaller than length. 127 | // If str's rune length is larger than length, str itself will be returned. 128 | // 129 | // If pad is an empty string, str will be returned. 
130 | // 131 | // Samples: 132 | // 133 | // Center("hello", 4, " ") => "hello" 134 | // Center("hello", 10, " ") => " hello " 135 | // Center("hello", 10, "123") => "12hello123" 136 | func Center(str string, length int, pad string) string { 137 | l := Len(str) 138 | 139 | if l >= length || pad == "" { 140 | return str 141 | } 142 | 143 | remains := length - l 144 | padLen := Len(pad) 145 | 146 | output := &stringBuilder{} 147 | output.Grow(len(str) + (remains/padLen+1)*len(pad)) 148 | writePadString(output, pad, padLen, remains/2) 149 | output.WriteString(str) 150 | writePadString(output, pad, padLen, (remains+1)/2) 151 | return output.String() 152 | } 153 | 154 | func writePadString(output *stringBuilder, pad string, padLen, remains int) { 155 | var r rune 156 | var size int 157 | 158 | repeats := remains / padLen 159 | 160 | for i := 0; i < repeats; i++ { 161 | output.WriteString(pad) 162 | } 163 | 164 | remains = remains % padLen 165 | 166 | if remains != 0 { 167 | for i := 0; i < remains; i++ { 168 | r, size = utf8.DecodeRuneInString(pad) 169 | output.WriteRune(r) 170 | pad = pad[size:] 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /format_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 
3 | 4 | package xstrings 5 | 6 | import ( 7 | "strconv" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func TestExpandTabs(t *testing.T) { 13 | runner := func(str string) (result string) { 14 | defer func() { 15 | if e := recover(); e != nil { 16 | result = e.(string) 17 | } 18 | }() 19 | 20 | input := strings.Split(str, separator) 21 | n, _ := strconv.Atoi(input[1]) 22 | return ExpandTabs(input[0], n) 23 | } 24 | 25 | runTestCases(t, runner, _M{ 26 | sep("a\tbc\tdef\tghij\tk", "4"): "a bc def ghij k", 27 | sep("abcdefg\thij\nk\tl", "4"): "abcdefg hij\nk l", 28 | sep("z中\t文\tw", "4"): "z中 文 w", 29 | sep("abcdef", "4"): "abcdef", 30 | 31 | sep("abc\td\tef\tghij\nk\tl", "3"): "abc d ef ghij\nk l", 32 | sep("abc\td\tef\tghij\nk\tl", "1"): "abc d ef ghij\nk l", 33 | 34 | sep("abc", "0"): "tab size must be positive", 35 | sep("abc", "-1"): "tab size must be positive", 36 | }) 37 | } 38 | 39 | func TestLeftJustify(t *testing.T) { 40 | runner := func(str string) string { 41 | input := strings.Split(str, separator) 42 | n, _ := strconv.Atoi(input[1]) 43 | return LeftJustify(input[0], n, input[2]) 44 | } 45 | 46 | runTestCases(t, runner, _M{ 47 | sep("hello", "4", " "): "hello", 48 | sep("hello", "10", " "): "hello ", 49 | sep("hello", "10", "123"): "hello12312", 50 | 51 | sep("hello中文test", "4", " "): "hello中文test", 52 | sep("hello中文test", "12", " "): "hello中文test ", 53 | sep("hello中文test", "18", "测试!"): "hello中文test测试!测试!测", 54 | 55 | sep("hello中文test", "0", "123"): "hello中文test", 56 | sep("hello中文test", "18", ""): "hello中文test", 57 | }) 58 | } 59 | 60 | func TestRightJustify(t *testing.T) { 61 | runner := func(str string) string { 62 | input := strings.Split(str, separator) 63 | n, _ := strconv.Atoi(input[1]) 64 | return RightJustify(input[0], n, input[2]) 65 | } 66 | 67 | runTestCases(t, runner, _M{ 68 | sep("hello", "4", " "): "hello", 69 | sep("hello", "10", " "): " hello", 70 | sep("hello", "10", "123"): "12312hello", 71 | 72 | sep("hello中文test", "4", " "): 
"hello中文test", 73 | sep("hello中文test", "12", " "): " hello中文test", 74 | sep("hello中文test", "18", "测试!"): "测试!测试!测hello中文test", 75 | 76 | sep("hello中文test", "0", "123"): "hello中文test", 77 | sep("hello中文test", "18", ""): "hello中文test", 78 | }) 79 | } 80 | 81 | func TestCenter(t *testing.T) { 82 | runner := func(str string) string { 83 | input := strings.Split(str, separator) 84 | n, _ := strconv.Atoi(input[1]) 85 | return Center(input[0], n, input[2]) 86 | } 87 | 88 | runTestCases(t, runner, _M{ 89 | sep("hello", "4", " "): "hello", 90 | sep("hello", "10", " "): " hello ", 91 | sep("hello", "10", "123"): "12hello123", 92 | 93 | sep("hello中文test", "4", " "): "hello中文test", 94 | sep("hello中文test", "12", " "): "hello中文test ", 95 | sep("hello中文test", "18", "测试!"): "测试!hello中文test测试!测", 96 | 97 | sep("hello中文test", "0", "123"): "hello中文test", 98 | sep("hello中文test", "18", ""): "hello中文test", 99 | }) 100 | } 101 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/huandu/xstrings 2 | 3 | go 1.12 4 | -------------------------------------------------------------------------------- /manipulate.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 3 | 4 | package xstrings 5 | 6 | import ( 7 | "strings" 8 | "unicode/utf8" 9 | ) 10 | 11 | // Reverse a utf8 encoded string. 12 | func Reverse(str string) string { 13 | var size int 14 | 15 | tail := len(str) 16 | buf := make([]byte, tail) 17 | s := buf 18 | 19 | for len(str) > 0 { 20 | _, size = utf8.DecodeRuneInString(str) 21 | tail -= size 22 | s = append(s[:tail], []byte(str[:size])...) 23 | str = str[size:] 24 | } 25 | 26 | return string(buf) 27 | } 28 | 29 | // Slice a string by rune. 
30 | // 31 | // Start must satisfy 0 <= start <= rune length. 32 | // 33 | // End can be positive, zero or negative. 34 | // If end >= 0, start and end must satisfy start <= end <= rune length. 35 | // If end < 0, it means slice to the end of string. 36 | // 37 | // Otherwise, Slice will panic as out of range. 38 | func Slice(str string, start, end int) string { 39 | var size, startPos, endPos int 40 | 41 | origin := str 42 | 43 | if start < 0 || end > len(str) || (end >= 0 && start > end) { 44 | panic("out of range") 45 | } 46 | 47 | if end >= 0 { 48 | end -= start 49 | } 50 | 51 | for start > 0 && len(str) > 0 { 52 | _, size = utf8.DecodeRuneInString(str) 53 | start-- 54 | startPos += size 55 | str = str[size:] 56 | } 57 | 58 | if end < 0 { 59 | return origin[startPos:] 60 | } 61 | 62 | endPos = startPos 63 | 64 | for end > 0 && len(str) > 0 { 65 | _, size = utf8.DecodeRuneInString(str) 66 | end-- 67 | endPos += size 68 | str = str[size:] 69 | } 70 | 71 | if len(str) == 0 && (start > 0 || end > 0) { 72 | panic("out of range") 73 | } 74 | 75 | return origin[startPos:endPos] 76 | } 77 | 78 | // Partition splits a string by sep into three parts. 79 | // The return value is a slice of strings with head, match and tail. 80 | // 81 | // If str contains sep, for example "hello" and "l", Partition returns 82 | // 83 | // "he", "l", "lo" 84 | // 85 | // If str doesn't contain sep, for example "hello" and "x", Partition returns 86 | // 87 | // "hello", "", "" 88 | func Partition(str, sep string) (head, match, tail string) { 89 | index := strings.Index(str, sep) 90 | 91 | if index == -1 { 92 | head = str 93 | return 94 | } 95 | 96 | head = str[:index] 97 | match = str[index : index+len(sep)] 98 | tail = str[index+len(sep):] 99 | return 100 | } 101 | 102 | // LastPartition splits a string by last instance of sep into three parts. 103 | // The return value is a slice of strings with head, match and tail. 
104 | // 105 | // If str contains sep, for example "hello" and "l", LastPartition returns 106 | // 107 | // "hel", "l", "o" 108 | // 109 | // If str doesn't contain sep, for example "hello" and "x", LastPartition returns 110 | // 111 | // "", "", "hello" 112 | func LastPartition(str, sep string) (head, match, tail string) { 113 | index := strings.LastIndex(str, sep) 114 | 115 | if index == -1 { 116 | tail = str 117 | return 118 | } 119 | 120 | head = str[:index] 121 | match = str[index : index+len(sep)] 122 | tail = str[index+len(sep):] 123 | return 124 | } 125 | 126 | // Insert src into dst at given rune index. 127 | // Index is counted by runes instead of bytes. 128 | // 129 | // If index is out of range of dst, panic with out of range. 130 | func Insert(dst, src string, index int) string { 131 | return Slice(dst, 0, index) + src + Slice(dst, index, -1) 132 | } 133 | 134 | // Scrub scrubs invalid utf8 bytes with repl string. 135 | // Adjacent invalid bytes are replaced only once. 136 | func Scrub(str, repl string) string { 137 | var buf *stringBuilder 138 | var r rune 139 | var size, pos int 140 | var hasError bool 141 | 142 | origin := str 143 | 144 | for len(str) > 0 { 145 | r, size = utf8.DecodeRuneInString(str) 146 | 147 | if r == utf8.RuneError { 148 | if !hasError { 149 | if buf == nil { 150 | buf = &stringBuilder{} 151 | } 152 | 153 | buf.WriteString(origin[:pos]) 154 | hasError = true 155 | } 156 | } else if hasError { 157 | hasError = false 158 | buf.WriteString(repl) 159 | 160 | origin = origin[pos:] 161 | pos = 0 162 | } 163 | 164 | pos += size 165 | str = str[size:] 166 | } 167 | 168 | if buf != nil { 169 | buf.WriteString(origin) 170 | return buf.String() 171 | } 172 | 173 | // No invalid byte. 174 | return origin 175 | } 176 | 177 | // WordSplit splits a string into words. Returns a slice of words. 178 | // If there is no word in a string, return nil. 
179 | // 180 | // Word is defined as a locale dependent string containing alphabetic characters, 181 | // which may also contain but not start with `'` and `-` characters. 182 | func WordSplit(str string) []string { 183 | var word string 184 | var words []string 185 | var r rune 186 | var size, pos int 187 | 188 | inWord := false 189 | 190 | for len(str) > 0 { 191 | r, size = utf8.DecodeRuneInString(str) 192 | 193 | switch { 194 | case isAlphabet(r): 195 | if !inWord { 196 | inWord = true 197 | word = str 198 | pos = 0 199 | } 200 | 201 | case inWord && (r == '\'' || r == '-'): 202 | // Still in word. 203 | 204 | default: 205 | if inWord { 206 | inWord = false 207 | words = append(words, word[:pos]) 208 | } 209 | } 210 | 211 | pos += size 212 | str = str[size:] 213 | } 214 | 215 | if inWord { 216 | words = append(words, word[:pos]) 217 | } 218 | 219 | return words 220 | } 221 | -------------------------------------------------------------------------------- /manipulate_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 
3 | 4 | package xstrings 5 | 6 | import ( 7 | "strconv" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func TestReverse(t *testing.T) { 13 | runTestCases(t, Reverse, _M{ 14 | "reverse string": "gnirts esrever", 15 | "中文如何?": "?何如文中", 16 | "中en文混~排怎样?a": "a?样怎排~混文ne中", 17 | }) 18 | } 19 | 20 | func TestSlice(t *testing.T) { 21 | runner := func(str string) (result string) { 22 | defer func() { 23 | if e := recover(); e != nil { 24 | result = e.(string) 25 | } 26 | }() 27 | 28 | strs := split(str) 29 | start, _ := strconv.ParseInt(strs[1], 10, 0) 30 | end, _ := strconv.ParseInt(strs[2], 10, 0) 31 | 32 | result = Slice(strs[0], int(start), int(end)) 33 | return 34 | } 35 | 36 | runTestCases(t, runner, _M{ 37 | sep("abcdefghijk", "3", "8"): "defgh", 38 | sep("来点中文如何?", "2", "7"): "中文如何?", 39 | sep("中en文混~排总是少不了的a", "2", "8"): "n文混~排总", 40 | sep("中en文混~排总是少不了的a", "0", "0"): "", 41 | sep("中en文混~排总是少不了的a", "14", "14"): "", 42 | sep("中en文混~排总是少不了的a", "5", "-1"): "~排总是少不了的a", 43 | sep("中en文混~排总是少不了的a", "14", "-1"): "", 44 | 45 | sep("let us slice out of range", "-3", "3"): "out of range", 46 | sep("超出范围哦", "2", "6"): "out of range", 47 | sep("don't do this", "3", "2"): "out of range", 48 | sep("千gan万de不piao要liang", "19", "19"): "out of range", 49 | }) 50 | } 51 | 52 | func TestPartition(t *testing.T) { 53 | runner := func(str string) string { 54 | input := strings.Split(str, separator) 55 | head, match, tail := Partition(input[0], input[1]) 56 | return sep(head, match, tail) 57 | } 58 | 59 | runTestCases(t, runner, _M{ 60 | sep("hello", "l"): sep("he", "l", "lo"), 61 | sep("中文总少不了", "少"): sep("中文总", "少", "不了"), 62 | sep("z这个zh英文混排hao不", "h英文"): sep("z这个z", "h英文", "混排hao不"), 63 | sep("边界tiao件zen能忘", "边界"): sep("", "边界", "tiao件zen能忘"), 64 | sep("尾巴ye别忘le", "忘le"): sep("尾巴ye别", "忘le", ""), 65 | 66 | sep("hello", "x"): sep("hello", "", ""), 67 | sep("不是晩香玉", "晚"): sep("不是晩香玉", "", ""), // Hint: 晩 is not 晚 :) 68 | sep("来ge混排ba", "e 混"): sep("来ge混排ba", "", ""), 69 | }) 70 | } 71 | 
72 | func TestLastPartition(t *testing.T) { 73 | runner := func(str string) string { 74 | input := strings.Split(str, separator) 75 | head, match, tail := LastPartition(input[0], input[1]) 76 | return sep(head, match, tail) 77 | } 78 | 79 | runTestCases(t, runner, _M{ 80 | sep("hello", "l"): sep("hel", "l", "o"), 81 | sep("少量中文总少不了", "少"): sep("少量中文总", "少", "不了"), 82 | sep("z这个zh英文ch英文混排hao不", "h英文"): sep("z这个zh英文c", "h英文", "混排hao不"), 83 | sep("边界tiao件zen能忘边界", "边界"): sep("边界tiao件zen能忘", "边界", ""), 84 | sep("尾巴ye别忘le", "尾巴"): sep("", "尾巴", "ye别忘le"), 85 | 86 | sep("hello", "x"): sep("", "", "hello"), 87 | sep("不是晩香玉", "晚"): sep("", "", "不是晩香玉"), // Hint: 晩 is not 晚 :) 88 | sep("来ge混排ba", "e 混"): sep("", "", "来ge混排ba"), 89 | }) 90 | } 91 | 92 | func TestInsert(t *testing.T) { 93 | runner := func(str string) (result string) { 94 | defer func() { 95 | if e := recover(); e != nil { 96 | result = e.(string) 97 | } 98 | }() 99 | 100 | strs := split(str) 101 | index, _ := strconv.ParseInt(strs[2], 10, 0) 102 | result = Insert(strs[0], strs[1], int(index)) 103 | return 104 | } 105 | 106 | runTestCases(t, runner, _M{ 107 | sep("abcdefg", "hi", "3"): "abchidefg", 108 | sep("少量中文是必须的", "混pai", "4"): "少量中文混pai是必须的", 109 | sep("zh英文hun排", "~!", "5"): "zh英文h~!un排", 110 | sep("插在beginning", "我", "0"): "我插在beginning", 111 | sep("插在ending", "我", "8"): "插在ending我", 112 | 113 | sep("超tian出yuan边tu界po", "foo", "-1"): "out of range", 114 | sep("超tian出yuan边tu界po", "foo", "17"): "out of range", 115 | }) 116 | } 117 | 118 | func TestScrub(t *testing.T) { 119 | runner := func(str string) string { 120 | strs := split(str) 121 | return Scrub(strs[0], strs[1]) 122 | } 123 | 124 | runTestCases(t, runner, _M{ 125 | sep("ab\uFFFDcd\xFF\xCEefg\xFF\xFC\xFD\xFAhijk", "*"): "ab*cd*efg*hijk", 126 | sep("no错误です", "*"): "no错误です", 127 | sep("", "*"): "", 128 | }) 129 | } 130 | 131 | func TestWordSplit(t *testing.T) { 132 | runner := func(str string) string { 133 | return sep(WordSplit(str)...) 
// runeRangeMap maps the inclusive source range [FromLo, FromHi] onto the
// target range whose ends are ToLo and ToHi.
type runeRangeMap struct {
	FromLo rune // Lower bound of the source range.
	FromHi rune // Inclusive upper bound of the source range.
	ToLo   rune // Target rune for FromLo.
	ToHi   rune // Target rune for FromHi; smaller than ToLo for a descending mapping.
}

// runeDict is a direct lookup table for ASCII runes.
// A zero entry means that the rune has no mapping.
type runeDict struct {
	Dict [unicode.MaxASCII + 1]rune
}

// runeMap holds the mappings of single runes above unicode.MaxASCII.
type runeMap map[rune]rune

// Translator can translate string with pre-compiled from and to patterns.
// If a from/to pattern pair needs to be used more than once, it's recommended
// to create a Translator and reuse it.
type Translator struct {
	quickDict  *runeDict       // Table for single ASCII runes; nil until the first one is added.
	runeMap    runeMap         // Single runes above ASCII; nil until the first one is added.
	ranges     []*runeRangeMap // Rune ranges, in the order they were compiled.
	mappedRune rune            // If mappedRune >= 0, all matched runes are translated to the mappedRune.
	reverted   bool            // True if the from pattern started with '^', i.e. the match is inverted.
	hasPattern bool            // False only for a Translator built from an empty from pattern.
}

// NewTranslator compiles a from/to pattern pair into a Translator.
//
// An empty from pattern yields a Translator without any pattern. A leading '^'
// in from inverts the match; in that case only the last rune of to is used as
// replacement. An empty to pattern turns the Translator into a deleter: every
// matched rune is mapped to utf8.RuneError.
func NewTranslator(from, to string) *Translator {
	tr := &Translator{}
	if from == "" {
		return tr
	}

	deletion := to == ""
	reverted := from[0] == '^'
	if reverted {
		from = from[1:]
	}

	var (
		srcCur, srcEnd, srcStep rune
		dstCur, dstEnd, dstStep rune
		singles                 []rune
	)

	// advanceDst moves dstCur to the next rune of the to pattern.
	advanceDst := func() {
		switch {
		case dstEnd == utf8.RuneError:
			// The to pattern is used up: keep repeating dstCur.
		case dstStep == 0:
			// Plain runes: the previous look-ahead rune becomes the current one.
			to, dstCur, dstEnd, dstStep = nextRuneRange(to, dstEnd)
		case dstCur != dstEnd:
			// Still inside a range: take one step.
			dstCur += dstStep
		case to == "":
			// The range is finished and nothing follows it.
			dstEnd = utf8.RuneError
		default:
			// The range is finished: read a fresh pair from the to pattern.
			to, dstCur, dstEnd, dstStep = nextRuneRange(to, utf8.RuneError)
		}
	}

	switch {
	case deletion:
		dstCur, dstEnd = utf8.RuneError, utf8.RuneError
	case reverted:
		// Only the last rune of the to pattern matters for an inverted match.
		for len(to) > 0 {
			var width int
			dstCur, width = utf8.DecodeRuneInString(to)
			to = to[width:]
		}
		dstEnd = utf8.RuneError
	default:
		to, dstCur, dstEnd, dstStep = nextRuneRange(to, utf8.RuneError)
	}

	srcEnd = utf8.RuneError

	for from != "" {
		from, srcCur, srcEnd, srcStep = nextRuneRange(from, srcEnd)

		// A plain rune: pair it with the current target rune.
		if srcStep == 0 {
			singles = tr.addRune(srcCur, dstCur, singles)
			advanceDst()
			continue
		}

		for dstEnd != utf8.RuneError && srcCur != srcEnd {
			// The target side offers a plain rune only: consume the source
			// range one rune at a time.
			if dstStep == 0 {
				singles = tr.addRune(srcCur, dstCur, singles)
				advanceDst()
				srcCur += srcStep
				continue
			}

			srcSpan := (srcEnd - srcCur) * srcStep
			dstSpan := (dstEnd - dstCur) * dstStep

			// The target range is long enough to cover the rest of the source range.
			if srcSpan <= dstSpan {
				srcCur, dstCur = tr.addRuneRange(srcCur, srcEnd, dstCur, dstCur+srcSpan*dstStep, singles)
				advanceDst()
				break
			}

			// The target range is too short: map as much as it covers and
			// go on with whatever the to pattern provides next.
			srcCur, dstCur = tr.addRuneRange(srcCur, srcCur+dstSpan*srcStep, dstCur, dstEnd, singles)
			srcCur += srcStep
			advanceDst()

			// Exactly one source rune is left over; the loop condition would
			// skip it, so it is added as a single rune here.
			if srcCur == srcEnd {
				singles = tr.addRune(srcCur, dstCur, singles)
				advanceDst()
			}
		}

		// Whatever is left of the source range is mapped to one target rune.
		if srcCur != srcEnd {
			_, dstCur = tr.addRuneRange(srcCur, srcEnd, dstCur, dstCur, singles)
		}

		srcEnd = utf8.RuneError
	}

	// The last look-ahead rune of the from pattern has not been mapped yet.
	if srcEnd != utf8.RuneError {
		tr.addRune(srcEnd, dstCur, singles)
	}

	tr.hasPattern = true
	tr.reverted = reverted
	tr.mappedRune = -1

	// Deleting and inverted translators map every match to one fixed rune.
	if deletion || reverted {
		tr.mappedRune = dstCur
	}

	return tr
}

// addRune records the mapping from -> to for a single rune and returns
// singleRunes with from appended to it.
func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune {
	if from > unicode.MaxASCII {
		if tr.runeMap == nil {
			tr.runeMap = make(runeMap)
		}

		tr.runeMap[from] = to
		return append(singleRunes, from)
	}

	if tr.quickDict == nil {
		tr.quickDict = &runeDict{}
	}

	tr.quickDict.Dict[from] = to
	return append(singleRunes, from)
}

// addRuneRange records a mapping between two rune ranges and returns fromHi
// and toHi unchanged. The range is stored with its source bounds in ascending
// order; if the bounds have to be swapped, the target bounds are swapped too.
func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) {
	rrm := &runeRangeMap{FromLo: fromLo, FromHi: fromHi, ToLo: toLo, ToHi: toHi}

	if fromLo >= fromHi {
		rrm.FromLo, rrm.FromHi = fromHi, fromLo
		rrm.ToLo, rrm.ToHi = toHi, toLo
	}

	// A single rune recorded earlier loses against a range that covers it:
	// drop its single-rune mapping.
	for _, single := range singleRunes {
		if single < rrm.FromLo || rrm.FromHi < single {
			continue
		}

		if single <= unicode.MaxASCII {
			tr.quickDict.Dict[single] = 0
		} else {
			delete(tr.runeMap, single)
		}
	}

	tr.ranges = append(tr.ranges, rrm)
	return fromHi, toHi
}

// nextRuneRange reads the next pair of runes from the pattern str.
//
// last is the look-ahead rune left over from the previous call, or
// utf8.RuneError if there is none. The pair is returned in start and end;
// rangeStep is 1 or -1 if the two runes were joined by an unescaped '-' and 0
// otherwise. If str runs out before a pair is complete, start is the last rune
// seen and end is utf8.RuneError. remaining is the unread rest of str.
func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) {
	remaining = str
	escaped, ranged := false, false

	for remaining != "" {
		r, width := utf8.DecodeRuneInString(remaining)
		remaining = remaining[width:]

		if !escaped {
			switch r {
			case '\\':
				escaped = true
				continue
			case '-':
				// A hyphen with no rune before it cannot open a range; ignore it.
				if last != utf8.RuneError {
					start = last
					ranged = true
				}
				continue
			}
		}

		escaped = false

		// First rune of the pair.
		if last == utf8.RuneError {
			last = r
			continue
		}

		// A range whose two ends are equal is just that one rune.
		if ranged && last == r {
			ranged = false
			continue
		}

		start, end = last, r

		if ranged {
			rangeStep = -1
			if start < end {
				rangeStep = 1
			}
		}

		return
	}

	start, end = last, utf8.RuneError
	return
}
294 | func (tr *Translator) Translate(str string) string { 295 | if !tr.hasPattern || str == "" { 296 | return str 297 | } 298 | 299 | var r rune 300 | var size int 301 | var needTr bool 302 | 303 | orig := str 304 | 305 | var output *stringBuilder 306 | 307 | for len(str) > 0 { 308 | r, size = utf8.DecodeRuneInString(str) 309 | r, needTr = tr.TranslateRune(r) 310 | 311 | if needTr && output == nil { 312 | output = allocBuffer(orig, str) 313 | } 314 | 315 | if r != utf8.RuneError && output != nil { 316 | output.WriteRune(r) 317 | } 318 | 319 | str = str[size:] 320 | } 321 | 322 | // No character is translated. 323 | if output == nil { 324 | return orig 325 | } 326 | 327 | return output.String() 328 | } 329 | 330 | // TranslateRune return translated rune and true if r matches the from pattern. 331 | // If r doesn't match the pattern, original r is returned and translated is false. 332 | func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) { 333 | switch { 334 | case tr.quickDict != nil: 335 | if r <= unicode.MaxASCII { 336 | result = tr.quickDict.Dict[r] 337 | 338 | if result != 0 { 339 | translated = true 340 | 341 | if tr.mappedRune >= 0 { 342 | result = tr.mappedRune 343 | } 344 | 345 | break 346 | } 347 | } 348 | 349 | fallthrough 350 | 351 | case tr.runeMap != nil: 352 | var ok bool 353 | 354 | if result, ok = tr.runeMap[r]; ok { 355 | translated = true 356 | 357 | if tr.mappedRune >= 0 { 358 | result = tr.mappedRune 359 | } 360 | 361 | break 362 | } 363 | 364 | fallthrough 365 | 366 | default: 367 | var rrm *runeRangeMap 368 | ranges := tr.ranges 369 | 370 | for i := len(ranges) - 1; i >= 0; i-- { 371 | rrm = ranges[i] 372 | 373 | if rrm.FromLo <= r && r <= rrm.FromHi { 374 | translated = true 375 | 376 | if tr.mappedRune >= 0 { 377 | result = tr.mappedRune 378 | break 379 | } 380 | 381 | if rrm.ToLo < rrm.ToHi { 382 | result = rrm.ToLo + r - rrm.FromLo 383 | } else if rrm.ToLo > rrm.ToHi { 384 | // ToHi can be smaller than ToLo if range 
is from higher to lower. 385 | result = rrm.ToLo - r + rrm.FromLo 386 | } else { 387 | result = rrm.ToLo 388 | } 389 | 390 | break 391 | } 392 | } 393 | } 394 | 395 | if tr.reverted { 396 | if !translated { 397 | result = tr.mappedRune 398 | } 399 | 400 | translated = !translated 401 | } 402 | 403 | if !translated { 404 | result = r 405 | } 406 | 407 | return 408 | } 409 | 410 | // HasPattern returns true if Translator has one pattern at least. 411 | func (tr *Translator) HasPattern() bool { 412 | return tr.hasPattern 413 | } 414 | 415 | // Translate str with the characters defined in from replaced by characters defined in to. 416 | // 417 | // From and to are patterns representing a set of characters. Pattern is defined as following. 418 | // 419 | // Special characters: 420 | // 421 | // 1. '-' means a range of runes, e.g. 422 | // "a-z" means all characters from 'a' to 'z' inclusive; 423 | // "z-a" means all characters from 'z' to 'a' inclusive. 424 | // 2. '^' as first character means a set of all runes excepted listed, e.g. 425 | // "^a-z" means all characters except 'a' to 'z' inclusive. 426 | // 3. '\' escapes special characters. 427 | // 428 | // Normal character represents itself, e.g. "abc" is a set including 'a', 'b' and 'c'. 429 | // 430 | // Translate will try to find a 1:1 mapping from from to to. 431 | // If to is smaller than from, last rune in to will be used to map "out of range" characters in from. 432 | // 433 | // Note that '^' only works in the from pattern. It will be considered as a normal character in the to pattern. 434 | // 435 | // If the to pattern is an empty string, Translate works exactly the same as Delete. 
436 | // 437 | // Samples: 438 | // 439 | // Translate("hello", "aeiou", "12345") => "h2ll4" 440 | // Translate("hello", "a-z", "A-Z") => "HELLO" 441 | // Translate("hello", "z-a", "a-z") => "svool" 442 | // Translate("hello", "aeiou", "*") => "h*ll*" 443 | // Translate("hello", "^l", "*") => "**ll*" 444 | // Translate("hello ^ world", `\^lo`, "*") => "he*** * w*r*d" 445 | func Translate(str, from, to string) string { 446 | tr := NewTranslator(from, to) 447 | return tr.Translate(str) 448 | } 449 | 450 | // Delete runes in str matching the pattern. 451 | // Pattern is defined in Translate function. 452 | // 453 | // Samples: 454 | // 455 | // Delete("hello", "aeiou") => "hll" 456 | // Delete("hello", "a-k") => "llo" 457 | // Delete("hello", "^a-k") => "he" 458 | func Delete(str, pattern string) string { 459 | tr := NewTranslator(pattern, "") 460 | return tr.Translate(str) 461 | } 462 | 463 | // Count how many runes in str match the pattern. 464 | // Pattern is defined in Translate function. 465 | // 466 | // Samples: 467 | // 468 | // Count("hello", "aeiou") => 3 469 | // Count("hello", "a-k") => 3 470 | // Count("hello", "^a-k") => 2 471 | func Count(str, pattern string) int { 472 | if pattern == "" || str == "" { 473 | return 0 474 | } 475 | 476 | var r rune 477 | var size int 478 | var matched bool 479 | 480 | tr := NewTranslator(pattern, "") 481 | cnt := 0 482 | 483 | for len(str) > 0 { 484 | r, size = utf8.DecodeRuneInString(str) 485 | str = str[size:] 486 | 487 | if _, matched = tr.TranslateRune(r); matched { 488 | cnt++ 489 | } 490 | } 491 | 492 | return cnt 493 | } 494 | 495 | // Squeeze deletes adjacent repeated runes in str. 496 | // If pattern is not empty, only runes matching the pattern will be squeezed. 
497 | // 498 | // Samples: 499 | // 500 | // Squeeze("hello", "") => "helo" 501 | // Squeeze("hello", "m-z") => "hello" 502 | // Squeeze("hello world", " ") => "hello world" 503 | func Squeeze(str, pattern string) string { 504 | var last, r rune 505 | var size int 506 | var skipSqueeze, matched bool 507 | var tr *Translator 508 | var output *stringBuilder 509 | 510 | orig := str 511 | last = -1 512 | 513 | if len(pattern) > 0 { 514 | tr = NewTranslator(pattern, "") 515 | } 516 | 517 | for len(str) > 0 { 518 | r, size = utf8.DecodeRuneInString(str) 519 | 520 | // Need to squeeze the str. 521 | if last == r && !skipSqueeze { 522 | if tr != nil { 523 | if _, matched = tr.TranslateRune(r); !matched { 524 | skipSqueeze = true 525 | } 526 | } 527 | 528 | if output == nil { 529 | output = allocBuffer(orig, str) 530 | } 531 | 532 | if skipSqueeze { 533 | output.WriteRune(r) 534 | } 535 | } else { 536 | if output != nil { 537 | output.WriteRune(r) 538 | } 539 | 540 | last = r 541 | skipSqueeze = false 542 | } 543 | 544 | str = str[size:] 545 | } 546 | 547 | if output == nil { 548 | return orig 549 | } 550 | 551 | return output.String() 552 | } 553 | -------------------------------------------------------------------------------- /translate_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Huan Du. All rights reserved. 2 | // Licensed under the MIT license that can be found in the LICENSE file. 
// TestTranslate checks Translate against a table of cases. Each key packs the
// three arguments (str, from, to) with sep; the value is the expected result.
func TestTranslate(t *testing.T) {
	// runner unpacks a key into the arguments of Translate.
	runner := func(str string) string {
		input := strings.Split(str, separator)
		return Translate(input[0], input[1], input[2])
	}

	runTestCases(t, runner, _M{
		// Plain sets, ranges, an empty to pattern, inversion and escaping.
		sep("hello", "aeiou", "12345"):    "h2ll4",
		sep("hello", "aeiou", ""):         "hll",
		sep("hello", "a-z", "A-Z"):        "HELLO",
		sep("hello", "z-a", "a-z"):        "svool",
		sep("hello", "aeiou", "*"):        "h*ll*",
		sep("hello", "^l", "*"):           "**ll*",
		sep("hello", "p-z", "*"):          "hello",
		sep("hello ^ world", `\^lo`, "*"): "he*** * w*r*d",

		// Runes outside the ASCII range.
		sep("中文字符测试", "文中谁敢试?", "123456"): "21字符测5",
		sep("中文字符测试", "^文中谁敢试?", "123456"): "中文666试",
		sep("中文字符测试", "字-试", "0-9"): "中90999",

		// Patterns that mix ranges and single runes, including overlaps.
		sep("h1e2l3l4o, w5o6r7l8d", "a-z,0-9", `A-Z\-a-czk-p`):       "HbEcLzLkO- WlOmRnLoD",
		sep("h1e2l3l4o, w5o6r7l8d", "a-zoh-n", "b-zakt-z"):           "t1f2x3x4k, x5k6s7x8e",
		sep("h1e2l3l4o, w5o6r7l8d", "helloa-zoh-n", "99999b-zakt-z"): "t1f2x3x4k, x5k6s7x8e",

		// Hyphens that do not form a complete range.
		sep("hello", "e-", "p"):        "hpllo",
		sep("hello", "-e-", "p"):       "hpllo",
		sep("hello", "----e---", "p"):  "hpllo",
		sep("hello", "^---e----", "p"): "peppp",

		// U+FFFD, the value of utf8.RuneError, used as an ordinary rune.
		sep("hel\uFFFDlo", "\uFFFD", "H"):    "helHlo",
		sep("hel\uFFFDlo", "^\uFFFD", "H"):   "HHHHH",
		sep("hel\uFFFDlo", "o-\uFFFDh", "H"): "HelHlH",
	})
}

// TestDelete checks Delete. Each key packs the two arguments (str, pattern)
// with sep; the value is the expected result.
func TestDelete(t *testing.T) {
	// runner unpacks a key into the arguments of Delete.
	runner := func(str string) string {
		input := strings.Split(str, separator)
		return Delete(input[0], input[1])
	}

	runTestCases(t, runner, _M{
		sep("hello", "aeiou"): "hll",
		sep("hello", "a-k"):   "llo",
		sep("hello", "^a-k"):  "he",

		sep("中文字符测试", "文中谁敢试?"): "字符测",
	})
}

// TestCount checks Count. Each key packs the two arguments (str, pattern)
// with sep; the value is the expected count formatted with fmt.Sprint.
func TestCount(t *testing.T) {
	// runner unpacks a key into the arguments of Count and renders the count
	// as a string so that it fits the string-to-string case table.
	runner := func(str string) string {
		input := strings.Split(str, separator)
		return fmt.Sprint(Count(input[0], input[1]))
	}

	runTestCases(t, runner, _M{
		sep("hello", "aeiou"): "2",
		sep("hello", "a-k"):   "2",
		sep("hello", "^a-k"):  "3",

		sep("中文字符测试", "文中谁敢试?"): "3",
	})
}

// TestSqueeze checks Squeeze. Each key packs the two arguments (str, pattern)
// with sep; the value is the expected result.
func TestSqueeze(t *testing.T) {
	// runner unpacks a key into the arguments of Squeeze.
	runner := func(str string) string {
		input := strings.Split(str, separator)
		return Squeeze(input[0], input[1])
	}

	runTestCases(t, runner, _M{
		sep("hello", ""):            "helo",
		sep("hello world", ""):      "helo world",
		// NOTE(review): the next two keys are identical as seen here, so the
		// second one overwrites the first in the map. Presumably they
		// differed in the number of spaces before this text was flattened -
		// confirm against the repository.
		sep("hello world", " "):     "hello world",
		sep("hello world", " "):     "hello world",
		sep("hello", "a-k"):         "hello",
		sep("hello", "^a-k"):        "helo",
		sep("hello", "^a-l"):        "hello",
		sep("foooo baaaaar", "a"):   "foooo bar",

		sep("打打打打个劫!!", ""): "打个劫!",
		sep("打打打打个劫!!", "打"): "打个劫!!",
	})
}

// _M is the case table used by the tests: it maps an input, usually several
// arguments packed with sep, to the expected output.
type _M map[string]string

const (
	// separator joins the arguments of one test case into a single map key.
	separator = " ¶ "
)

// runTestCases calls converter with every key of cases and stops the test
// with t.Fatalf at the first result that differs from the value stored under
// that key. The cases are visited in map iteration order, which is not fixed.
func runTestCases(t *testing.T, converter func(string) string, cases map[string]string) {
	for k, v := range cases {
		s := converter(k)

		if s != v {
			t.Fatalf("case fails. [case:%v]\nshould => %#v\nactual => %#v", k, v, s)
		}
	}
}

// sep packs strs into one string by joining them with separator.
func sep(strs ...string) string {
	return strings.Join(strs, separator)
}

// split undoes sep: it cuts str at every occurrence of separator.
func split(str string) []string {
	return strings.Split(str, separator)
}