├── LICENSE ├── README.md ├── slug.go └── slug_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # slug 2 | 3 | See the [API docs](http://go.pkgdoc.org/github.com/extemporalgenome/slug). 4 | 5 | Latin-ish inputs should have very stable output. All inputs are passed through 6 | an NFKD transform, and anything still in the Unicode Letter and Number 7 | categories is passed through intact. Anything in the Mark or Lm/Sk categories 8 | (modifiers) is skipped, and runs of characters from any other categories are 9 | collapsed to a single hyphen. 10 | -------------------------------------------------------------------------------- /slug.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package slug transforms strings into a normalized form well suited for use in URLs. 6 | package slug 7 | 8 | import ( 9 | "golang.org/x/text/unicode/norm" 10 | "encoding/hex" 11 | "unicode" 12 | "unicode/utf8" 13 | ) 14 | 15 | var lat = []*unicode.RangeTable{unicode.Letter, unicode.Number} 16 | var nop = []*unicode.RangeTable{unicode.Mark, unicode.Sk, unicode.Lm} 17 | 18 | // Slug replaces each run of characters which are not unicode letters or 19 | // numbers with a single hyphen, except for leading or trailing runs. Letters 20 | // will be stripped of diacritical marks and lowercased. Letter or number 21 | // codepoints that do not have combining marks or a lower-cased variant will 22 | // be passed through unaltered. 
23 | func Slug(s string) string { 24 | buf := make([]rune, 0, len(s)) 25 | dash := false 26 | for _, r := range norm.NFKD.String(s) { 27 | switch { 28 | // unicode 'letters' like mandarin characters pass through 29 | case unicode.IsOneOf(lat, r): 30 | buf = append(buf, unicode.ToLower(r)) 31 | dash = true 32 | case unicode.IsOneOf(nop, r): 33 | // skip 34 | case dash: 35 | buf = append(buf, '-') 36 | dash = false 37 | } 38 | } 39 | if i := len(buf) - 1; i >= 0 && buf[i] == '-' { 40 | buf = buf[:i] 41 | } 42 | return string(buf) 43 | } 44 | 45 | // SlugAscii is identical to Slug, except that runs of one or more unicode 46 | // letters or numbers that still fall outside the ASCII range will have their 47 | // UTF-8 representation hex encoded and delimited by hyphens. As with Slug, in 48 | // no case will hyphens appear at either end of the returned string. 49 | func SlugAscii(s string) string { 50 | const m = utf8.UTFMax 51 | var ( 52 | ib [m * 3]byte 53 | ob []byte 54 | buf = make([]byte, 0, len(s)) 55 | dash = false 56 | latin = true 57 | ) 58 | for _, r := range norm.NFKD.String(s) { 59 | switch { 60 | case unicode.IsOneOf(lat, r): 61 | r = unicode.ToLower(r) 62 | n := utf8.EncodeRune(ib[:m], r) 63 | if r >= 128 { 64 | if latin && dash { 65 | buf = append(buf, '-') 66 | } 67 | n = hex.Encode(ib[m:], ib[:n]) 68 | ob = ib[m : m+n] 69 | latin = false 70 | } else { 71 | if !latin { 72 | buf = append(buf, '-') 73 | } 74 | ob = ib[:n] 75 | latin = true 76 | } 77 | dash = true 78 | buf = append(buf, ob...) 79 | case unicode.IsOneOf(nop, r): 80 | // skip 81 | case dash: 82 | buf = append(buf, '-') 83 | dash = false 84 | latin = true 85 | } 86 | } 87 | if i := len(buf) - 1; i >= 0 && buf[i] == '-' { 88 | buf = buf[:i] 89 | } 90 | return string(buf) 91 | } 92 | 93 | // IsSlugAscii returns true only if SlugAscii(s) == s. 
94 | func IsSlugAscii(s string) bool { 95 | dash := true 96 | for _, r := range s { 97 | switch { 98 | case r == '-': 99 | if dash { 100 | return false 101 | } 102 | dash = true 103 | case 'a' <= r && r <= 'z', '0' <= r && r <= '9': 104 | dash = false 105 | default: 106 | return false 107 | } 108 | } 109 | return !dash 110 | } 111 | -------------------------------------------------------------------------------- /slug_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package slug 6 | 7 | import "testing" 8 | 9 | func TestIsSlugAscii(t *testing.T) { 10 | tests := []struct { 11 | s string 12 | b bool 13 | }{ 14 | {"", false}, 15 | {"-", false}, 16 | {"A", false}, 17 | {"a", true}, 18 | {"-a", false}, 19 | {"a-", false}, 20 | {"a-0", true}, 21 | {"aa", true}, 22 | {"a--0", false}, 23 | {"abc世界def", false}, 24 | } 25 | 26 | for _, test := range tests { 27 | if IsSlugAscii(test.s) != test.b { 28 | t.Error(test.s, "!=", test.b) 29 | } 30 | } 31 | } 32 | 33 | func TestSlugAscii(t *testing.T) { 34 | var tests = []struct{ in, out string }{ 35 | {"ABC世界def-", "abc-e4b896e7958c-def"}, 36 | {"012世界", "012-e4b896e7958c"}, 37 | {"世界345", "e4b896e7958c-345"}, 38 | {"012-世界-345", "012-e4b896e7958c-345"}, 39 | } 40 | 41 | for _, test := range tests { 42 | if out := SlugAscii(test.in); out != test.out { 43 | t.Errorf("%q: %q != %q", test.in, out, test.out) 44 | } 45 | } 46 | } 47 | --------------------------------------------------------------------------------