├── .travis.yml ├── models ├── doc.go └── emails_model.go ├── LICENSE ├── README.md ├── shoco_test.go ├── shoco.go ├── shoco_model_filepaths.go ├── shoco_model_words_en.go └── shoco_model_text_en.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.7.x 4 | - 1.8.x 5 | - 1.9.x 6 | - 1.10.x 7 | - 1.11.x 8 | - 1.12.x 9 | - 1.13.x 10 | - tip 11 | matrix: 12 | fast_finish: true 13 | allow_failures: 14 | - go: tip 15 | -------------------------------------------------------------------------------- /models/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | 6 | // Package models contains various compression models for shoco. 7 | package models 8 | 9 | import "github.com/tmthrgd/shoco" 10 | 11 | func check(model *shoco.Model) { 12 | // The call to Compress is used for it's side-effect of 13 | // calling (*Model).checkValid. 14 | model.Compress(nil) 15 | } 16 | 17 | // WordsEn is a model optimised for words of the English langauge. 18 | func WordsEn() *shoco.Model { 19 | check(shoco.WordsEnModel) 20 | return shoco.WordsEnModel 21 | } 22 | 23 | // TextEn is a model optimised for English langauge text. 24 | func TextEn() *shoco.Model { 25 | check(shoco.TextEnModel) 26 | return shoco.TextEnModel 27 | } 28 | 29 | // FilePath is a model optimised for filepaths. 30 | func FilePath() *shoco.Model { 31 | check(shoco.FilePathModel) 32 | return shoco.FilePathModel 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Tom Thorogood. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the Tom Thorogood nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | ---- Portions of the source code are also covered by the following license: ---- 27 | 28 | The MIT License (MIT) 29 | 30 | Copyright (c) 2014 Christian Schramm 31 | 32 | Permission is hereby granted, free of charge, to any person obtaining a copy 33 | of this software and associated documentation files (the "Software"), to deal 34 | in the Software without restriction, including without limitation the rights 35 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 36 | copies of the Software, and to permit persons to whom the Software is 37 | furnished to do so, subject to the following conditions: 38 | 39 | The above copyright notice and this permission notice shall be included in 40 | all copies or substantial portions of the Software. 41 | 42 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 43 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 44 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 45 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 46 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 47 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 48 | THE SOFTWARE. 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # shoco 2 | 3 | [![GoDoc](https://godoc.org/github.com/tmthrgd/shoco?status.svg)](https://godoc.org/github.com/tmthrgd/shoco) 4 | [![Build Status](https://travis-ci.org/tmthrgd/shoco.svg?branch=master)](https://travis-ci.org/tmthrgd/shoco) 5 | 6 | **shoco** is a Golang package, based on [the **shoco** C library](https://github.com/Ed-von-Schleck/shoco), to compress and decompress short strings. It is very fast and easy to use. The default compression model is optimized for english words, but it is possible to generate your own compression model based on your specific input data. 7 | 8 | Compression models can be found in the [models package](https://godoc.org/github.com/tmthrgd/shoco/models). 9 | 10 | ## Download 11 | 12 | ``` 13 | go get github.com/tmthrgd/shoco 14 | ``` 15 | 16 | ## Benchmark 17 | 18 | ``` 19 | BenchmarkCompress/#0-0-8 10000000 177 ns/op 20 | BenchmarkCompress/#1-4-8 5000000 264 ns/op 15.14 MB/s 21 | BenchmarkCompress/#2-5-8 5000000 349 ns/op 14.30 MB/s 22 | BenchmarkCompress/#3-240-8 300000 4768 ns/op 50.33 MB/s 23 | BenchmarkCompress/#4-58-8 1000000 1180 ns/op 49.15 MB/s 24 | BenchmarkCompress/#5-20-8 2000000 684 ns/op 29.23 MB/s 25 | BenchmarkCompress/#6-13-8 3000000 450 ns/op 28.83 MB/s 26 | BenchmarkCompress/#7-111-8 500000 2748 ns/op 40.38 MB/s 27 | BenchmarkCompress/#8-9-8 5000000 400 ns/op 22.45 MB/s 28 | BenchmarkCompress/#9-13-8 3000000 452 ns/op 28.75 MB/s 29 | BenchmarkCompress/#10-13-8 3000000 433 ns/op 30.02 MB/s 30 | BenchmarkCompress/#11-10-8 3000000 398 ns/op 25.10 MB/s 31 | BenchmarkCompress/#12-15-8 3000000 462 ns/op 32.44 MB/s 32 | BenchmarkCompress/#13-35-8 2000000 974 ns/op 35.91 MB/s 33 | BenchmarkCompress/#14-6-8 5000000 330 ns/op 18.18 MB/s 34 | BenchmarkCompress/#15-2-8 10000000 218 ns/op 9.14 MB/s 35 | BenchmarkCompress/#16-4-8 5000000 269 ns/op 14.85 MB/s 36 | BenchmarkCompress/#17-4-8 5000000 269 ns/op 14.82 MB/s 37 | BenchmarkCompress/#18-9-8 5000000 297 ns/op 30.23 MB/s 38 | BenchmarkCompress/#19-2-8 10000000 193 ns/op 10.35 MB/s 39 | BenchmarkCompress/#20-4-8 10000000 200 ns/op 19.94 MB/s 40 | BenchmarkCompress/#21-4-8 10000000 191 ns/op 20.94 MB/s 41 | BenchmarkDecompress/#0-0-8 10000000 120 ns/op 42 | BenchmarkDecompress/#1-2-8 10000000 196 ns/op 10.16 MB/s 43 | BenchmarkDecompress/#2-3-8 10000000 214 ns/op 14.02 MB/s 44 | BenchmarkDecompress/#3-169-8 500000 4170 ns/op 40.52 MB/s 45 | BenchmarkDecompress/#4-39-8 1000000 1316 ns/op 29.63 MB/s 46 | BenchmarkDecompress/#5-24-8 3000000 470 ns/op 51.04 MB/s 47 | BenchmarkDecompress/#6-17-8 5000000 369 ns/op 45.99 MB/s 48 | BenchmarkDecompress/#7-79-8 1000000 2255 ns/op 35.02 MB/s 49 | BenchmarkDecompress/#8-18-8 5000000 284 ns/op 63.29 MB/s 50 | BenchmarkDecompress/#9-22-8 5000000 333 ns/op 65.96 MB/s 51 | BenchmarkDecompress/#10-22-8 5000000 327 ns/op 67.26 MB/s 52 | BenchmarkDecompress/#11-20-8 5000000 304 ns/op 65.77 MB/s 53 | BenchmarkDecompress/#12-25-8 5000000 360 ns/op 69.35 MB/s 54 | BenchmarkDecompress/#13-46-8 2000000 858 ns/op 53.60 MB/s 55 | BenchmarkDecompress/#14-12-8 10000000 174 ns/op 68.65 MB/s 56 | BenchmarkDecompress/#15-4-8 10000000 176 ns/op 22.71 MB/s 57 | BenchmarkDecompress/#16-8-8 10000000 216 ns/op 36.92 MB/s 58 | BenchmarkDecompress/#17-8-8 10000000 222 ns/op 36.00 MB/s 59 | BenchmarkDecompress/#18-6-8 5000000 344 ns/op 17.43 MB/s 60 | BenchmarkDecompress/#19-3-8 10000000 183 ns/op 16.36 MB/s 61 | BenchmarkDecompress/#20-5-8 10000000 190 ns/op 26.31 MB/s 62 | BenchmarkDecompress/#21-5-8 10000000 188 ns/op 26.49 MB/s 63 | ``` 64 | 65 | ``` 66 | BenchmarkWords/Compress-8 100 21806321 ns/op 43.05 MB/s 67 | BenchmarkWords/Decompress-8 100 16730975 ns/op 39.60 MB/s 68 | --- BENCH: BenchmarkWords 69 | shoco_test.go:228: len(in) = 938848B 70 | shoco_test.go:229: len(out) = 662545B 71 | shoco_test.go:230: ratio = 0.705700% 72 | ``` 73 | 74 | ## License 75 | 76 | Unless otherwise noted, the shoco source files are distributed under the Modified BSD License 77 | found in the LICENSE file. 78 | -------------------------------------------------------------------------------- /shoco_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | 6 | package shoco_test 7 | 8 | import ( 9 | "encoding/hex" 10 | "fmt" 11 | "io/ioutil" 12 | "math/rand" 13 | "os" 14 | "reflect" 15 | "testing" 16 | "testing/quick" 17 | 18 | "github.com/tmthrgd/shoco" 19 | "github.com/tmthrgd/shoco/models" 20 | ) 21 | 22 | func testCompress(in string, proposed bool) string { 23 | if proposed { 24 | return hex.EncodeToString(shoco.ProposedCompress([]byte(in))) 25 | } 26 | 27 | return hex.EncodeToString(shoco.Compress([]byte(in))) 28 | } 29 | 30 | func testDecompress(in string, proposed bool) (string, error) { 31 | b, err := hex.DecodeString(in) 32 | if err != nil { 33 | return "", err 34 | } 35 | 36 | if proposed { 37 | out, err := shoco.ProposedDecompress(b) 38 | return string(out), err 39 | } 40 | 41 | out, err := shoco.Decompress(b) 42 | return string(out), err 43 | } 44 | 45 | // test cases were generated by running: 46 | // Array.from(shoco.compress("Übergrößenträger")).map(x => ('00' + x.toString(16)).slice(-2)).join('') 47 | // in the development console on https://ed-von-schleck.github.io/shoco/ 48 | var testCases = []struct { 49 | in, out string 50 | proposed bool 51 | }{ 52 | {"", "", false}, 53 | {"test", "c899", false}, 54 | {"shoco", "a26fac", false}, 55 | {"shoco is a C library to compress and decompress short strings. It is very fast and easy to use. The default compression model is optimized for english words, but you can generate your own compression model based on your specific input data.", "a26fac20892061204320a6df9b79209120d625ce1d20846420e70484a4737320d09a7420d07199732e2049742089207680792066867420846420658679209120ab652e20549420b86661aa7420d625ce1d698d20b6b86c2089206f70c8db7a8220668e20c04e896820d917732c20bf7420798c20af6e20e908906620798c72206f776e20d625ce1d698d20b6b86c20df5064208d20798c72207370656369666963208870a920dccc2e", false}, 56 | {"shoco is free software, distributed under the MIT license.", "a26fac208920669c6520d11fd8182c20dc499ddeca6420d50072209065204d495420d2b16ea02e", false}, 57 | {"Übergrößenträger", "00c3009cbc72677200c300b600c3009fc05e00c300a46780", false}, 58 | {"Hello, 世界", "48c14d2c2000e400b8009600e70095008c", false}, 59 | {"Go is an open source programming language that makes it easy to build simple, reliable, and efficient software.", "476f20892084206f708120d100ad20709e679f6ddac120d3817561676520c80920b56b83208a20658679209120bf696c6420d0dda42c20ce2a61bd652c20846420656666696369817420d11fd8182e", false}, 60 | {"\u263a\u263b\u2639", "00e2009800ba00e2009800bb00e2009800b9", false}, 61 | {"a\u263ab\u263bc\u2639d", "6100e2009800ba6200e2009800bb6300e2009800b964", false}, 62 | {"1\u20002\u20013\u20024", "3100e2008000803200e2008000813300e20080008234", false}, 63 | {"\u0250\u0250\u0250\u0250\u0250", "00c9009000c9009000c9009000c9009000c90090", false}, 64 | {"\t\v\r\f\n\u0085\u00a0\u2000\u3000", "090b0d0c0a00c2008500c200a000e20080008000e300800080", false}, 65 | {"abcçdefgğhıijklmnoöprsştuüvyz", "61626300c300a7b8666700c4009f6800c400b1696a6b6c6d6e6f00c300b670727300c5009f747500c300bc76797a", false}, 66 | {"ÿøû", "00c300bf00c300b800c300bb", false}, 67 | {"μ", "00ce00bc", false}, 68 | {"μδ", "00ce00bc00ce00b4", false}, 69 | {"\U0001f601", "00f0009f00980081", false}, 70 | {"test\x00test", "c8990000c899", false}, 71 | 72 | // See https://github.com/Ed-von-Schleck/shoco/issues/11 73 | {"μ", "01cebc", true}, 74 | {"μδ", "03cebcceb4", true}, 75 | {"\U0001f601", "03f09f9881", true}, 76 | } 77 | 78 | func TestCompress(t *testing.T) { 79 | for i, testCase := range testCases { 80 | if out := testCompress(testCase.in, testCase.proposed); out != testCase.out { 81 | t.Errorf("failed for test case #%d", i) 82 | t.Logf("got: %s", out) 83 | t.Logf("expected: %s", testCase.out) 84 | } 85 | } 86 | } 87 | 88 | func TestDecompress(t *testing.T) { 89 | for i, testCase := range testCases { 90 | in, err := testDecompress(testCase.out, testCase.proposed) 91 | if err != nil { 92 | t.Errorf("failed for test case #%d", i) 93 | t.Log(err) 94 | } else if in != testCase.in { 95 | t.Errorf("failed for test case #%d", i) 96 | t.Logf("got: %s", in) 97 | t.Logf("expected: %s", testCase.in) 98 | } 99 | } 100 | } 101 | 102 | var testModels = []struct { 103 | name string 104 | model *shoco.Model 105 | }{ 106 | {"WordsEn", models.WordsEn()}, 107 | {"TextEn", models.TextEn()}, 108 | {"FilePath", models.FilePath()}, 109 | {"Emails", models.Emails()}, 110 | } 111 | 112 | func TestRoundTrip(t *testing.T) { 113 | for _, m := range testModels { 114 | t.Run(m.name, func(t *testing.T) { 115 | if err := quick.CheckEqual(func(in []byte) (out []byte, err error) { 116 | return in, nil 117 | }, func(in []byte) (out []byte, err error) { 118 | if len(in) == 0 { 119 | return in, nil 120 | } 121 | 122 | b := m.model.Compress(in) 123 | 124 | if out, err = m.model.Decompress(b); err != nil { 125 | t.Logf("in: %x", in) 126 | t.Logf("compressed: %x", b) 127 | } 128 | 129 | return 130 | }, nil); err != nil { 131 | t.Fatal(err) 132 | } 133 | }) 134 | } 135 | } 136 | 137 | func TestProposedRoundTrip(t *testing.T) { 138 | for _, m := range testModels { 139 | t.Run(m.name, func(t *testing.T) { 140 | if err := quick.CheckEqual(func(in []byte) (out []byte, err error) { 141 | return in, nil 142 | }, func(in []byte) (out []byte, err error) { 143 | if len(in) == 0 { 144 | return in, nil 145 | } 146 | 147 | b := m.model.ProposedCompress(in) 148 | 149 | if out, err = m.model.ProposedDecompress(b); err != nil { 150 | t.Logf("in: %x", in) 151 | t.Logf("compressed: %x", b) 152 | } 153 | 154 | return 155 | }, nil); err != nil { 156 | t.Fatal(err) 157 | } 158 | }) 159 | } 160 | } 161 | 162 | func TestDecompressASCII(t *testing.T) { 163 | if err := quick.CheckEqual(func(in []byte) (out []byte, err error) { 164 | return in, nil 165 | }, shoco.Decompress, &quick.Config{ 166 | Values: func(values []reflect.Value, rand *rand.Rand) { 167 | in := make([]byte, 1+rand.Intn(128)) 168 | rand.Read(in) 169 | 170 | for i := range in { 171 | in[i] &^= 0x80 172 | 173 | for in[i] == 0 { 174 | in[i] = ^byte(rand.Intn(0x100)) &^ 0x80 175 | } 176 | } 177 | 178 | values[0] = reflect.ValueOf(in) 179 | }, 180 | }); err != nil { 181 | t.Fatal(err) 182 | } 183 | } 184 | 185 | func BenchmarkCompress(b *testing.B) { 186 | for i, testCase := range testCases { 187 | b.Run(fmt.Sprintf("#%d-%d", i, len(testCase.in)), func(b *testing.B) { 188 | in := []byte(testCase.in) 189 | b.SetBytes(int64(len(in))) 190 | 191 | if testCase.proposed { 192 | for n := 0; n < b.N; n++ { 193 | var _ = shoco.ProposedCompress(in) 194 | } 195 | } else { 196 | for n := 0; n < b.N; n++ { 197 | var _ = shoco.Compress(in) 198 | } 199 | } 200 | }) 201 | } 202 | } 203 | 204 | func BenchmarkDecompress(b *testing.B) { 205 | for i, testCase := range testCases { 206 | out, err := hex.DecodeString(testCase.out) 207 | if err != nil { 208 | b.Fatal(err) 209 | } 210 | 211 | b.Run(fmt.Sprintf("#%d-%d", i, len(out)), func(b *testing.B) { 212 | b.SetBytes(int64(len(out))) 213 | 214 | if testCase.proposed { 215 | for n := 0; n < b.N; n++ { 216 | if _, err = shoco.ProposedDecompress(out); err != nil { 217 | b.Fatal(err) 218 | } 219 | } 220 | } else { 221 | for n := 0; n < b.N; n++ { 222 | if _, err = shoco.Decompress(out); err != nil { 223 | b.Fatal(err) 224 | } 225 | } 226 | } 227 | }) 228 | } 229 | } 230 | 231 | func BenchmarkWords(b *testing.B) { 232 | f, err := os.Open("/usr/share/dict/words") 233 | if err != nil { 234 | if os.IsNotExist(err) { 235 | b.Skip("/usr/share/dict/words does not exist") 236 | } 237 | 238 | b.Fatal(err) 239 | } 240 | 241 | in, err := ioutil.ReadAll(f) 242 | f.Close() 243 | if err != nil { 244 | b.Fatal(err) 245 | } 246 | 247 | out := shoco.Compress(in) 248 | 249 | b.Logf("len(in) = %dB", len(in)) 250 | b.Logf("len(out) = %dB", len(out)) 251 | b.Logf("ratio = %f%%", float64(len(out))/float64(len(in))) 252 | 253 | b.Run("Compress", func(b *testing.B) { 254 | b.SetBytes(int64(len(in))) 255 | 256 | for n := 0; n < b.N; n++ { 257 | var _ = shoco.Compress(in) 258 | } 259 | }) 260 | 261 | b.Run("Decompress", func(b *testing.B) { 262 | b.SetBytes(int64(len(out))) 263 | 264 | for n := 0; n < b.N; n++ { 265 | if _, err = shoco.Decompress(out); err != nil { 266 | b.Fatal(err) 267 | } 268 | } 269 | }) 270 | } 271 | -------------------------------------------------------------------------------- /shoco.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | // 6 | // Copyright 2014 Christian Schramm. All rights reserved. 7 | // Use of this source code is governed by a MIT-style 8 | // license that can be found in the LICENSE file. 9 | 10 | // Package shoco is a compressor for small text strings based on the shoco C 11 | // library. 12 | package shoco 13 | 14 | import ( 15 | "bytes" 16 | "encoding/binary" 17 | "errors" 18 | "sync" 19 | ) 20 | 21 | // ErrInvalid is returned by decompress functions when the compressed input 22 | // data is malformed. 23 | var ErrInvalid = errors.New("shoco: invalid input") 24 | 25 | // DefaultModel is the default model used by the package level functions. 26 | var DefaultModel = WordsEnModel 27 | 28 | // Compress uses DefaultModel to compress the input data. 29 | func Compress(in []byte) (out []byte) { 30 | return DefaultModel.Compress(in) 31 | } 32 | 33 | // ProposedCompress uses DefaultModel to compress the input data, it uses a 34 | // shorter encoding for non-ASCII characters. 35 | func ProposedCompress(in []byte) (out []byte) { 36 | return DefaultModel.ProposedCompress(in) 37 | } 38 | 39 | // Decompress uses DefaultModel to decompress the input data, it will return 40 | // an error if the data is invalid. 41 | func Decompress(in []byte) (out []byte, err error) { 42 | return DefaultModel.Decompress(in) 43 | } 44 | 45 | // ProposedDecompress uses DefaultModel to decompress the input data, it will 46 | // return an error if the data is invalid. It requires the data to have been 47 | // previously compressed with the shorter encoding produced by 48 | // ProposedCompress. 49 | func ProposedDecompress(in []byte) (out []byte, err error) { 50 | return DefaultModel.ProposedDecompress(in) 51 | } 52 | 53 | // Pack represents encoding data for a shoco compression model. 54 | type Pack struct { 55 | Word uint32 56 | BytesPacked int 57 | BytesUnpacked int 58 | Offsets [8]uint 59 | Masks [8]int16 60 | } 61 | 62 | func (p *Pack) checkIndices(indices *[8]int16) bool { 63 | for i := 0; i < p.BytesUnpacked; i++ { 64 | if indices[i] > p.Masks[i] { 65 | return false 66 | } 67 | } 68 | 69 | return true 70 | } 71 | 72 | // Model represents a shoco compression model. 73 | // 74 | // It can be generated using the generate_compressor_model.py script in 75 | // Ed-von-Schleck/shoco. The output of that script will require conversion to 76 | // Go code. 77 | // The script is available at: https://github.com/Ed-von-Schleck/shoco/blob/4dee0fc850cdec2bdb911093fe0a6a56e3623b71/generate_compressor_model.py. 78 | type Model struct { 79 | check sync.Once 80 | 81 | ChrsByChrID []byte 82 | ChrIdsByChr [256]int8 83 | SuccessorIDsByChrIDAndChrID [][]int8 84 | ChrsByChrAndSuccessorID [][]byte 85 | Packs []Pack 86 | MinChr byte 87 | MaxSuccessorN int 88 | } 89 | 90 | func (m *Model) checkValid() { 91 | const invalidModel = "shoco: invalid model" 92 | 93 | if len(m.ChrsByChrID) == 0 || 94 | len(m.SuccessorIDsByChrIDAndChrID) != len(m.ChrsByChrID) || 95 | len(m.ChrsByChrAndSuccessorID) == 0 || len(m.Packs) == 0 || 96 | m.MaxSuccessorN > 7 { 97 | panic(invalidModel) 98 | } 99 | 100 | for _, s := range m.SuccessorIDsByChrIDAndChrID { 101 | if len(s) != len(m.ChrsByChrID) { 102 | panic(invalidModel) 103 | } 104 | } 105 | 106 | for _, p := range m.Packs { 107 | if p.BytesPacked == 0 || p.BytesPacked > 4 || p.BytesPacked&(p.BytesPacked-1) != 0 || 108 | p.BytesUnpacked == 0 || p.BytesUnpacked > 8 || p.BytesUnpacked&(p.BytesUnpacked-1) != 0 { 109 | panic(invalidModel) 110 | } 111 | } 112 | } 113 | 114 | func (m *Model) findBestEncoding(indices *[8]int16, nConsecutive int) int { 115 | for p := len(m.Packs) - 1; p >= 0; p-- { 116 | if nConsecutive >= m.Packs[p].BytesUnpacked && m.Packs[p].checkIndices(indices) { 117 | return p 118 | } 119 | } 120 | 121 | return -1 122 | } 123 | 124 | // Compress uses the given model to compress the input data. 125 | func (m *Model) Compress(in []byte) (out []byte) { 126 | return m.compress(in, false) 127 | } 128 | 129 | // ProposedCompress uses the given model to compress the input data, it uses a 130 | // shorter encoding for non-ASCII characters. 131 | func (m *Model) ProposedCompress(in []byte) (out []byte) { 132 | return m.compress(in, true) 133 | } 134 | 135 | func (m *Model) compress(in []byte, proposed bool) (out []byte) { 136 | m.check.Do(m.checkValid) 137 | 138 | var buf bytes.Buffer 139 | buf.Grow(len(in)) 140 | 141 | var indices [8]int16 142 | 143 | for len(in) != 0 { 144 | // find the longest string of known successors 145 | indices[0] = int16(m.ChrIdsByChr[in[0]]) 146 | 147 | if lastChrIndex := indices[0]; lastChrIndex >= 0 { 148 | nConsecutive := 1 149 | for ; nConsecutive <= m.MaxSuccessorN && nConsecutive < len(in); nConsecutive++ { 150 | currentIndex := m.ChrIdsByChr[in[nConsecutive]] 151 | if currentIndex < 0 { // '\0' is always -1 152 | break 153 | } 154 | 155 | sucessorIndex := m.SuccessorIDsByChrIDAndChrID[lastChrIndex][currentIndex] 156 | if sucessorIndex < 0 { 157 | break 158 | } 159 | 160 | indices[nConsecutive] = int16(sucessorIndex) 161 | lastChrIndex = int16(currentIndex) 162 | } 163 | 164 | if nConsecutive >= 2 { 165 | if packN := m.findBestEncoding(&indices, nConsecutive); packN >= 0 { 166 | code := m.Packs[packN].Word 167 | for i := 0; i < m.Packs[packN].BytesUnpacked; i++ { 168 | code |= uint32(indices[i]) << m.Packs[packN].Offsets[i] 169 | } 170 | 171 | var codeBuf [4]byte 172 | binary.BigEndian.PutUint32(codeBuf[:], code) 173 | buf.Write(codeBuf[:m.Packs[packN].BytesPacked]) 174 | 175 | in = in[m.Packs[packN].BytesUnpacked:] 176 | continue 177 | } 178 | } 179 | } 180 | 181 | if proposed { 182 | // See https://github.com/Ed-von-Schleck/shoco/issues/11 183 | if in[0]&0x80 != 0 || in[0] < 0x09 { 184 | j := byte(1) 185 | for ; int(j) < len(in) && j < 0x09 && (in[j]&0x80 != 0 || in[j] < 0x09); j++ { 186 | } 187 | 188 | buf.Grow(1 + int(j)) 189 | buf.WriteByte(j - 1) 190 | buf.Write(in[:j]) 191 | in = in[j:] 192 | } else { 193 | buf.WriteByte(in[0]) 194 | in = in[1:] 195 | } 196 | 197 | continue 198 | } 199 | 200 | if in[0]&0x80 != 0 || in[0] == 0x00 { // non-ascii case or NUL char 201 | // Encoding NUL chars in this way is not compatible with 202 | // shoco_compress. shoco_compress terminates the compression 203 | // upon encountering a NUL char. shoco_decompress will 204 | // nonetheless correctly decode compressed strings that 205 | // contained NUL chars. 206 | 207 | buf.Grow(2) 208 | buf.WriteByte(0x00) // put in a sentinel byte 209 | } 210 | 211 | buf.WriteByte(in[0]) 212 | in = in[1:] 213 | } 214 | 215 | return buf.Bytes() 216 | } 217 | 218 | // Decompress uses the given model to decompress the input data, it will return 219 | // an error if the data is invalid. 220 | func (m *Model) Decompress(in []byte) (out []byte, err error) { 221 | return m.decompress(in, false) 222 | } 223 | 224 | // ProposedDecompress uses the given model to decompress the input data, it 225 | // will return an error if the data is invalid. It requires the data to have 226 | // been previously compressed with the shorter encoding produced by 227 | // ProposedCompress. 228 | func (m *Model) ProposedDecompress(in []byte) (out []byte, err error) { 229 | return m.decompress(in, true) 230 | } 231 | 232 | func (m *Model) decompress(in []byte, proposed bool) (out []byte, err error) { 233 | m.check.Do(m.checkValid) 234 | 235 | var buf bytes.Buffer 236 | buf.Grow(len(in) * 2) 237 | 238 | for len(in) != 0 { 239 | mark := -1 240 | for val := in[0]; val&0x80 != 0; val <<= 1 { 241 | mark++ 242 | } 243 | 244 | if mark < 0 { 245 | if proposed { 246 | // See https://github.com/Ed-von-Schleck/shoco/issues/11 247 | if in[0] < 0x09 { 248 | j := in[0] + 1 249 | if len(in) < 1+int(j) { 250 | return nil, ErrInvalid 251 | } 252 | 253 | buf.Write(in[1 : 1+j]) 254 | in = in[1+j:] 255 | } else { 256 | buf.WriteByte(in[0]) 257 | in = in[1:] 258 | } 259 | 260 | continue 261 | } 262 | 263 | if in[0] == 0x00 { // ignore the sentinel value for non-ascii chars 264 | if len(in) < 2 { 265 | return nil, ErrInvalid 266 | } 267 | 268 | buf.WriteByte(in[1]) 269 | in = in[2:] 270 | } else { 271 | buf.WriteByte(in[0]) 272 | in = in[1:] 273 | } 274 | 275 | continue 276 | } 277 | 278 | if mark >= len(m.Packs) || m.Packs[mark].BytesPacked > len(in) { 279 | return nil, ErrInvalid 280 | } 281 | 282 | buf.Grow(m.Packs[mark].BytesUnpacked) 283 | 284 | var codeBuf [4]byte 285 | copy(codeBuf[:], in[:m.Packs[mark].BytesPacked]) 286 | code := binary.BigEndian.Uint32(codeBuf[:]) 287 | 288 | offset, mask := m.Packs[mark].Offsets[0], m.Packs[mark].Masks[0] 289 | 290 | idx := (code >> offset) & uint32(mask) 291 | if int(idx) >= len(m.ChrsByChrID) { 292 | return nil, ErrInvalid 293 | } 294 | 295 | lastChr := m.ChrsByChrID[idx] 296 | buf.WriteByte(lastChr) 297 | 298 | for i := 1; i < m.Packs[mark].BytesUnpacked; i++ { 299 | offset, mask := m.Packs[mark].Offsets[i], m.Packs[mark].Masks[i] 300 | 301 | idx0, idx1 := lastChr-m.MinChr, (code>>offset)&uint32(mask) 302 | if int(idx0) >= len(m.ChrsByChrAndSuccessorID) || 303 | int(idx1) >= len(m.ChrsByChrAndSuccessorID[idx0]) { 304 | return nil, ErrInvalid 305 | } 306 | 307 | lastChr = m.ChrsByChrAndSuccessorID[idx0][idx1] 308 | buf.WriteByte(lastChr) 309 | } 310 | 311 | in = in[m.Packs[mark].BytesPacked:] 312 | } 313 | 314 | return buf.Bytes(), nil 315 | } 316 | -------------------------------------------------------------------------------- /shoco_model_filepaths.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | // 6 | // Copyright 2014 Christian Schramm. All rights reserved. 7 | // Use of this source code is governed by a MIT-style 8 | // license that can be found in the LICENSE file. 9 | 10 | package shoco 11 | 12 | // FilePathModel is a model optimised for filepaths. 13 | // 14 | // Deprecated: Use models.FilePath() instead. 15 | var FilePathModel = filePathModel 16 | 17 | var filePathModel = &Model{ 18 | ChrsByChrID: []byte{'/', 'e', 's', 'r', 'o', 'i', 't', 'c', 'a', 'n', 'm', 'l', 'h', '.', 'u', 'd', 'g', 'b', 'p', '-', 'f', '2', 'k', '0', '_', '1', '3', 'v', '4', 'x', '6', 'w'}, 19 | ChrIdsByChr: [256]int8{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 13, 0, 23, 25, 21, 26, 28, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 24, -1, 8, 17, 7, 15, 1, 20, 16, 12, 5, -1, 22, 11, 10, 9, 4, 18, -1, 3, 2, 6, 14, 27, 31, 29, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 20 | SuccessorIDsByChrIDAndChrID: [][]int8{ 21 | {-1, -1, 0, -1, -1, 9, 4, 1, -1, -1, 12, 6, 2, 11, 5, 10, 3, -1, 7, -1, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 8}, 22 | {0, -1, 2, 1, -1, -1, 10, 7, 12, 5, 9, 6, -1, 13, -1, 11, -1, 3, 15, 8, -1, -1, -1, -1, 14, -1, -1, -1, -1, 4, -1, -1}, 23 | {1, 5, 6, 2, 3, 9, 0, 7, -1, -1, -1, -1, 4, 8, 13, -1, -1, -1, 12, 10, -1, -1, 14, -1, -1, -1, -1, 15, -1, -1, -1, -1}, 24 | {3, 1, 9, -1, 4, 0, 6, 2, 5, 12, 7, 15, -1, 8, 11, 14, -1, -1, -1, 13, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, -1, -1}, 25 | {7, -1, 12, 2, 8, -1, 11, 5, 9, 3, 0, 6, -1, -1, 1, 4, 15, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14}, 26 | {14, 15, 0, 13, 8, -1, 2, 5, 1, 4, 12, 6, -1, -1, -1, 9, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1}, 27 | {2, 1, 5, 7, 8, 0, -1, 15, 4, -1, 14, -1, 9, 10, -1, -1, 6, -1, -1, 11, 12, -1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 28 | {3, 1, 11, 8, 2, -1, 5, 12, 4, -1, -1, 7, 0, 13, 14, -1, -1, 15, 10, -1, -1, 9, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 29 | {12, -1, 4, 1, -1, 10, 2, 5, -1, 0, 11, 3, -1, -1, 15, 9, 6, 13, 8, 7, -1, -1, -1, -1, -1, -1, -1, 14, -1, -1, -1, -1}, 30 | {0, 6, 4, -1, 1, 10, 2, 7, 12, -1, -1, -1, -1, 13, 11, 5, 3, -1, -1, 8, 9, -1, 14, -1, -1, -1, 15, -1, -1, -1, -1, -1}, 31 | {4, 0, 8, -1, 3, 9, -1, -1, 1, -1, 12, 6, -1, 14, 15, 7, -1, 11, 2, 13, 5, -1, -1, -1, 10, -1, -1, -1, -1, -1, -1, -1}, 32 | {4, 1, 7, -1, 3, 0, 11, -1, 2, -1, -1, 5, -1, 12, 6, 9, -1, -1, 8, 10, -1, -1, 13, -1, -1, -1, -1, 14, -1, -1, -1, -1}, 33 | {6, 3, -1, 1, 0, 5, 4, -1, 2, -1, 13, -1, -1, 8, 15, -1, 10, -1, 9, 11, -1, -1, -1, -1, 12, -1, -1, -1, -1, -1, -1, -1}, 34 | {-1, -1, -1, -1, 12, -1, -1, 5, -1, -1, 13, 4, 2, -1, -1, 9, 7, -1, 0, -1, 10, 14, -1, 8, -1, 3, 1, -1, 11, -1, -1, -1}, 35 | {12, 14, 1, 0, -1, 7, 5, -1, 15, 3, 2, 4, -1, -1, -1, 6, 9, 10, 8, -1, 11, -1, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1}, 36 | {3, 0, 6, 13, 2, 1, -1, -1, 4, -1, -1, 12, -1, 10, 7, 15, -1, 5, -1, 8, 14, -1, -1, -1, 11, -1, -1, -1, -1, -1, -1, -1}, 37 | {4, 2, 6, 5, 8, 3, 1, -1, -1, 0, -1, 7, 10, 9, 12, 14, -1, 15, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 38 | {1, 5, 6, -1, 11, 13, -1, 15, 3, -1, -1, 8, -1, -1, 4, -1, 14, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 12, -1}, 39 | {5, 2, 8, 7, 6, 10, 9, 13, 0, 3, 15, 1, 14, -1, 12, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 40 | {-1, 15, 2, -1, -1, -1, 4, 6, -1, 14, 12, 9, -1, -1, -1, 1, 8, 13, 3, -1, 7, 0, -1, 11, -1, 5, -1, -1, -1, 10, -1, -1}, 41 | {7, 5, 11, 9, 0, 1, -1, 2, 4, -1, 6, -1, -1, 13, -1, 10, -1, 12, -1, 3, 8, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 42 | {2, -1, -1, -1, -1, -1, -1, 15, 14, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 8, -1, 4, -1, 1, -1, 12, 11, -1, 3, 13, 7, -1}, 43 | {3, 2, 6, -1, 10, 0, 12, -1, 5, 8, -1, 11, -1, 7, -1, 13, -1, 15, -1, 1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1}, 44 | {2, -1, -1, -1, -1, -1, -1, -1, 15, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 3, -1, 6, -1, 1, 5, 4, 8, -1, 9, -1, 12, -1}, 45 | {-1, -1, 4, 10, -1, -1, 8, 11, -1, -1, 6, 0, -1, -1, -1, 14, -1, 15, 3, -1, -1, -1, -1, 2, 13, 9, 5, -1, -1, -1, 1, 12}, 46 | {0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 14, -1, 15, -1, 9, -1, 5, -1, 2, 13, 4, 3, -1, 7, -1, 6, -1}, 47 | {1, -1, -1, -1, -1, -1, -1, -1, 14, -1, -1, -1, -1, 0, -1, 15, -1, -1, -1, 13, -1, 2, -1, 5, 3, 6, 7, -1, 9, -1, 11, -1}, 48 | {8, 0, 13, -1, 3, 2, -1, 9, 1, 6, 5, -1, -1, 10, -1, 15, 4, -1, 14, 12, 7, -1, -1, -1, 11, -1, -1, -1, -1, -1, -1, -1}, 49 | {0, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, 13, -1, 2, 15, 10, -1, 3, -1, 5, 7, -1, 8, -1, 12, -1}, 50 | {3, 9, -1, -1, -1, -1, 4, -1, 8, -1, 0, 1, -1, 10, -1, -1, -1, -1, 11, 5, -1, 6, -1, -1, 14, 13, 15, -1, 12, -1, -1, -1}, 51 | {2, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1, -1, -1, 4, -1, 15, -1, 14, -1, -1, -1, 3, -1, 5, 1, 9, 7, -1, 0, -1, 6, -1}, 52 | {11, 0, 10, 9, 3, 1, 12, 15, 2, 4, 6, -1, 13, 7, -1, 14, -1, -1, -1, 8, -1, -1, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1}, 53 | }, 54 | ChrsByChrAndSuccessorID: [][]byte{ 55 | {'2', 'd', 's', 'p', 't', '1', 'c', 'f', 'g', 'l', 'x', '0', 'm', 'b', 'n', 'e'}, 56 | {'p', '3', 'h', '1', 'l', 'c', '9', 'g', '0', 'd', 'f', '4', 'o', 'm', '2', 'P'}, 57 | {'s', 'c', 'h', 'g', 't', 'u', 'l', 'p', 'w', 'i', 'd', '.', 'm', 'S', 'W', 'f'}, 58 | {'.', '0', '/', '-', '1', '_', '2', '9', '3', '4', '7', '8', '6', '5', ':', 'a'}, 59 | {'/', '.', '0', '3', '1', '2', '6', '4', '5', '-', '7', '8', '9', '_', 'd', 'b'}, 60 | {'.', '0', '/', '4', '2', '8', '5', '6', '-', '7', '9', '3', '1', 'x', 'a', 'c'}, 61 | {'.', '/', '2', '_', '7', '0', '1', '3', '8', '4', '5', '6', '9', '-', 'a', 'd'}, 62 | {'/', '.', '-', '0', '8', '1', '5', '3', '4', '9', '2', '7', '6', 'b', 'e', 'f'}, 63 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 64 | {'4', '_', '/', '2', '.', '0', '6', '3', '8', '1', '7', '9', '5', 'a', 'b', 'd'}, 65 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 66 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 67 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 68 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 69 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 70 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 71 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 72 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 73 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 74 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 75 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 76 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 77 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 78 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 79 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 80 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 81 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 82 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 83 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 84 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 85 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 86 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 87 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 88 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 89 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 90 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 91 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 92 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 93 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 94 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 95 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 96 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 97 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 98 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 99 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 100 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 101 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 102 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 103 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 104 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 105 | {'l', '6', '0', 'p', 's', '3', 'm', 'M', 't', '1', 'r', 'c', 'w', '_', 'd', 'b'}, 106 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 107 | {'n', 'r', 't', 'l', 's', 'c', 'g', '-', 'p', 'd', 'i', 'm', '/', 'b', 'v', 'u'}, 108 | {'k', '/', 'C', 'a', 'u', 'e', 's', 'j', 'l', 'K', 'W', 'o', '6', 'i', 'g', 'c'}, 109 | {'h', 'e', 'o', '/', 'a', 't', 'k', 'l', 'r', '2', 'p', 's', 'c', '.', 'u', 'b'}, 110 | {'e', 'i', 'o', '/', 'a', 'b', 's', 'u', '-', 'S', '.', '_', 'l', 'r', 'f', 'd'}, 111 | {'/', 'r', 's', 'b', 'x', 'n', 'l', 'c', '-', 'm', 't', 'd', 'a', '.', '_', 'p'}, 112 | {'o', 'i', 'c', '-', 'a', 'e', 'm', '/', 'f', 'r', 'd', 's', 'b', '.', '8', '2'}, 113 | {'n', 't', 'e', 'i', '/', 'r', 's', 'l', 'o', '.', 'h', 'z', 'u', '-', 'd', 'b'}, 114 | {'o', 'r', 'a', 'e', 't', 'i', '/', 'y', '.', 'p', 'g', '-', '_', 'm', 'C', 'u'}, 115 | {'s', 'a', 't', 'b', 'n', 'c', 'l', 'v', 'o', 'd', 'p', 'g', 'm', 'r', '/', 'e'}, 116 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 117 | {'i', '-', 'e', '/', '_', 'a', 's', '.', 'n', '+', 'o', 'l', 't', 'd', 'M', 'b'}, 118 | {'i', 'e', 'a', 'o', '/', 'l', 'u', 's', 'p', 'd', '-', 't', '.', 'k', 'v', 'y'}, 119 | {'e', 'a', 'p', 'o', '/', 'f', 'l', 'd', 's', 'i', '_', 'b', 'm', '-', '.', 'u'}, 120 | {'/', 'o', 't', 'g', 's', 'd', 'e', 'c', '-', 'f', 'i', 'u', 'a', '.', 'k', '3'}, 121 | {'m', 'u', 'r', 'n', 'd', 'c', 'l', '/', 'o', 'a', 'b', 't', 's', 'p', 'w', 'g'}, 122 | {'a', 'l', 'e', 'n', 'p', '/', 'o', 'r', 's', 't', 'i', 'y', 'u', 'c', 'h', 'm'}, 123 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 124 | {'i', 'e', 'c', '/', 'o', 'a', 't', 'm', '.', 's', 'v', 'u', 'n', '-', 'd', 'l'}, 125 | {'t', '/', 'r', 'o', 'h', 'e', 's', 'c', '.', 'i', '-', 'y', 'p', 'u', 'k', 'v'}, 126 | {'i', 'e', '/', 'k', 'a', 's', 'g', 'r', 'o', 'h', '.', '-', 'f', 'y', 'm', 'c'}, 127 | {'r', 's', 'm', 'n', 'l', 't', 'd', 'i', 'p', 'g', 'b', 'f', '/', 'x', 'e', 'a'}, 128 | {'e', 'a', 'i', 'o', 'g', 'm', 'n', 'f', '/', 'c', '.', '_', '-', 's', 'p', 'd'}, 129 | {'e', 'i', 'a', 'o', 'n', '_', 'm', '.', '-', 'r', 's', '/', 't', 'h', 'd', 'c'}, 130 | {'m', 'l', '8', '/', 't', '-', '2', 'y', 'a', 'e', '.', 'p', '4', '1', '_', '3'}, 131 | }, 132 | Packs: []Pack{ 133 | {0x80000000, 1, 2, [8]uint{26, 24, 24, 24, 24, 24, 24}, [8]int16{15, 3, 0, 0, 0, 0, 0}}, 134 | {0xc0000000, 2, 4, [8]uint{25, 21, 18, 16, 16, 16, 16}, [8]int16{15, 15, 7, 3, 0, 0, 0}}, 135 | {0xe0000000, 4, 8, [8]uint{24, 20, 16, 12, 9, 6, 3, 0}, [8]int16{15, 15, 15, 15, 7, 7, 7, 7}}, 136 | }, 137 | MinChr: 45, 138 | MaxSuccessorN: 7, 139 | } 140 | -------------------------------------------------------------------------------- /models/emails_model.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | // 6 | // Copyright 2014 Christian Schramm. All rights reserved. 7 | // Use of this source code is governed by a MIT-style 8 | // license that can be found in the LICENSE file. 9 | 10 | package models 11 | 12 | import "github.com/tmthrgd/shoco" 13 | 14 | // Emails is a model optimized for compressing email addresses. 15 | // 16 | // It was trained against a sample of 2,000 email addresses using the 17 | // --optimize-encoding flag. It achieves good compression on short strings 18 | // consisting of just email addresses. 19 | func Emails() *shoco.Model { 20 | check(emailsModel) 21 | return emailsModel 22 | } 23 | 24 | var emailsModel = &shoco.Model{ 25 | ChrsByChrID: []byte{'.', 'e', 'o', 'a', 'n', 's', 'r', 'c', 'i', 'l', '@', 't', 'h', 'u', 'm', 'd', 'g', 'p', 'y', 'b', 'w', 'k', 'f', 'v', 'j', '-', '1', '0', 'x', 'z', '2', '8'}, 26 | ChrIdsByChr: [256]int8{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 0, -1, 27, 26, 30, -1, -1, -1, -1, -1, 31, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 19, 7, 15, 1, 22, 16, 12, 8, 24, 21, 9, 14, 4, 2, 17, -1, 6, 5, 11, 13, 23, 20, 28, 18, 29, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 27 | SuccessorIDsByChrIDAndChrID: [][]int8{ 28 | {-1, -1, 3, 11, 2, 5, -1, 0, -1, 15, -1, 14, 8, 1, 7, 12, 4, 9, -1, 6, 10, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 29 | {3, 13, -1, 9, 4, 2, 0, 10, 14, 5, 6, 1, -1, -1, 12, 7, -1, -1, 8, -1, 11, -1, -1, 15, -1, -1, -1, -1, -1, -1, -1, -1}, 30 | {1, -1, 4, -1, 2, 10, 3, 14, -1, 5, 6, 7, 13, 9, 0, 12, -1, 15, -1, 11, -1, -1, -1, 8, -1, -1, -1, -1, -1, -1, -1, -1}, 31 | {4, -1, -1, -1, 2, 7, 1, 8, 3, 0, 13, 5, 9, 14, 6, 10, 15, -1, 11, -1, -1, -1, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1}, 32 | {2, 1, 13, 7, 10, 6, -1, 12, 9, -1, 3, 4, 0, -1, 15, 5, 8, -1, 14, -1, -1, -1, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 33 | {0, 5, 4, 3, -1, 10, -1, 13, 7, 12, 1, 2, 6, 9, 8, -1, -1, 14, 15, -1, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 34 | {9, 0, 4, 2, 10, 6, 11, -1, 1, 15, 8, 3, -1, 13, -1, 7, 5, -1, 12, -1, -1, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 35 | {7, 4, 0, 1, -1, 11, 6, 10, 9, 8, 14, 3, 2, 12, -1, -1, 15, -1, 13, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 36 | {5, 6, 9, 8, 0, 3, 7, 2, -1, 1, 15, 4, -1, -1, 11, 10, 12, -1, -1, -1, -1, -1, 14, 13, -1, -1, -1, -1, -1, -1, -1, -1}, 37 | {1, 0, 5, 3, -1, 6, -1, 14, 2, 4, 7, 8, -1, 13, 15, 9, -1, 12, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 11}, 38 | {-1, 13, -1, 8, 0, 3, 15, 2, -1, 11, -1, 6, 5, -1, 9, 7, 1, 12, 10, 4, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 39 | {1, 0, 4, 6, -1, 11, 5, 13, 3, 12, 7, 8, 2, -1, 9, -1, 14, -1, 15, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 40 | {5, 3, 2, 1, 9, 0, 10, 12, 4, 13, 6, 8, -1, 7, 14, -1, -1, 15, -1, -1, -1, -1, -1, -1, -1, 11, -1, -1, -1, -1, -1, -1}, 41 | {9, 10, -1, -1, 3, 2, 1, 11, 14, 4, -1, 5, -1, -1, 8, 13, 6, 7, -1, 15, -1, 0, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 42 | {3, 1, 4, 0, -1, 9, 15, 5, 2, -1, 6, -1, 7, 12, 10, 13, -1, 8, 14, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 43 | {3, 0, 5, 2, -1, 7, 6, 14, 1, 15, 4, -1, 11, 9, 13, 10, -1, 12, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 44 | {1, 2, 3, 4, 13, 9, 6, -1, 8, 12, 11, -1, 7, 15, 0, -1, 14, 5, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, -1, -1, -1, -1}, 45 | {7, 1, 6, 0, -1, 11, 4, 3, 10, 8, 14, 13, 2, 12, 15, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1, -1, -1, -1}, 46 | {0, 11, 7, 2, 4, 3, 14, 10, -1, 6, 1, 9, -1, -1, 5, 12, 15, 8, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 47 | {9, 0, 3, 1, -1, 8, 2, 13, 4, 7, 14, 5, 11, 6, -1, 15, -1, -1, 12, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 48 | {0, 2, 4, 1, 8, 11, 14, 10, 3, 5, 12, -1, 7, -1, -1, 13, -1, -1, 9, -1, -1, 15, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1}, 49 | {1, 0, 6, 3, -1, 5, 13, 9, 4, 8, 2, 11, 7, -1, 15, -1, -1, -1, 10, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 50 | {8, 5, 0, 6, -1, 10, 3, -1, 1, 7, 9, 2, -1, 11, 12, -1, -1, 15, -1, -1, -1, -1, 4, -1, -1, 14, -1, -1, -1, -1, -1, 13}, 51 | {2, 0, 4, 3, -1, -1, 6, 7, 1, 15, 5, -1, 9, -1, 11, -1, -1, 12, 13, -1, 8, -1, 14, -1, 10, -1, -1, -1, -1, -1, -1, -1}, 52 | {5, 3, 0, 1, -1, -1, 15, 9, 4, 14, 7, -1, -1, 2, 8, 12, 10, 11, -1, 6, -1, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 53 | {-1, 11, -1, 10, 12, 4, -1, 5, -1, 0, -1, 1, 14, 8, 2, 6, 13, 3, -1, 15, 9, -1, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 54 | {6, -1, -1, 10, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 1, -1, -1, 4, 12}, 55 | {2, -1, -1, -1, -1, 10, -1, -1, -1, -1, 3, -1, -1, -1, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, -1, -1, 6, 9}, 56 | {0, 7, 2, 4, -1, 15, -1, 9, 6, -1, 1, 3, -1, -1, 10, -1, -1, 8, 13, -1, 12, -1, 5, -1, -1, 14, -1, -1, 11, -1, -1, -1}, 57 | {4, 3, 0, 1, -1, -1, -1, -1, 2, 14, 5, -1, 7, 8, -1, 10, -1, -1, 9, -1, -1, -1, 11, -1, 12, 13, 15, -1, -1, 6, -1, -1}, 58 | {1, 11, -1, -1, -1, -1, -1, -1, -1, -1, 0, 7, -1, -1, 15, -1, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, 3, 4, 10, -1, 6, -1}, 59 | {8, -1, -1, -1, -1, 9, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 7, -1, -1, -1, 5}, 60 | }, 61 | ChrsByChrAndSuccessorID: [][]byte{ 62 | {'l', 't', 'm', 'p', 's', 'c', 'd', 'f', 'u', 'w', 'a', 'e', 'n', 'g', 'h', 'b'}, 63 | {'c', 'u', 'n', 'o', 'g', 's', 'b', 'm', 'h', 'p', 'w', 'a', 'd', 'f', 't', 'l'}, 64 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 65 | {'1', '0', '.', '@', '3', '4', '2', '6', '5', '8', 's', 'm', '\x00', '\x00', '\x00', '\x00'}, 66 | {'@', '0', '6', '1', '2', '3', '.', '7', '4', '9', 'a', '5', '8', '\x00', '\x00', '\x00'}, 67 | {'@', '.', '5', '1', '0', '4', '2', 't', '7', '6', 'x', 'e', 'p', '3', '9', 'm'}, 68 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 69 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 70 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 71 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 72 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 73 | {'1', '@', '4', '5', '6', '8', '3', '0', '.', 's', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 74 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 75 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 76 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 77 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 78 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 79 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 80 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 81 | {'n', 'g', 'c', 's', 'b', 'h', 't', 'd', 'a', 'm', 'y', 'l', 'p', 'e', 'w', 'r'}, 82 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 83 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 84 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 85 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 86 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 87 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 88 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 89 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 90 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 91 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 92 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 93 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 94 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 95 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 96 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 97 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 98 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 99 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 100 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 101 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 102 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 103 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 104 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 105 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 106 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 107 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 108 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 109 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 110 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 111 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 112 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 113 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 114 | {'l', 'r', 'n', 'i', '.', 't', 'm', 's', 'c', 'h', 'd', 'y', 'v', '@', 'u', 'g'}, 115 | {'e', 'a', 'r', 'o', 'i', 't', 'u', 'l', 's', '.', 'b', 'h', 'y', 'c', '@', 'd'}, 116 | {'o', 'a', 'h', 't', 'e', 'k', 'r', '.', 'l', 'i', 'c', 's', 'u', 'y', '@', 'g'}, 117 | {'e', 'i', 'a', '.', '@', 'o', 'r', 's', 'y', 'u', 'd', 'h', 'p', 'm', 'c', 'l'}, 118 | {'r', 't', 's', '.', 'n', 'l', '@', 'd', 'y', 'a', 'c', 'w', 'm', 'e', 'i', 'v'}, 119 | {'o', 'i', 't', 'r', 'f', 'e', 'a', 'l', '.', '@', 's', 'u', 'm', '8', '-', 'p'}, 120 | {'m', '.', 'e', 'o', 'a', 'p', 'r', 'h', 'i', 's', '-', '@', 'l', 'n', 'g', 'u'}, 121 | {'s', 'a', 'o', 'e', 'i', '.', '@', 'u', 't', 'n', 'r', '-', 'c', 'l', 'm', 'p'}, 122 | {'n', 'l', 'c', 's', 't', '.', 'e', 'r', 'a', 'o', 'd', 'm', 'g', 'v', 'f', '@'}, 123 | {'o', 'a', 'u', 'e', 'i', '.', 'b', '@', 'm', 'c', 'g', 'p', 'd', 'f', 'l', 'r'}, 124 | {'e', '.', '@', 'a', 'i', 's', 'o', 'h', 'l', 'c', 'y', 't', 'b', 'r', '_', 'm'}, 125 | {'e', '.', 'i', 'a', 'l', 'o', 's', '@', 't', 'd', 'y', '8', 'p', 'u', 'c', 'm'}, 126 | {'a', 'e', 'i', '.', 'o', 'c', '@', 'h', 'p', 's', 'm', 'b', 'u', 'd', 'y', 'r'}, 127 | {'h', 'e', '.', '@', 't', 'd', 's', 'a', 'g', 'i', 'n', 'f', 'c', 'o', 'y', 'm'}, 128 | {'m', '.', 'n', 'r', 'o', 'l', '@', 't', 'v', 'u', 's', 'b', 'd', 'h', 'c', 'p'}, 129 | {'a', 'e', 'h', 'c', 'r', '-', 'o', '.', 'l', 'p', 'i', 's', 'u', 't', '@', 'm'}, 130 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 131 | {'e', 'i', 'a', 't', 'o', 'g', 's', 'd', '@', '.', 'n', 'r', 'y', 'u', 'k', 'l'}, 132 | {'.', '@', 't', 'a', 'o', 'e', 'h', 'i', 'm', 'u', 's', 'w', 'l', 'c', 'p', 'y'}, 133 | {'e', '.', 'h', 'i', 'o', 'r', 'a', '@', 't', 'm', 'w', 's', 'l', 'c', 'g', 'y'}, 134 | {'k', 'r', 's', 'n', 'l', 't', 'g', 'p', 'm', '.', 'e', 'c', 'f', 'd', 'i', 'b'}, 135 | {'e', 'i', '.', 'a', 'o', '@', 'r', 'c', 'w', 'h', 'j', 'm', 'p', 'y', 'f', 'l'}, 136 | {'.', 'a', 'e', 'i', 'o', 'l', 'v', 'h', 'n', 'y', 'c', 's', '@', 'd', 'r', 'k'}, 137 | {'.', '@', 'o', 't', 'a', 'f', 'i', 'e', 'p', 'c', 'm', 'x', 'w', 'y', '-', 's'}, 138 | {'.', '@', 'a', 's', 'n', 'm', 'l', 'o', 'p', 't', 'c', 'e', 'd', 'b', 'r', 'g'}, 139 | {'o', 'a', 'i', 'e', '.', '@', 'z', 'h', 'u', 'y', 'd', 'f', 'j', '-', 'l', '1'}, 140 | }, 141 | Packs: []shoco.Pack{ 142 | {Word: 0x80000000, BytesPacked: 1, BytesUnpacked: 2, Offsets: [8]uint{26, 24, 24, 24, 24, 24, 24}, Masks: [8]int16{15, 3, 0, 0, 0, 0, 0}}, 143 | {Word: 0xc0000000, BytesPacked: 2, BytesUnpacked: 4, Offsets: [8]uint{25, 21, 18, 16, 16, 16, 16}, Masks: [8]int16{15, 15, 7, 3, 0, 0, 0}}, 144 | {Word: 0xe0000000, BytesPacked: 4, BytesUnpacked: 8, Offsets: [8]uint{24, 20, 16, 12, 9, 6, 3, 0}, Masks: [8]int16{15, 15, 15, 15, 7, 7, 7, 7}}, 145 | }, 146 | MinChr: 45, 147 | MaxSuccessorN: 7, 148 | } 149 | -------------------------------------------------------------------------------- /shoco_model_words_en.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | // 6 | // Copyright 2014 Christian Schramm. All rights reserved. 7 | // Use of this source code is governed by a MIT-style 8 | // license that can be found in the LICENSE file. 9 | 10 | package shoco 11 | 12 | // WordsEnModel is a model optimised for words of the English langauge. 13 | // 14 | // Deprecated: Use models.WordsEn() instead. 15 | var WordsEnModel = wordsEnModel 16 | 17 | var wordsEnModel = &Model{ 18 | ChrsByChrID: []byte{'e', 'a', 'i', 'o', 't', 'h', 'n', 'r', 's', 'l', 'u', 'c', 'w', 'm', 'd', 'b', 'p', 'f', 'g', 'v', 'y', 'k', '-', 'H', 'M', 'T', '\'', 'B', 'x', 'I', 'W', 'L'}, 19 | ChrIdsByChr: [256]int8{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 27, -1, -1, -1, -1, -1, 23, 29, -1, -1, 31, 24, -1, -1, -1, -1, -1, -1, 25, -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 15, 11, 14, 0, 17, 18, 5, 2, -1, 21, 9, 13, 6, 3, 16, -1, 7, 8, 4, 10, 19, 12, 28, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 20 | SuccessorIDsByChrIDAndChrID: [][]int8{ 21 | {7, 4, 12, -1, 6, -1, 1, 0, 3, 5, -1, 9, -1, 8, 2, -1, 15, 14, -1, 10, 11, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1, -1}, 22 | {-1, -1, 6, -1, 1, -1, 0, 3, 2, 4, 15, 11, -1, 9, 5, 10, 13, -1, 12, 8, 7, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 23 | {9, 11, -1, 4, 2, -1, 0, 8, 1, 5, -1, 6, -1, 3, 7, 15, -1, 12, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 24 | {-1, -1, 14, 7, 5, -1, 1, 2, 8, 9, 0, 15, 6, 4, 11, -1, 12, 3, -1, 10, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 25 | {2, 4, 3, 1, 5, 0, -1, 6, 10, 9, 7, 12, 11, -1, -1, -1, -1, 13, -1, -1, 8, -1, 15, -1, -1, -1, 14, -1, -1, -1, -1, -1}, 26 | {0, 1, 2, 3, 4, -1, -1, 5, 9, 10, 6, -1, -1, 8, 15, 11, -1, 14, -1, -1, 7, -1, 13, -1, -1, -1, 12, -1, -1, -1, -1, -1}, 27 | {2, 8, 7, 4, 3, -1, 9, -1, 6, 11, -1, 5, -1, -1, 0, -1, -1, 14, 1, 15, 10, 12, -1, -1, -1, -1, 13, -1, -1, -1, -1, -1}, 28 | {0, 3, 1, 2, 6, -1, 9, 8, 4, 12, 13, 10, -1, 11, 7, -1, -1, 15, 14, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 29 | {0, 6, 3, 4, 1, 2, -1, -1, 5, 10, 7, 9, 11, 12, -1, -1, 8, 14, -1, -1, 15, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 30 | {0, 6, 2, 5, 9, -1, -1, -1, 10, 1, 8, -1, 12, 14, 4, -1, 15, 7, -1, 13, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 31 | {8, 10, 9, 15, 1, -1, 4, 0, 3, 2, -1, 6, -1, 12, 11, 13, 7, 14, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 32 | {1, 3, 6, 0, 4, 2, -1, 7, 13, 8, 9, 11, -1, -1, 15, -1, -1, -1, -1, -1, 10, 5, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 33 | {3, 0, 1, 4, -1, 2, 5, 6, 7, 8, -1, 14, -1, -1, 9, 15, -1, 12, -1, -1, -1, 10, 11, -1, -1, -1, 13, -1, -1, -1, -1, -1}, 34 | {0, 1, 3, 2, 15, -1, 12, -1, 7, 14, 4, -1, -1, 9, -1, 8, 5, 10, -1, -1, 6, -1, 13, -1, -1, -1, 11, -1, -1, -1, -1, -1}, 35 | {0, 3, 1, 2, -1, -1, 12, 6, 4, 9, 7, -1, -1, 14, 8, -1, -1, 15, 11, 13, 5, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 36 | {0, 5, 7, 2, 10, 13, -1, 6, 8, 1, 3, -1, -1, 14, 15, 11, -1, -1, -1, 12, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 37 | {0, 2, 6, 3, 7, 10, -1, 1, 9, 4, 8, -1, -1, 15, -1, 12, 5, -1, -1, -1, 11, -1, 13, -1, -1, -1, 14, -1, -1, -1, -1, -1}, 38 | {1, 3, 4, 0, 7, -1, 12, 2, 11, 8, 6, 13, -1, -1, -1, -1, -1, 5, -1, -1, 10, 15, 9, -1, -1, -1, 14, -1, -1, -1, -1, -1}, 39 | {1, 3, 5, 2, 13, 0, 9, 4, 7, 6, 8, -1, -1, 15, -1, 11, -1, -1, 10, -1, 14, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 40 | {0, 2, 1, 3, -1, -1, -1, 6, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 41 | {1, 11, 4, 0, 3, -1, 13, 12, 2, 7, -1, -1, 15, 10, 5, 8, 14, -1, -1, -1, -1, -1, 9, -1, -1, -1, 6, -1, -1, -1, -1, -1}, 42 | {0, 9, 2, 14, 15, 4, 1, 13, 3, 5, -1, -1, 10, -1, -1, -1, -1, 6, 12, -1, 7, -1, 8, -1, -1, -1, 11, -1, -1, -1, -1, -1}, 43 | {-1, 2, 14, -1, 1, 5, 8, 7, 4, 12, -1, 6, 9, 11, 13, 3, 10, 15, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 44 | {0, 1, 3, 2, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 45 | {4, 3, 1, 5, -1, -1, -1, 0, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 46 | {2, 8, 4, 1, -1, 0, -1, 6, -1, -1, 5, -1, 7, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1}, 47 | {12, 5, -1, -1, 1, -1, -1, 7, 0, 3, -1, 2, -1, 4, 6, -1, -1, -1, -1, 8, -1, -1, 15, -1, 13, 9, -1, -1, -1, -1, -1, 11}, 48 | {1, 3, 2, 4, -1, -1, -1, 5, -1, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1}, 49 | {5, 3, 4, 12, 1, 6, -1, -1, -1, -1, 8, 2, -1, -1, -1, -1, 0, 9, -1, -1, 11, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 50 | {-1, -1, -1, -1, 0, -1, 1, 12, 3, -1, -1, -1, -1, 5, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, 6, -1, 10}, 51 | {2, 3, 1, 4, -1, 0, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1}, 52 | {5, 1, 3, 0, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 9, -1, -1, 6, -1, 7}, 53 | }, 54 | ChrsByChrAndSuccessorID: [][]byte{ 55 | {'s', 't', 'c', 'l', 'm', 'a', 'd', 'r', 'v', 'T', 'A', 'L', 'e', 'M', 'Y', '-'}, 56 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 57 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 58 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 59 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 60 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 61 | {'-', 't', 'a', 'b', 's', 'h', 'c', 'r', 'n', 'w', 'p', 'm', 'l', 'd', 'i', 'f'}, 62 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 63 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 64 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 65 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 66 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 67 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 68 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 69 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 70 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 71 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 72 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 73 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 74 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 75 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 76 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 77 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 78 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 79 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 80 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 81 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 82 | {'u', 'e', 'i', 'a', 'o', 'r', 'y', 'l', 'I', 'E', 'R', '\x00', '\x00', '\x00', '\x00', '\x00'}, 83 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 84 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 85 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 86 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 87 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 88 | {'e', 'a', 'o', 'i', 'u', 'A', 'y', 'E', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 89 | {'t', 'n', 'f', 's', '\'', 'm', 'I', 'N', 'A', 'E', 'L', 'Z', 'r', 'V', 'R', 'C'}, 90 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 91 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 92 | {'o', 'a', 'y', 'i', 'u', 'e', 'I', 'L', 'D', '\'', 'E', 'Y', '\x00', '\x00', '\x00', '\x00'}, 93 | {'r', 'i', 'y', 'a', 'e', 'o', 'u', 'Y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 94 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 95 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 96 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 97 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 98 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 99 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 100 | {'h', 'o', 'e', 'E', 'i', 'u', 'r', 'w', 'a', 'H', 'y', 'R', 'Z', '\x00', '\x00', '\x00'}, 101 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 102 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 103 | {'h', 'i', 'e', 'a', 'o', 'r', 'I', 'y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 104 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 105 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 106 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 107 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 108 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 109 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 110 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 111 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 112 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 113 | {'n', 't', 's', 'r', 'l', 'd', 'i', 'y', 'v', 'm', 'b', 'c', 'g', 'p', 'k', 'u'}, 114 | {'e', 'l', 'o', 'u', 'y', 'a', 'r', 'i', 's', 'j', 't', 'b', 'v', 'h', 'm', 'd'}, 115 | {'o', 'e', 'h', 'a', 't', 'k', 'i', 'r', 'l', 'u', 'y', 'c', 'q', 's', '-', 'd'}, 116 | {'e', 'i', 'o', 'a', 's', 'y', 'r', 'u', 'd', 'l', '-', 'g', 'n', 'v', 'm', 'f'}, 117 | {'r', 'n', 'd', 's', 'a', 'l', 't', 'e', 'm', 'c', 'v', 'y', 'i', 'x', 'f', 'p'}, 118 | {'o', 'e', 'r', 'a', 'i', 'f', 'u', 't', 'l', '-', 'y', 's', 'n', 'c', '\'', 'k'}, 119 | {'h', 'e', 'o', 'a', 'r', 'i', 'l', 's', 'u', 'n', 'g', 'b', '-', 't', 'y', 'm'}, 120 | {'e', 'a', 'i', 'o', 't', 'r', 'u', 'y', 'm', 's', 'l', 'b', '\'', '-', 'f', 'd'}, 121 | {'n', 's', 't', 'm', 'o', 'l', 'c', 'd', 'r', 'e', 'g', 'a', 'f', 'v', 'z', 'b'}, 122 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 123 | {'e', 'n', 'i', 's', 'h', 'l', 'f', 'y', '-', 'a', 'w', '\'', 'g', 'r', 'o', 't'}, 124 | {'e', 'l', 'i', 'y', 'd', 'o', 'a', 'f', 'u', 't', 's', 'k', 'w', 'v', 'm', 'p'}, 125 | {'e', 'a', 'o', 'i', 'u', 'p', 'y', 's', 'b', 'm', 'f', '\'', 'n', '-', 'l', 't'}, 126 | {'d', 'g', 'e', 't', 'o', 'c', 's', 'i', 'a', 'n', 'y', 'l', 'k', '\'', 'f', 'v'}, 127 | {'u', 'n', 'r', 'f', 'm', 't', 'w', 'o', 's', 'l', 'v', 'd', 'p', 'k', 'i', 'c'}, 128 | {'e', 'r', 'a', 'o', 'l', 'p', 'i', 't', 'u', 's', 'h', 'y', 'b', '-', '\'', 'm'}, 129 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 130 | {'e', 'i', 'o', 'a', 's', 'y', 't', 'd', 'r', 'n', 'c', 'm', 'l', 'u', 'g', 'f'}, 131 | {'e', 't', 'h', 'i', 'o', 's', 'a', 'u', 'p', 'c', 'l', 'w', 'm', 'k', 'f', 'y'}, 132 | {'h', 'o', 'e', 'i', 'a', 't', 'r', 'u', 'y', 'l', 's', 'w', 'c', 'f', '\'', '-'}, 133 | {'r', 't', 'l', 's', 'n', 'g', 'c', 'p', 'e', 'i', 'a', 'd', 'm', 'b', 'f', 'o'}, 134 | {'e', 'i', 'a', 'o', 'y', 'u', 'r', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 135 | {'a', 'i', 'h', 'e', 'o', 'n', 'r', 's', 'l', 'd', 'k', '-', 'f', '\'', 'c', 'b'}, 136 | {'p', 't', 'c', 'a', 'i', 'e', 'h', 'q', 'u', 'f', '-', 'y', 'o', '\x00', '\x00', '\x00'}, 137 | {'o', 'e', 's', 't', 'i', 'd', '\'', 'l', 'b', '-', 'm', 'a', 'r', 'n', 'p', 'w'}, 138 | }, 139 | Packs: []Pack{ 140 | {0x80000000, 1, 2, [8]uint{26, 24, 24, 24, 24, 24, 24, 24}, [8]int16{15, 3, 0, 0, 0, 0, 0, 0}}, 141 | {0xc0000000, 2, 4, [8]uint{25, 22, 19, 16, 16, 16, 16, 16}, [8]int16{15, 7, 7, 7, 0, 0, 0, 0}}, 142 | {0xe0000000, 4, 8, [8]uint{23, 19, 15, 11, 8, 5, 2, 0}, [8]int16{31, 15, 15, 15, 7, 7, 7, 3}}, 143 | }, 144 | MinChr: 39, 145 | MaxSuccessorN: 7, 146 | } 147 | -------------------------------------------------------------------------------- /shoco_model_text_en.go: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Tom Thorogood. All rights reserved. 2 | // Use of this source code is governed by a 3 | // Modified BSD License license that can be found in 4 | // the LICENSE file. 5 | // 6 | // Copyright 2014 Christian Schramm. All rights reserved. 7 | // Use of this source code is governed by a MIT-style 8 | // license that can be found in the LICENSE file. 9 | 10 | package shoco 11 | 12 | // TextEnModel is a model optimised for English langauge text. 13 | // 14 | // Deprecated: Use models.TextEn() instead. 15 | var TextEnModel = textEnModel 16 | 17 | var textEnModel = &Model{ 18 | ChrsByChrID: []byte{' ', 'e', 't', 'a', 'o', 'n', 'i', 'h', 's', 'r', 'd', 'l', 'u', 'm', 'c', 'w', 'y', 'f', 'g', ',', 'p', 'b', '.', 'v', 'k', 'I', '"', '-', 'H', 'M', 'T', '\''}, 19 | ChrIdsByChr: [256]int8{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 26, -1, -1, -1, -1, 31, -1, -1, -1, -1, 19, 27, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 25, -1, -1, -1, 29, -1, -1, -1, -1, -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 21, 14, 10, 1, 17, 18, 7, 6, -1, 24, 11, 13, 5, 4, 20, -1, 9, 8, 2, 12, 23, 15, -1, 16, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 20 | SuccessorIDsByChrIDAndChrID: [][]int8{ 21 | {12, -1, 0, 1, 5, 13, 6, 2, 4, -1, 11, 14, -1, 7, 10, 3, -1, 9, -1, -1, 15, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 22 | {0, 8, 7, 5, -1, 2, 15, -1, 4, 1, 3, 6, -1, 10, 11, -1, 14, -1, -1, 9, -1, -1, 13, 12, -1, -1, -1, -1, -1, -1, -1, -1}, 23 | {1, 3, 6, 5, 2, -1, 4, 0, 13, 7, -1, 12, 9, -1, 15, 14, 11, -1, -1, 8, -1, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 24 | {6, -1, 1, -1, -1, 0, 7, -1, 2, 3, 5, 4, -1, 10, 12, -1, 8, -1, 13, -1, 14, 11, -1, 9, 15, -1, -1, -1, -1, -1, -1, -1}, 25 | {2, -1, 6, -1, 8, 1, 15, -1, 9, 3, 12, 10, 0, 5, -1, 7, -1, 4, -1, -1, 13, -1, -1, 11, 14, -1, -1, -1, -1, -1, -1, -1}, 26 | {0, 3, 4, 10, 5, 11, 8, -1, 7, -1, 1, 14, -1, -1, 6, -1, 12, -1, 2, 9, -1, -1, 13, -1, 15, -1, -1, -1, -1, -1, -1, -1}, 27 | {-1, 9, 2, 11, 4, 0, -1, -1, 1, 8, 7, 5, -1, 3, 6, -1, -1, 12, 10, -1, -1, 15, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1}, 28 | {3, 0, 5, 1, 4, -1, 2, -1, 13, 7, -1, 14, 8, 12, -1, -1, 10, -1, -1, 6, -1, 15, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 29 | {0, 1, 2, 7, 5, -1, 4, 3, 6, -1, -1, 13, 8, 15, 12, 14, -1, -1, -1, 9, 11, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 30 | {1, 0, 7, 4, 3, 12, 2, -1, 5, 11, 9, 15, -1, 14, 13, -1, 6, -1, -1, 10, -1, -1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 31 | {0, 1, -1, 6, 3, -1, 2, -1, 7, 9, 11, 12, 10, -1, -1, -1, 8, -1, 15, 4, -1, -1, 5, -1, -1, -1, -1, 14, -1, -1, -1, -1}, 32 | {3, 0, 11, 7, 6, -1, 2, -1, 12, -1, 5, 1, 9, -1, -1, 15, 4, 8, -1, 10, -1, -1, 14, -1, 13, -1, -1, -1, -1, -1, -1, -1}, 33 | {5, 9, 1, 11, -1, 4, 10, -1, 3, 0, 12, 2, -1, 13, 7, -1, -1, -1, 6, 15, 8, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 34 | {1, 0, -1, 2, 3, -1, 4, -1, 8, -1, -1, -1, 5, 12, -1, -1, 7, 14, -1, 9, 6, 11, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 35 | {12, 1, 4, 3, 0, -1, 6, 2, 15, 7, -1, 8, 9, -1, 11, -1, 10, -1, -1, -1, -1, -1, 14, -1, 5, -1, -1, -1, -1, -1, -1, -1}, 36 | {5, 3, -1, 0, 4, 6, 1, 2, 9, 7, 12, 10, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1, 11, -1, 15, -1, -1, 14, -1, -1, -1, -1}, 37 | {0, 4, 6, -1, 1, -1, 7, -1, 5, -1, 8, 12, -1, -1, -1, -1, -1, -1, -1, 2, -1, 14, 3, -1, -1, -1, -1, 15, -1, -1, -1, 10}, 38 | {0, 2, 8, 4, 1, -1, 5, -1, -1, 3, -1, 9, 7, -1, -1, -1, 13, 6, -1, 10, -1, -1, 11, -1, -1, -1, -1, 12, -1, -1, -1, -1}, 39 | {0, 2, -1, 4, 3, 12, 6, 1, 9, 5, -1, 7, 11, -1, -1, -1, -1, -1, 13, 8, -1, 15, 10, -1, -1, -1, -1, 14, -1, -1, -1, -1}, 40 | {0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 3, -1, -1, -1, 2}, 41 | {7, 0, 8, 2, 3, -1, 6, 11, 10, 1, -1, 4, 9, -1, -1, -1, 12, -1, -1, 13, 5, 15, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 42 | {12, 0, 10, 5, 2, -1, 7, 15, 8, 6, -1, 1, 3, -1, -1, -1, 4, -1, -1, -1, -1, 11, 14, 13, -1, -1, -1, -1, -1, -1, -1, -1}, 43 | {0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, 2, -1, -1, -1, 1, 4, -1, -1, -1, 3}, 44 | {-1, 0, -1, 2, 3, -1, 1, -1, -1, 6, -1, -1, 5, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 45 | {1, 0, -1, 13, -1, 2, 3, 5, 4, -1, -1, 8, -1, -1, -1, 14, 10, 9, -1, 6, -1, -1, 7, -1, -1, -1, -1, 11, -1, -1, -1, -1}, 46 | {0, -1, 1, -1, -1, 2, -1, -1, 4, -1, -1, -1, -1, 7, -1, -1, -1, 3, -1, 6, -1, -1, 14, -1, -1, 8, -1, -1, -1, -1, -1, 5}, 47 | {0, -1, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, -1, -1, -1, -1, -1, 1, -1, -1, 7, 6, 4, -1}, 48 | {8, -1, 1, 2, -1, 9, 15, 5, 4, 7, 14, 13, -1, 12, 6, 10, -1, -1, -1, -1, 11, 3, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1}, 49 | {-1, 0, -1, 1, 2, -1, 3, -1, -1, -1, -1, -1, 4, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 50 | {8, 4, -1, 3, 5, -1, 1, -1, -1, 0, -1, -1, 6, -1, -1, -1, 2, -1, -1, -1, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1}, 51 | {9, 2, -1, 8, 1, -1, 4, 0, -1, 6, -1, -1, 5, -1, -1, 7, 13, -1, -1, 11, -1, -1, 12, -1, -1, -1, -1, -1, 10, -1, -1, -1}, 52 | {2, 15, 1, 6, -1, -1, -1, -1, 0, 8, 7, 4, -1, 5, 3, -1, -1, -1, -1, 14, -1, -1, -1, 9, -1, -1, 13, -1, -1, -1, 10, -1}, 53 | }, 54 | ChrsByChrAndSuccessorID: [][]byte{ 55 | {'t', 'a', 'h', 'w', 's', 'o', 'i', 'm', 'b', 'f', 'c', 'd', ' ', 'n', 'l', 'p'}, 56 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 57 | {' ', 'I', 'Y', 'W', 'T', 'A', 'M', 'H', 'O', 'B', 'N', 'D', 't', 'S', 'a', ','}, 58 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 59 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 60 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 61 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 62 | {'s', 't', ' ', 'c', 'l', 'm', 'a', 'd', 'r', 'v', 'T', 'A', 'L', '"', ',', 'e'}, 63 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 64 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 65 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 66 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 67 | {' ', '"', '\'', '-', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 68 | {'-', 't', 'a', 'b', 's', 'h', 'c', 'r', ' ', 'n', 'w', 'p', 'm', 'l', 'd', 'i'}, 69 | {' ', '"', '.', '\'', '-', ',', '?', ';', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 70 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 71 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 72 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 73 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 74 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 75 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 76 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 77 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 78 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 79 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 80 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 81 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 82 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 83 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 84 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 85 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 86 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 87 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 88 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 89 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 90 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 91 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 92 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 93 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 94 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 95 | {'e', 'a', 'o', 'i', 'u', 'A', 'y', 'E', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 96 | {' ', 't', 'n', 'f', 's', '\'', ',', 'm', 'I', 'N', '_', 'A', 'E', 'L', '.', 'R'}, 97 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 98 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 99 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 100 | {'r', 'i', 'y', 'a', 'e', 'o', 'u', 'Y', ' ', '.', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 101 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 102 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 103 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 104 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 105 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 106 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 107 | {'h', 'o', 'e', 'E', 'i', 'u', 'r', 'w', 'a', ' ', 'H', ',', '.', 'y', 'R', 'Z'}, 108 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 109 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 110 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 111 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 112 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 113 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 114 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 115 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 116 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 117 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 118 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 119 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 120 | {'n', 't', 's', 'r', 'l', 'd', ' ', 'i', 'y', 'v', 'm', 'b', 'c', 'g', 'p', 'k'}, 121 | {'e', 'l', 'o', 'u', 'y', 'a', 'r', 'i', 's', 'j', 't', 'b', ' ', 'v', '.', 'h'}, 122 | {'o', 'e', 'h', 'a', 't', 'k', 'i', 'r', 'l', 'u', 'y', 'c', ' ', 'q', '.', 's'}, 123 | {' ', 'e', 'i', 'o', ',', '.', 'a', 's', 'y', 'r', 'u', 'd', 'l', ';', '-', 'g'}, 124 | {' ', 'r', 'n', 'd', 's', 'a', 'l', 't', 'e', ',', 'm', 'c', 'v', '.', 'y', 'i'}, 125 | {' ', 'o', 'e', 'r', 'a', 'i', 'f', 'u', 't', 'l', ',', '.', '-', 'y', ';', '?'}, 126 | {' ', 'h', 'e', 'o', 'a', 'r', 'i', 'l', ',', 's', '.', 'u', 'n', 'g', '-', 'b'}, 127 | {'e', 'a', 'i', ' ', 'o', 't', ',', 'r', 'u', '.', 'y', '!', 'm', 's', 'l', 'b'}, 128 | {'n', 's', 't', 'm', 'o', 'l', 'c', 'd', 'r', 'e', 'g', 'a', 'f', 'v', 'z', 'b'}, 129 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 130 | {'e', ' ', 'n', 'i', 's', 'h', ',', '.', 'l', 'f', 'y', '-', ';', 'a', 'w', '!'}, 131 | {'e', 'l', 'i', ' ', 'y', 'd', 'o', 'a', 'f', 'u', ',', 't', 's', 'k', '.', 'w'}, 132 | {'e', ' ', 'a', 'o', 'i', 'u', 'p', 'y', 's', ',', '.', 'b', 'm', ';', 'f', '?'}, 133 | {' ', 'd', 'g', 'e', 't', 'o', 'c', 's', 'i', ',', 'a', 'n', 'y', '.', 'l', 'k'}, 134 | {'u', 'n', ' ', 'r', 'f', 'm', 't', 'w', 'o', 's', 'l', 'v', 'd', 'p', 'k', 'i'}, 135 | {'e', 'r', 'a', 'o', 'l', 'p', 'i', ' ', 't', 'u', 's', 'h', 'y', ',', '.', 'b'}, 136 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 137 | {'e', ' ', 'i', 'o', 'a', 's', 'y', 't', '.', 'd', ',', 'r', 'n', 'c', 'm', 'l'}, 138 | {' ', 'e', 't', 'h', 'i', 'o', 's', 'a', 'u', ',', '.', 'p', 'c', 'l', 'w', 'm'}, 139 | {'h', ' ', 'o', 'e', 'i', 'a', 't', 'r', ',', 'u', '.', 'y', 'l', 's', 'w', 'c'}, 140 | {'r', 't', 'l', 's', 'n', ' ', 'g', 'c', 'p', 'e', 'i', 'a', 'd', 'm', 'b', ','}, 141 | {'e', 'i', 'a', 'o', 'y', 'u', 'r', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 142 | {'a', 'i', 'h', 'e', 'o', ' ', 'n', 'r', ',', 's', 'l', '.', 'd', ';', '-', 'k'}, 143 | {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, 144 | {' ', 'o', ',', '.', 'e', 's', 't', 'i', 'd', ';', '\'', '?', 'l', '!', 'b', '-'}, 145 | }, 146 | Packs: []Pack{ 147 | {0x80000000, 1, 2, [8]uint{26, 24, 24, 24, 24, 24, 24, 24}, [8]int16{15, 3, 0, 0, 0, 0, 0, 0}}, 148 | {0xc0000000, 2, 4, [8]uint{25, 22, 19, 16, 16, 16, 16, 16}, [8]int16{15, 7, 7, 7, 0, 0, 0, 0}}, 149 | {0xe0000000, 4, 8, [8]uint{23, 19, 15, 11, 8, 5, 2, 0}, [8]int16{31, 15, 15, 15, 7, 7, 7, 3}}, 150 | }, 151 | MinChr: 32, 152 | MaxSuccessorN: 7, 153 | } 154 | --------------------------------------------------------------------------------