├── GOST.pdf ├── NickBuhro.Translit.Tests ├── Assets │ ├── Exact │ │ ├── Ru_E01.txt │ │ ├── Be_E01.txt │ │ └── Ru_E02.txt │ ├── Round │ │ ├── Bu_R01.txt │ │ ├── Be_R01.txt │ │ ├── Ma_R01.txt │ │ ├── Ru_R01.txt │ │ └── Uk_R01.txt │ ├── AssetCollectionRound.cs │ ├── README.md │ ├── AssetCollection.cs │ ├── Alphabet.txt │ ├── AssetCollectionExact.cs │ └── AssetCollectionAlphabet.cs ├── Issue005.cs ├── Issue014.cs ├── AssetTests.cs ├── NickBuhro.Translit.Tests.csproj └── GeneralTests.cs ├── NickBuhro.Translit.Benchmark ├── Program.cs ├── v12 │ ├── Rules.txt │ ├── LatinToCyrillicConverter.cs │ ├── CyrillycToLatinConverter.tt │ ├── LatinToCyrillicConverter.tt │ ├── CyrillicToLatinConverter.cs │ ├── CyrillycToLatinConverter.generated.cs │ └── LatinToCyrillicConverter.generated.cs ├── NickBuhro.Translit.Benchmark.csproj ├── Benchmark.cs ├── README.md └── v13 │ ├── Rules.cs │ └── FSMTranslit.cs ├── coverage.bat ├── NickBuhro.Translit ├── Language.cs ├── CustomStringBuilder.cs ├── NickBuhro.Translit.csproj ├── Transliteration.cs └── TransliterationT4.tt ├── LICENSE ├── appveyor.yml ├── README.md ├── NickBuhro.Translit.sln └── .gitignore /GOST.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nick-buhro/Translit/HEAD/GOST.pdf -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/Assets/Exact/Ru_E01.txt: -------------------------------------------------------------------------------- 1 | один 2 | odin 3 | 4 | два 5 | dva 6 | 7 | це 8 | ce -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/Assets/Exact/Be_E01.txt: -------------------------------------------------------------------------------- 1 | Жыць без сяброў вельмі складана. 2 | Zhy`cz` bez syabrou` vel`mі skladana. 3 | 4 | З сябрамі прыемна падзяліцца радасцю. 5 | Z syabramі pry`emna padzyalіczcza radascyu. 
using BenchmarkDotNet.Running;
using System;

namespace NickBuhro.Translit.Benchmark
{
    /// <summary>
    /// Console entry point of the benchmark tool.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Runs the benchmarks declared on <see cref="Benchmark"/>, writes the
        /// resulting summary to the console and waits for a key press before exiting.
        /// </summary>
        public static void Main()
        {
            // BenchmarkRunner has no parameterless non-generic Run overload;
            // the benchmark class must be supplied as the type argument.
            var summary = BenchmarkRunner.Run<Benchmark>();
            Console.WriteLine(summary);

            // Keep the console window open until the user acknowledges the output.
            Console.ReadKey();
        }
    }
}
11 | Гоце Делчев, Питу Гули, 12 | Даме Груев, Сандански! 13 | Горите шумно пеат 14 | нови песни, нови весници! 15 | Македонија слободна, 16 | слободна живее! 17 | Македонија слободна, 18 | слободна живее! -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/Assets/Round/Ru_R01.txt: -------------------------------------------------------------------------------- 1 | Не мысля гордый свет забавить, 2 | Вниманье дружбы возлюбя, 3 | Хотел бы я тебе представить 4 | Залог достойнее тебя, 5 | Достойнее души прекрасной, 6 | Святой исполненной мечты, 7 | Поэзии живой и ясной, 8 | Высоких дум и простоты; 9 | Но так и быть – рукой пристрастной 10 | Прими собранье пестрых глав, 11 | Полусмешных, полупечальных, 12 | Простонародных, идеальных, 13 | Небрежный плод моих забав, 14 | Бессонниц, легких вдохновений, 15 | Незрелых и увядших лет, 16 | Ума холодных наблюдений 17 | И сердца горестных замет. 18 | -------------------------------------------------------------------------------- /coverage.bat: -------------------------------------------------------------------------------- 1 | ECHO OFF 2 | %USERPROFILE%\.nuget\packages\opencover\4.6.519\tools\OpenCover.Console.exe -register:administrator "-filter:+[NickBuhro.Translit]* -[*Tests]*" "-target:%USERPROFILE%\.nuget\packages\xunit.runner.console\2.4.0\tools\net461\xunit.console.exe" "-targetargs:.\NickBuhro.Translit.Tests\bin\Debug\net461\NickBuhro.Translit.Tests.dll -noshadow" 3 | %USERPROFILE%\.nuget\packages\ReportGenerator\3.1.2\tools\ReportGenerator.exe "-reports:results.xml" "-reporttypes:MHtml" "-targetdir:.\" 4 | IF DEFINED COVERALLS_REPO_TOKEN (%USERPROFILE%\.nuget\packages\coveralls.io\1.4.2\tools\coveralls.net.exe --opencover results.xml --full-sources) 5 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/Assets/Exact/Ru_E02.txt: -------------------------------------------------------------------------------- 1 
using Xunit;

namespace NickBuhro.Translit.Tests
{
    /// <summary>
    /// Tests for GitHub Issue #14.
    /// </summary>
    public sealed class Issue014
    {
        // The two pairs look the same on screen but are different code points.
        private const string latIi = "Ii"; // 0x49 , 0x69
        private const string ukrIi = "Іі"; // 0x406, 0x456

        /// <summary>
        /// Latin "Ii" must be converted to the Ukrainian letters with code points 0x406 / 0x456.
        /// </summary>
        [Fact]
        public void TestIiL2C()
        {
            var actual = Transliteration.LatinToCyrillic(latIi, Language.Ukrainian);
            Assert.Equal(ukrIi, actual);
        }

        /// <summary>
        /// Ukrainian letters 0x406 / 0x456 must be converted to Latin "Ii".
        /// </summary>
        [Fact]
        public void TestIiC2L()
        {
            var actual = Transliteration.CyrillicToLatin(ukrIi, Language.Ukrainian);
            Assert.Equal(latIi, actual);
        }
    }
}
using System;
#if NET45 || NETSTANDARD1_3
using System.Buffers;
#endif

namespace NickBuhro.Translit
{
    /// <summary>
    /// Minimal append-only character buffer. The backing array is obtained once
    /// in the constructor and is never grown.
    /// </summary>
    internal struct CustomStringBuilder : IDisposable
    {
        private readonly char[] _array;
        private int _index;

        /// <summary>
        /// Creates a builder able to hold at least <paramref name="capacity"/> characters.
        /// </summary>
        /// <param name="capacity">Number of characters the caller intends to append.</param>
        public CustomStringBuilder(int capacity)
        {
#if NET45 || NETSTANDARD1_3
            // Rent may hand back a larger array than requested; only the first
            // _index characters are ever read back.
            _array = ArrayPool<char>.Shared.Rent(capacity);
#else
            _array = new char[capacity];
#endif
            _index = 0;
        }

        /// <summary>
        /// Writes <paramref name="c"/> at the current position and advances it.
        /// No capacity check is performed beyond the array's own bounds check.
        /// </summary>
        public void Append(char c)
        {
            _array[_index++] = c;
        }

        /// <summary>
        /// Returns the characters appended so far as a new string.
        /// </summary>
        public override string ToString()
        {
            return new string(_array, 0, _index);
        }

        /// <summary>
        /// Returns the rented array to the shared pool (pooled builds only).
        /// </summary>
        public void Dispose()
        {
#if NET45 || NETSTANDARD1_3
            // A default-initialized struct has no array; Return(null) would throw,
            // and Dispose should never throw.
            if (_array != null)
            {
                ArrayPool<char>.Shared.Return(_array);
            }
#endif
        }
    }
}
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace NickBuhro.Translit.Tests.Assets
{
    /// <summary>
    /// Test data for round-trip tests: one row per embedded resource whose name
    /// matches <c>Round.{moniker}_*.txt</c>.
    /// </summary>
    public sealed class AssetCollectionRound : AssetCollection
    {
        /// <summary>
        /// Loads every matching resource and adds its row to the collection.
        /// </summary>
        public AssetCollectionRound()
        {
            // Alternation of all known language monikers, e.g. "Be|Bu|Ma|Ru|Uk".
            var langPattern = string.Join("|", LanguageMonikers.Keys);
            var regex = new Regex($"^Round\\.({langPattern})_.*\\.txt$", RegexOptions.CultureInvariant);
            foreach (var file in LoadResources(regex))
            {
                // Group 1 is the moniker captured from the resource name.
                var lang = LanguageMonikers[file.Item1.Groups[1].Value];
                AddRange(ParseFile(lang, file.Item1.Value, file.Item2));
            }
        }

        /// <summary>
        /// Produces a single row: language, resource name and the whole file content.
        /// </summary>
        private static IEnumerable<object[]> ParseFile(Language lang, string filename, string content)
        {
            yield return new object[]
            {
                lang,
                filename,
                content
            };
        }
    }
}
Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/Assets/README.md: -------------------------------------------------------------------------------- 1 | # NickBuhro.Translit.Tests.Assets 2 | 3 | This folder contains test cases which should be handled by different ways. 4 | 5 | ## Alphabet test cases 6 | 7 | It is based on transliteration table from original standard. 8 | It covers all letters for all languages. 9 | 10 | Table is defined in tab-delimited file [Alphabet.txt](./Alphabet.txt). 11 | 12 | ## Exact test cases 13 | 14 | File with accurate test cases. 15 | 16 | One file can contain many test cases. 17 | Test cases should be separated by one or more empty lines. 18 | 19 | One test case should be represented by 2 lines: cyrillic and latin values. 20 | Values should be separated by new line (one value = one line). 21 | 22 | File format: 23 | 24 | [Cyrillic value] 25 | [Latin value] 26 | 27 | [Cyrillic value] 28 | [Latin value] 29 | ... 
30 | 31 | Example: 32 | 33 | Славься, Отечество наше свободное, 34 | Slav`sya, Otechestvo nashe svobodnoe, 35 | 36 | Братских народов союз вековой, 37 | Bratskix narodov soyuz vekovoj, 38 | 39 | Предками данная мудрость народная! 40 | Predkami dannaya mudrost` narodnaya! 41 | 42 | 43 | ## Round test cases 44 | 45 | Each file should contain cyrillic text. 46 | 47 | Test strategy: 48 | 49 | var latin = Transliteration.CyrillicToLatin(originalCyrillic, ...); 50 | var cyrillic = Transliteration.LatinToCyrillic(latin, ...); 51 | 52 | Assert.Equal(originalCyrillic, cyrillic); -------------------------------------------------------------------------------- /NickBuhro.Translit.Benchmark/v12/LatinToCyrillicConverter.cs: -------------------------------------------------------------------------------- 1 | using System.Diagnostics; 2 | 3 | namespace NickBuhro.Translit.Benchmark.v12 4 | { 5 | public partial struct LatinToCyrillicConverter 6 | { 7 | private readonly string _src; 8 | private readonly ConvertRule[] _ruleSet; 9 | 10 | /// 11 | /// Create an instance of algorithm. 12 | /// 13 | public LatinToCyrillicConverter(string source, Language lang) 14 | { 15 | Debug.Assert(Language.Unknown == 0); 16 | Debug.Assert((int)Language.Russian == 1); 17 | Debug.Assert((int)Language.Belorussian == 2); 18 | Debug.Assert((int)Language.Ukrainian == 3); 19 | Debug.Assert((int)Language.Bulgarian == 4); 20 | Debug.Assert((int)Language.Macedonian == 5); 21 | 22 | _ruleSet = Rules[(int)lang]; 23 | _src = source; 24 | } 25 | 26 | /// 27 | /// Detransliterate source. Should be invoked only once. 28 | /// 29 | /// Detransliterated cyrillic string. 
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;

namespace NickBuhro.Translit.Tests.Assets
{
    /// <summary>
    /// Base class for test-data collections built from resources embedded in the
    /// test assembly. Each element is one row of test arguments.
    /// </summary>
    public abstract class AssetCollection : List<object[]>
    {
        /// <summary>
        /// Maps the two-letter prefix used in asset file names to its language.
        /// </summary>
        protected static readonly Dictionary<string, Language> LanguageMonikers = new Dictionary<string, Language>()
        {
            { "Be", Language.Belorussian },
            { "Bu", Language.Bulgarian },
            { "Ma", Language.Macedonian },
            { "Ru", Language.Russian },
            { "Uk", Language.Ukrainian },
        };

        /// <summary>
        /// Enumerates embedded resources located under this type's namespace whose
        /// name (with the namespace prefix removed) matches <paramref name="namePattern"/>.
        /// </summary>
        /// <param name="namePattern">Pattern applied to the resource name relative to the namespace.</param>
        /// <returns>Pairs of the successful match and the resource's text content.</returns>
        protected static IEnumerable<Tuple<Match, string>> LoadResources(Regex namePattern)
        {
            var assembly = typeof(AssetCollection).Assembly;
            var prefix = typeof(AssetCollection).Namespace + ".";

            foreach (var fullName in assembly.GetManifestResourceNames())
            {
                // Resource names are identifiers, not linguistic text: compare ordinally
                // so the result does not depend on the current culture.
                if (!fullName.StartsWith(prefix, StringComparison.Ordinal))
                    continue;

                var name = fullName.Substring(prefix.Length);
                var match = namePattern.Match(name);
                if (!match.Success)
                    continue;

                string content;
                using (var stream = assembly.GetManifestResourceStream(fullName))
                using (var sr = new StreamReader(stream))
                {
                    content = sr.ReadToEnd();
                }

                yield return new Tuple<Match, string>(match, content);
            }
        }
    }
}
using System;
using BenchmarkDotNet.Attributes;
using NickBuhro.Translit.Benchmark.v12;
using NickBuhro.Translit.Benchmark.v13;

namespace NickBuhro.Translit.Benchmark
{
    /// <summary>
    /// Compares the v12 converters, the v13 <c>FSMTranslit</c> and the current
    /// <c>Transliteration</c> class on the same Russian input, in both directions.
    /// </summary>
    [MemoryDiagnoser]
    public class Benchmark
    {
        private string _cyrillic;
        private string _latin;

        /// <summary>
        /// Name of the input set used for the run.
        /// </summary>
        [Params("Small", "Big")]
        public string Asset;

        /// <summary>
        /// Selects the cyrillic/latin input pair that corresponds to <see cref="Asset"/>.
        /// </summary>
        /// <exception cref="NotSupportedException">The asset name is not recognised.</exception>
        [GlobalSetup]
        public void Setup()
        {
            switch (Asset)
            {
                case "Big":
                    _cyrillic = Assets.BigCyrillic;
                    _latin = Assets.BigLatin;
                    break;

                case "Small":
                    _cyrillic = Assets.SmallCyrillic;
                    _latin = Assets.SmallLatin;
                    break;

                default:
                    throw new NotSupportedException();
            }
        }

        /// <summary>Cyrillic to latin, v12 converter (baseline).</summary>
        [Benchmark(Baseline = true)]
        public string C2Lv12()
        {
            var converter = new CyrillicToLatinConverter(_cyrillic, Language.Russian);
            return converter.Convert();
        }

        /// <summary>Latin to cyrillic, v12 converter.</summary>
        [Benchmark]
        public string L2Cv12()
        {
            var converter = new LatinToCyrillicConverter(_latin, Language.Russian);
            return converter.Convert();
        }

        /// <summary>Cyrillic to latin, v13 FSMTranslit.</summary>
        [Benchmark]
        public string C2Lv13()
        {
            return FSMTranslit.CyrillicToLatin(_cyrillic, Language.Russian);
        }

        /// <summary>Latin to cyrillic, v13 FSMTranslit.</summary>
        [Benchmark]
        public string L2Cv13()
        {
            return FSMTranslit.LatinToCyrillic(_latin, Language.Russian);
        }

        /// <summary>Cyrillic to latin, current Transliteration class.</summary>
        [Benchmark]
        public string C2Lv14()
        {
            return Transliteration.CyrillicToLatin(_cyrillic, Language.Russian);
        }

        /// <summary>Latin to cyrillic, current Transliteration class.</summary>
        [Benchmark]
        public string L2Cv14()
        {
            return Transliteration.LatinToCyrillic(_latin, Language.Russian);
        }
    }
}
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text.RegularExpressions;

namespace NickBuhro.Translit.Tests.Assets
{
    /// <summary>
    /// Test data for exact-match tests, read from embedded resources whose name
    /// matches <c>Exact.{moniker}_*.txt</c>. Each test case is a pair of
    /// consecutive non-empty lines: the cyrillic value followed by the latin value.
    /// </summary>
    public sealed class AssetCollectionExact : AssetCollection
    {
        // Explicit separators so that files saved with Windows line endings are
        // split the same way on every platform. Environment.NewLine is "\n" on
        // Unix, which would leave a trailing '\r' on each line and stop blank
        // separator lines from being recognised as empty.
        private static readonly string[] LineSeparators = { "\r\n", "\n" };

        /// <summary>
        /// Loads every matching resource and adds its test cases to the collection.
        /// </summary>
        public AssetCollectionExact()
        {
            var langPattern = string.Join("|", LanguageMonikers.Keys);
            var regex = new Regex($"^Exact\\.({langPattern})_.*\\.txt$", RegexOptions.CultureInvariant);
            foreach (var file in LoadResources(regex))
            {
                // Group 1 is the moniker captured from the resource name.
                var lang = LanguageMonikers[file.Item1.Groups[1].Value];
                AddRange(ParseFile(lang, file.Item1.Value, file.Item2));
            }
        }

        /// <summary>
        /// Splits <paramref name="content"/> into lines and yields one test case
        /// for every non-empty line together with the line that follows it.
        /// </summary>
        private static IEnumerable<object[]> ParseFile(Language lang, string filename, string content)
        {
            var lines = content.Split(LineSeparators, StringSplitOptions.None);

            // Stop one line early: a test case always needs a following line.
            for (var i = 0; i < (lines.Length - 1);)
            {
                if (string.IsNullOrEmpty(lines[i]))
                {
                    i++;
                    continue;
                }

                // '@' characters are removed from both values before trimming.
                // NOTE(review): presumably '@' marks otherwise invisible leading or
                // trailing whitespace in the asset files; confirm against the assets.
                yield return CreateTestCase(
                    lang,
                    filename,
                    i + 1,
                    lines[i].Replace("@", "").Trim(),
                    lines[i + 1].Replace("@", "").Trim());

                i += 2;
            }
        }

        /// <summary>
        /// Builds the argument row: language, "file Ln:NNN" reference, cyrillic value, latin value.
        /// </summary>
        private static object[] CreateTestCase(Language lang, string fileName, int lineNumber, string cyrillic, string latin)
        {
            return new object[]
            {
                lang,
                fileName + " Ln:" + lineNumber.ToString("000", CultureInfo.InvariantCulture),
                cyrillic,
                latin
            };
        }
    }
}
17 | #---------------------------------# 18 | 19 | image: Visual Studio 2017 20 | 21 | #---------------------------------# 22 | # build configuration # 23 | #---------------------------------# 24 | 25 | platform: Any CPU 26 | configuration: 27 | - Debug 28 | - Release 29 | 30 | build: 31 | project: NickBuhro.Translit.sln 32 | verbosity: minimal 33 | 34 | before_build: 35 | - nuget restore 36 | - ps: | 37 | $xmlPath = "$env:appveyor_build_folder\NickBuhro.Translit\NickBuhro.Translit.csproj" 38 | $xml = [xml](get-content $xmlPath) 39 | $propertyGroup = $xml.Project.PropertyGroup[0] 40 | $propertyGroup.Version = $env:appveyor_build_version 41 | $xml.Save($xmlPath) 42 | 43 | #---------------------------------# 44 | # tests configuration # 45 | #---------------------------------# 46 | 47 | after_test: 48 | - if "%CONFIGURATION%"=="Debug" (coverage.bat) 49 | - if "%CONFIGURATION%"=="Debug" (appveyor PushArtifact Summary.mht) 50 | 51 | #---------------------------------# 52 | # artifacts configuration # 53 | #---------------------------------# 54 | 55 | artifacts: 56 | 57 | - path: NickBuhro.Translit\bin\$(configuration) 58 | type: zip 59 | 60 | - path: NickBuhro.Translit\bin\$(configuration)\*.nupkg 61 | 62 | #---------------------------------# 63 | # deployment configuration # 64 | #---------------------------------# 65 | 66 | deploy: 67 | provider: NuGet 68 | api_key: 69 | secure: vDxpqHHdBtz+P6Nau7V8tLYhtc+5tE6qfEoQI8Vrse11k0vWCrAKY1vUvMoLFURB 70 | skip_symbols: false 71 | artifact: /.*\.nupkg/ 72 | on: 73 | branch: master 74 | configuration: Release 75 | appveyor_repo_tag: true 76 | appveyor_repo_tag_name: publish 77 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/NickBuhro.Translit.Tests.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net461 5 | 6 | 7 | 8 | full 9 | True 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 
all 26 | runtime; build; native; contentfiles; analyzers 27 | 28 | 29 | all 30 | runtime; build; native; contentfiles; analyzers 31 | 32 | 33 | 34 | 35 | 36 | 37 | TextTemplatingFileGenerator 38 | TransliterationTests.generated.cs 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | True 50 | True 51 | TransliterationTests.tt 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/GeneralTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Xunit; 3 | 4 | namespace NickBuhro.Translit.Tests 5 | { 6 | public sealed class GeneralTests 7 | { 8 | [Fact] 9 | public void C2LNullTest() 10 | { 11 | var result = Transliteration.CyrillicToLatin(null); 12 | Assert.Null(result); 13 | } 14 | 15 | [Fact] 16 | public void L2CNullTest() 17 | { 18 | var result = Transliteration.LatinToCyrillic(null); 19 | Assert.Null(result); 20 | } 21 | 22 | [Fact] 23 | public void C2LEmptyTest() 24 | { 25 | var result = Transliteration.CyrillicToLatin(""); 26 | Assert.Equal("", result); 27 | } 28 | 29 | [Fact] 30 | public void L2CEmptyTest() 31 | { 32 | var result = Transliteration.LatinToCyrillic(""); 33 | Assert.Equal("", result); 34 | } 35 | 36 | [Fact] 37 | public void C2LSimpleTest() 38 | { 39 | var result = Transliteration.CyrillicToLatin("Абв"); 40 | Assert.Equal("Abv", result); 41 | } 42 | 43 | [Fact] 44 | public void L2CSimpleTest() 45 | { 46 | var result = Transliteration.LatinToCyrillic("Abv"); 47 | Assert.Equal("Абв", result); 48 | } 49 | 50 | [Fact] 51 | public void C2LNumTest() 52 | { 53 | var result = Transliteration.CyrillicToLatin("123"); 54 | Assert.Equal("123", result); 55 | } 56 | 57 | [Fact] 58 | public void L2CNumTest() 59 | { 60 | var result = Transliteration.LatinToCyrillic("123"); 61 | Assert.Equal("123", result); 62 | } 63 | 64 | [Fact] 65 | public void C2LInvalidLanguageTest() 66 | { 67 | var lang = default(Language) - 1; 68 | 
Assert.Throws(() => Transliteration.CyrillicToLatin("123", lang)); 69 | } 70 | 71 | [Fact] 72 | public void L2CInvalidLanguageTest() 73 | { 74 | var lang = default(Language) - 1; 75 | Assert.Throws(() => Transliteration.LatinToCyrillic("123", lang)); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Benchmark/README.md: -------------------------------------------------------------------------------- 1 | # NickBuhro.Translit.Benchmark 2 | 3 | Tool for testing performance of NickBuhro.Translit library. 4 | 5 | ## Test results 6 | 7 | ``` ini 8 | 9 | BenchmarkDotNet=v0.11.1, OS=Windows 10.0.17134.286 (1803/April2018Update/Redstone4) 10 | Intel Core i5-8250U CPU 1.60GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores 11 | Frequency=1757813 Hz, Resolution=568.8887 ns, Timer=TSC 12 | [Host] : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3163.0 13 | DefaultJob : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3163.0 14 | 15 | 16 | ``` 17 | | Method | Asset | Mean | Scaled | Gen 0 | Gen 1 | Gen 2 | Allocated | 18 | |------- |------ |----------------:|-------:|----------:|----------:|----------:|-----------:| 19 | | **C2Lv12** | **Big** | **4,380,925 ns** | **1.00** | **179** | **156** | **93** | **798475 B** | 20 | | L2Cv12 | Big | 34,817,752 ns | 7.95 | 5600 | 5600 | 5600 | 22718376 B | 21 | | C2Lv13 | Big | 4,899,186 ns | 1.12 | 203 | 195 | 195 | 785008 B | 22 | | L2Cv13 | Big | 5,428,736 ns | 1.24 | 187 | 187 | 187 | 751860 B | 23 | | C2Lv14 | Big | 1,835,048 ns | 0.42 | 318 | 318 | 318 | 1476064 B | 24 | | L2Cv14 | Big | 1,991,838 ns | 0.45 | 195 | 195 | 195 | 750480 B | 25 | | | | | | | | | | 26 | | **C2Lv12** | **Small** | **948 ns** | **1.00** | **0.1421** | **-** | **-** | **448 B** | 27 | | L2Cv12 | Small | 5,717 ns | 6.03 | 0.4501 | - | - | 1440 B | 28 | | C2Lv13 | Small | 894 ns | 0.94 | 0.1240 | - | - | 392 B | 29 | | L2Cv13 | Small | 899 ns | 0.95 | 
0.0753 | - | - | 240 B | 30 | | C2Lv14 | Small | 127 ns | 0.13 | 0.1066 | - | - | 336 B | 31 | | L2Cv14 | Small | 142 ns | 0.15 | 0.0608 | - | - | 192 B | 32 | 33 | - `C2LvXX` - cyrillic to latin transliteration; 34 | - `L2CvXX` - latin to cyrillic transliteration; 35 | - `XXXv12` - library version 1.2 (first implementation with replacement dictionaries); 36 | - `XXXv13` - library version 1.3 (unpublished implementation based on FSM with preconfigured state transitions); 37 | - `XXXv14` - library version 1.4 (code-generated FSM on switches). 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NickBuhro.Translit 2 | 3 | [![License](https://img.shields.io/badge/license-MIT-red.svg)](https://raw.githubusercontent.com/nick-buhro/Translit/master/LICENSE) 4 | [![NuGet Badge](https://buildstats.info/nuget/NickBuhro.Translit)](https://www.nuget.org/packages/NickBuhro.Translit/) 5 | [![Build status](https://ci.appveyor.com/api/projects/status/5xxmbn82hu9762n7?svg=true)](https://ci.appveyor.com/project/nick-buhro/translit) 6 | [![Coverage Status](https://coveralls.io/repos/github/nick-buhro/Translit/badge.svg?branch=master)](https://coveralls.io/github/nick-buhro/Translit?branch=master) 7 | 8 | C# library for cyrillic-latin transliteration by 9 | [ISO 9](https://en.wikipedia.org/wiki/ISO_9) 10 | ([ГОСТ 7.79—2000](https://ru.wikipedia.org/wiki/ISO_9#.D0.93.D0.9E.D0.A1.D0.A2_7.79.E2.80.942000)) 11 | on System B (only for slavic languages). 
12 | 13 | Transliteration is supported in both directions: 14 | * cyrillic to latin 15 | * latin to cyrillic 16 | 17 | A concrete language can be specified from the list: 18 | * Russian 19 | * Belorussian 20 | * Ukrainian 21 | * Bulgarian 22 | * Macedonian 23 | 24 | ## Installation 25 | 26 | It's available over [NuGet](https://www.nuget.org/packages/NickBuhro.Translit/): 27 | 28 | ``` PowerShell 29 | 30 | Install-Package NickBuhro.Translit 31 | 32 | ``` 33 | 34 | ## Usage 35 | 36 | ``` C# 37 | 38 | // Cyrillic to latin example 39 | 40 | var latin = Transliteration.CyrillicToLatin("Предками данная мудрость народная!", Language.Russian); 41 | Console.WriteLine(latin); // Output: Predkami dannaya mudrost` narodnaya! 42 | 43 | // Latin to cyrillic example 44 | 45 | var cyrillic = Transliteration.LatinToCyrillic("Predkami dannaya mudrost` narodnaya!", Language.Russian); 46 | Console.WriteLine(cyrillic); // Output: Предками данная мудрость народная! 47 | 48 | ``` 49 | 50 | ## Running the tests 51 | 52 | Solution includes: 53 | - [NickBuhro.Translit.Tests](./NickBuhro.Translit.Tests) project defines 1000+ xUnit tests; 54 | - [NickBuhro.Translit.Benchmark](./NickBuhro.Translit.Benchmark) lets you analyze library performance and compare different implementations; 55 | - [coverage.bat](./coverage.bat) script lets you analyze code coverage using OpenCover, generate reports and publish results to [coveralls.io](https://coveralls.io/github/nick-buhro/Translit). 56 | 57 | ## Compatibility 58 | 59 | The library uses no references except for `System` - it has no external dependencies. 
60 | It is cross compiled to: 61 | 62 | * .NET Framework 2.0 and above 63 | * .NET Standard 1.3 64 | -------------------------------------------------------------------------------- /NickBuhro.Translit/NickBuhro.Translit.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net20;net35;net40;net45;netstandard1.3 5 | True 6 | 7 | Transliteration 8 | 1.0.0 9 | Nicholas Buhro 10 | translit transliteration cyrillic slavic iso-9 11 | https://raw.githubusercontent.com/nick-buhro/Translit/master/LICENSE 12 | $(NoWarn);NU5125 13 | https://github.com/nick-buhro/Translit 14 | Copyright (c) 2016 Nicholas Buhro 15 | 16 | C# library for cyrillic-latin transliteration by GOST 7.79-2000 (ISO 9) System B (only for slavik languages). 17 | Both direction transliteration is supported: cyrillic to latin and latin to cyrillic. 18 | Supported languages: Russian, Belorussian, Ukrainian, Bulgarian and Makedonian. 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | True 27 | 28 | bin\Release\$(TargetFramework)\NickBuhro.Translit.xml 29 | 30 | 31 | 32 | full 33 | True 34 | 35 | 36 | 37 | 38 | TransliterationT4.generated.cs 39 | TextTemplatingFileGenerator 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | TransliterationT4.tt 50 | True 51 | True 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /NickBuhro.Translit.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.26430.6 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NickBuhro.Translit", "NickBuhro.Translit\NickBuhro.Translit.csproj", "{726F86C8-96FB-4152-AC91-8059B88FFE46}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{1DAE9828-E276-4854-AFE9-46B40D2F6E7A}" 9 | 
ProjectSection(SolutionItems) = preProject 10 | .gitignore = .gitignore 11 | appveyor.yml = appveyor.yml 12 | coverage.bat = coverage.bat 13 | GOST.pdf = GOST.pdf 14 | LICENSE = LICENSE 15 | README.md = README.md 16 | EndProjectSection 17 | EndProject 18 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NickBuhro.Translit.Tests", "NickBuhro.Translit.Tests\NickBuhro.Translit.Tests.csproj", "{2277F7FC-79CF-44E1-BD06-09F2ADCDB1C9}" 19 | EndProject 20 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NickBuhro.Translit.Benchmark", "NickBuhro.Translit.Benchmark\NickBuhro.Translit.Benchmark.csproj", "{1CDBA3DF-03C2-47EA-BEA7-7907D27B6538}" 21 | EndProject 22 | Global 23 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 24 | Debug|Any CPU = Debug|Any CPU 25 | Release|Any CPU = Release|Any CPU 26 | EndGlobalSection 27 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 28 | {726F86C8-96FB-4152-AC91-8059B88FFE46}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 29 | {726F86C8-96FB-4152-AC91-8059B88FFE46}.Debug|Any CPU.Build.0 = Debug|Any CPU 30 | {726F86C8-96FB-4152-AC91-8059B88FFE46}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {726F86C8-96FB-4152-AC91-8059B88FFE46}.Release|Any CPU.Build.0 = Release|Any CPU 32 | {2277F7FC-79CF-44E1-BD06-09F2ADCDB1C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {2277F7FC-79CF-44E1-BD06-09F2ADCDB1C9}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {2277F7FC-79CF-44E1-BD06-09F2ADCDB1C9}.Release|Any CPU.ActiveCfg = Release|Any CPU 35 | {2277F7FC-79CF-44E1-BD06-09F2ADCDB1C9}.Release|Any CPU.Build.0 = Release|Any CPU 36 | {1CDBA3DF-03C2-47EA-BEA7-7907D27B6538}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 37 | {1CDBA3DF-03C2-47EA-BEA7-7907D27B6538}.Debug|Any CPU.Build.0 = Debug|Any CPU 38 | {1CDBA3DF-03C2-47EA-BEA7-7907D27B6538}.Release|Any CPU.ActiveCfg = Release|Any CPU 39 | {1CDBA3DF-03C2-47EA-BEA7-7907D27B6538}.Release|Any CPU.Build.0 = Release|Any CPU 40 | EndGlobalSection 41 | GlobalSection(SolutionProperties) = 
preSolution 42 | HideSolutionNode = FALSE 43 | EndGlobalSection 44 | GlobalSection(ExtensibilityGlobals) = postSolution 45 | SolutionGuid = {A95EF12C-4113-4342-A37C-03F0AB26F193} 46 | EndGlobalSection 47 | EndGlobal 48 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Tests/Assets/AssetCollectionAlphabet.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Globalization; 4 | using System.Linq; 5 | using System.Text.RegularExpressions; 6 | 7 | namespace NickBuhro.Translit.Tests.Assets 8 | { 9 | public abstract class AssetCollectionAlphabet : AssetCollection 10 | { 11 | public AssetCollectionAlphabet(bool c2l) 12 | { 13 | var content = LoadResources(new Regex("Alphabet.txt", RegexOptions.CultureInvariant)) 14 | .First() 15 | .Item2; 16 | 17 | var strs = content.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.None); 18 | 19 | var header = ParseHeader(strs[0].Split('\t')); 20 | 21 | var emptyRules = new HashSet>(); 22 | var index = new HashSet>(); 23 | for (var i = 1; i < strs.Length; i++) 24 | { 25 | if (string.IsNullOrWhiteSpace(strs[i])) 26 | continue; 27 | 28 | var values = strs[i].Split('\t'); 29 | var cyrillic = values[0]; 30 | for (var j = 1; j < header.Length; j++) 31 | { 32 | var lang = header[j]; 33 | var latin = values[j]; 34 | if (string.IsNullOrEmpty(latin)) 35 | { 36 | emptyRules.Add(new Tuple(lang, cyrillic, i)); 37 | continue; 38 | } 39 | 40 | if (!index.Add(new Tuple(lang, c2l ? 
cyrillic : latin))) 41 | continue; 42 | 43 | var reference = " Ln:" + (i + 1).ToString("000", CultureInfo.InvariantCulture); 44 | Add(new object[] {lang, reference, cyrillic, latin }); 45 | } 46 | } 47 | 48 | foreach (var p in emptyRules) 49 | { 50 | if (index.Add(new Tuple(p.Item1, p.Item2))) 51 | { 52 | var reference = " Ln:" + (p.Item3 + 1).ToString("000", CultureInfo.InvariantCulture); 53 | Add(new object[] { p.Item1, reference, p.Item2, p.Item2 }); 54 | } 55 | } 56 | } 57 | 58 | private Language[] ParseHeader(string[] header) 59 | { 60 | var result = new Language[header.Length]; 61 | for (var i = 1; i < result.Length; i++) 62 | { 63 | result[i] = LanguageMonikers[header[i]]; 64 | } 65 | return result; 66 | } 67 | } 68 | 69 | 70 | public sealed class AssetCollectionAlphabetC2L : AssetCollectionAlphabet 71 | { 72 | public AssetCollectionAlphabetC2L() 73 | : base(true) { } 74 | } 75 | 76 | 77 | public sealed class AssetCollectionAlphabetL2C : AssetCollectionAlphabet 78 | { 79 | public AssetCollectionAlphabetL2C() 80 | : base(false) { } 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Benchmark/v12/CyrillycToLatinConverter.tt: -------------------------------------------------------------------------------- 1 | <#@ template debug="false" hostspecific="true" language="C#" #> 2 | <#@ assembly name="System.Core" #> 3 | <#@ import namespace="System.Linq" #> 4 | <#@ import namespace="System.IO" #> 5 | <#@ import namespace="System.Text" #> 6 | <#@ import namespace="System.Globalization" #> 7 | <#@ import namespace="System.Collections.Generic" #> 8 | <#@ output extension=".generated.cs" #> 9 | <# 10 | var dir = Path.GetDirectoryName(this.Host.TemplateFile); 11 | var txt = File.ReadAllText(Path.Combine(dir, "Rules.txt")); 12 | 13 | var cult = new [] 14 | { 15 | CultureInfo.InvariantCulture, 16 | new CultureInfo("ru-RU"), 17 | new CultureInfo("be-BY"), 18 | new CultureInfo("uk-UA"), 19 | new 
CultureInfo("bg-BG"), 20 | new CultureInfo("mk-MK") 21 | }; 22 | 23 | var rules = new [] 24 | { 25 | new Dictionary(), 26 | new Dictionary(), 27 | new Dictionary(), 28 | new Dictionary(), 29 | new Dictionary(), 30 | new Dictionary() 31 | }; 32 | 33 | var ToUpper = new Func((v, f) => { 34 | var arr = v.ToCharArray(); 35 | arr[0] = char.ToUpper(arr[0], f); 36 | return new string(arr); 37 | }); 38 | 39 | var query = txt 40 | .Split(new [] {Environment.NewLine}, StringSplitOptions.RemoveEmptyEntries) 41 | .Skip(1); 42 | 43 | foreach (var line in query) 44 | { 45 | var strs = line 46 | .Split(new char[] {'\t'}, StringSplitOptions.None) 47 | .Select(v => ((v ?? "").Trim().Length > 0) ? v.Trim() : null) 48 | .ToArray(); 49 | 50 | if (strs[0].Trim().Length != 1) throw new Exception("Invalid char length"); 51 | var key = strs[0][0]; 52 | 53 | strs[0] = strs[1] ?? strs[2] ?? strs[3] ?? strs[4] ?? strs[5]; 54 | 55 | for (var i = 0; i < 6; i++) 56 | { 57 | if (strs[i] == null) continue; 58 | 59 | var commaIndex = strs[i].IndexOf(','); 60 | if (commaIndex > 0) strs[i] = strs[i].Substring(0, commaIndex).Trim(); 61 | 62 | var lowerKey = char.ToLower(key, cult[i]); 63 | if ((int)lowerKey != (int)key) 64 | rules[i].Add(lowerKey, strs[i]); 65 | rules[i].Add(key, ToUpper(strs[i], cult[i])); 66 | } 67 | } 68 | #> 69 | using System.Collections.Generic; 70 | 71 | // ------------------------------------------------------------------------------ 72 | // 73 | // This code was generated by a tool. 74 | // Generated at <#=DateTime.Now.ToString("u")#> 75 | // 76 | // Changes to this file may cause incorrect behavior and will be lost if 77 | // the code is regenerated. 
78 | // 79 | // ------------------------------------------------------------------------------ 80 | namespace NickBuhro.Translit.Benchmark.v12 81 | { 82 | partial struct CyrillicToLatinConverter 83 | { 84 | private static readonly Dictionary[] Rules = 85 | { 86 | <# for (var i = 0; i < 6; i++) { #> 87 | new Dictionary // <#=cult[i].Name#> 88 | { 89 | <# foreach (var pair in rules[i]) { #> 90 | {'<#=pair.Key#>', @"<#=pair.Value#>"}, 91 | <# } #> 92 | }, 93 | <# } #> 94 | }; 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /NickBuhro.Translit/Transliteration.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Runtime.CompilerServices; 3 | 4 | [assembly: InternalsVisibleTo("NickBuhro.Translit.Benchmark")] 5 | [assembly: InternalsVisibleTo("NickBuhro.Translit.Tests")] 6 | 7 | namespace NickBuhro.Translit 8 | { 9 | /// 10 | /// Cyrillic-latin transliteration (support only slavic languages) by GOST 7.79-2000 (ISO 9). 11 | /// 12 | public static partial class Transliteration 13 | { 14 | /// 15 | /// Transliterate cyrillic string to latin. 16 | /// 17 | /// Source string. 18 | /// Specify it to determine correct transliteration rules 19 | /// (it can be a little bit different for languages). 20 | /// Transliterated string. 
21 | public static string CyrillicToLatin(string cyrillicSource, Language language = Language.Unknown) 22 | { 23 | if (string.IsNullOrEmpty(cyrillicSource)) 24 | return cyrillicSource; 25 | 26 | switch (language) 27 | { 28 | case Language.Unknown: 29 | case Language.Russian: 30 | return CyrillicToLatinRussian(cyrillicSource); 31 | case Language.Belorussian: 32 | return CyrillicToLatinBelorussian(cyrillicSource); 33 | case Language.Ukrainian: 34 | return CyrillicToLatinUkrainian(cyrillicSource); 35 | case Language.Bulgarian: 36 | return CyrillicToLatinBulgarian(cyrillicSource); 37 | case Language.Macedonian: 38 | return CyrillicToLatinMacedonian(cyrillicSource); 39 | } 40 | 41 | throw new NotSupportedException(); 42 | } 43 | 44 | /// 45 | /// Transliterate latin string to cyrillic. 46 | /// 47 | /// Source string. 48 | /// Specify it to determine correct transliteration rules 49 | /// (it can be a little bit different for languages). 50 | /// Cyrillic string. 51 | public static string LatinToCyrillic(string latinSource, Language language = Language.Unknown) 52 | { 53 | if (string.IsNullOrEmpty(latinSource)) 54 | return latinSource; 55 | 56 | switch (language) 57 | { 58 | case Language.Unknown: 59 | case Language.Russian: 60 | return LatinToCyrillicRussian(latinSource); 61 | case Language.Belorussian: 62 | return LatinToCyrillicBelorussian(latinSource); 63 | case Language.Ukrainian: 64 | return LatinToCyrillicUkrainian(latinSource); 65 | case Language.Bulgarian: 66 | return LatinToCyrillicBulgarian(latinSource); 67 | case Language.Macedonian: 68 | return LatinToCyrillicMacedonian(latinSource); 69 | } 70 | 71 | throw new NotSupportedException(); 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Benchmark/v12/LatinToCyrillicConverter.tt: -------------------------------------------------------------------------------- 1 | <#@ template debug="false" hostspecific="true" language="C#" #> 2 | <#@ 
assembly name="System.Core" #> 3 | <#@ import namespace="System.Linq" #> 4 | <#@ import namespace="System.IO" #> 5 | <#@ import namespace="System.Text" #> 6 | <#@ import namespace="System.Globalization" #> 7 | <#@ import namespace="System.Collections.Generic" #> 8 | <#@ output extension=".generated.cs" #> 9 | <# 10 | var dir = Path.GetDirectoryName(this.Host.TemplateFile); 11 | var txt = File.ReadAllText(Path.Combine(dir, "Rules.txt")); 12 | 13 | var cult = new [] 14 | { 15 | CultureInfo.InvariantCulture, 16 | new CultureInfo("ru-RU"), 17 | new CultureInfo("be-BY"), 18 | new CultureInfo("uk-UA"), 19 | new CultureInfo("bg-BG"), 20 | new CultureInfo("mk-MK") 21 | }; 22 | 23 | var rules = new [] 24 | { 25 | new List>(), 26 | new List>(), 27 | new List>(), 28 | new List>(), 29 | new List>(), 30 | new List>() 31 | }; 32 | 33 | var ToUpper = new Func((v, f) => { 34 | var arr = v.ToCharArray(); 35 | arr[0] = char.ToUpper(arr[0], f); 36 | return new string(arr); 37 | }); 38 | 39 | var query = txt 40 | .Split(new [] {Environment.NewLine}, StringSplitOptions.RemoveEmptyEntries) 41 | .Skip(1); 42 | 43 | foreach (var line in query) 44 | { 45 | var strs = line 46 | .Split(new char[] {'\t'}, StringSplitOptions.None) 47 | .Select(v => ((v ?? "").Trim().Length > 0) ? v.Trim() : null) 48 | .ToArray(); 49 | 50 | if (strs[0].Trim().Length != 1) throw new Exception("Invalid char length"); 51 | var key = strs[0][0]; 52 | 53 | strs[0] = strs[1] ?? strs[2] ?? strs[3] ?? strs[4] ?? 
strs[5]; 54 | 55 | for (var i = 0; i < 6; i++) 56 | { 57 | if (strs[i] == null) continue; 58 | 59 | var values = strs[i] 60 | .Split(new char[] {','}, StringSplitOptions.RemoveEmptyEntries) 61 | .Select(v => v.Trim()); 62 | 63 | foreach (var val in values) 64 | { 65 | var lowerKey = char.ToLower(key, cult[i]); 66 | if ((int)lowerKey != (int)key) 67 | rules[i].Add(new Tuple(lowerKey, val)); 68 | rules[i].Add(new Tuple(key, ToUpper(val, cult[i]))); 69 | } 70 | } 71 | } 72 | #> 73 | 74 | // ------------------------------------------------------------------------------ 75 | // 76 | // This code was generated by a tool. 77 | // Generated at <#=DateTime.Now.ToString("u")#> 78 | // 79 | // Changes to this file may cause incorrect behavior and will be lost if 80 | // the code is regenerated. 81 | // 82 | // ------------------------------------------------------------------------------ 83 | namespace NickBuhro.Translit.Benchmark.v12 84 | { 85 | partial struct LatinToCyrillicConverter 86 | { 87 | private struct ConvertRule 88 | { 89 | public readonly string Latin; 90 | public readonly string Cyrillic; 91 | 92 | public ConvertRule(string cyrillic, string latin) 93 | { 94 | Cyrillic = cyrillic; 95 | Latin = latin; 96 | } 97 | } 98 | 99 | private static readonly ConvertRule[][] Rules = new [] 100 | { 101 | <# for (var i = 0; i < 6; i++) { #> 102 | new [] // <#=cult[i].Name#> 103 | { 104 | <# foreach (var t in rules[i].OrderBy(t => -t.Item2.Length)) { #> 105 | new ConvertRule("<#=t.Item1#>", @"<#=t.Item2#>"), 106 | <# } #> 107 | }, 108 | <# } #> 109 | 110 | }; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Benchmark/v12/CyrillicToLatinConverter.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Diagnostics; 3 | using System.Text; 4 | 5 | namespace NickBuhro.Translit.Benchmark.v12 6 | { 7 | public partial struct 
CyrillicToLatinConverter 8 | { 9 | private readonly Language _lang; 10 | private readonly string _src; 11 | private readonly Dictionary _ruleSet; 12 | 13 | private StringBuilder _sb; 14 | 15 | /// 16 | /// Create an instance of algorithm. 17 | /// 18 | public CyrillicToLatinConverter(string source, Language lang) 19 | { 20 | Debug.Assert(Language.Unknown == 0); 21 | Debug.Assert((int)Language.Russian == 1); 22 | Debug.Assert((int)Language.Belorussian == 2); 23 | Debug.Assert((int)Language.Ukrainian == 3); 24 | Debug.Assert((int)Language.Bulgarian == 4); 25 | Debug.Assert((int)Language.Macedonian == 5); 26 | 27 | _ruleSet = Rules[(int)lang]; 28 | _lang = lang; 29 | _src = source; 30 | _sb = null; 31 | } 32 | 33 | /// 34 | /// Should be invoked only once. 35 | /// 36 | public string Convert() 37 | { 38 | Debug.Assert(_src != null); 39 | Debug.Assert(_ruleSet != null); 40 | 41 | if (string.IsNullOrEmpty(_src)) 42 | return _src; 43 | 44 | _sb = new StringBuilder(); 45 | 46 | for (var srcIndex = 0; srcIndex < _src.Length; srcIndex++) 47 | { 48 | string substitute; 49 | if (_ruleSet.TryGetValue(_src[srcIndex], out substitute)) 50 | { 51 | var nextChar = (_src.Length > (srcIndex + 1)) ? 
_src[srcIndex + 1] : ' '; 52 | substitute = CheckSpecificRules(substitute, nextChar); 53 | _sb.Append(substitute); 54 | } 55 | else 56 | { 57 | _sb.Append(_src[srcIndex]); 58 | } 59 | } 60 | 61 | return _sb.ToString(); 62 | } 63 | 64 | private string CheckSpecificRules(string substitue, char nextSourceChar) 65 | { 66 | // Ц cz, c cz, c cz, c cz, c cz, c рекомендуется использовать С перед буквами I, Е, Y, J; в остальных случаях CZ 67 | if ((substitue.Length != 2) || (substitue[1] != 'z')) 68 | return substitue; 69 | 70 | switch (nextSourceChar) 71 | { 72 | case 'Е': 73 | case 'Ё': 74 | case 'И': 75 | case 'Й': 76 | case 'I': 77 | case 'Ы': 78 | case 'Э': 79 | case 'Ю': 80 | case 'Я': 81 | case 'е': 82 | case 'ё': 83 | case 'и': 84 | case 'й': 85 | case 'i': 86 | case 'ы': 87 | case 'э': 88 | case 'ю': 89 | case 'я': 90 | case 'ѣ': 91 | case 'Ѣ': 92 | case 'ѵ': 93 | case 'Ѵ': 94 | return substitue.Substring(0, 1); 95 | default: 96 | return substitue; 97 | } 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | 24 | results.xml 25 | summary.mht 26 | 27 | # Visual Studio 2015 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # MSTest test Results 33 | [Tt]est[Rr]esult*/ 34 | [Bb]uild[Ll]og.* 35 | 36 | # NUNIT 37 | *.VisualState.xml 38 | TestResult.xml 39 | 40 | # Build Results of an ATL Project 41 | [Dd]ebugPS/ 42 | [Rr]eleasePS/ 43 | dlldata.c 44 | 45 | # DNX 46 | project.lock.json 47 | artifacts/ 48 | 49 | *_i.c 50 | *_p.c 51 | *_i.h 52 | *.ilk 53 | *.meta 54 | *.obj 55 | *.pch 56 | *.pdb 57 | *.pgc 58 | *.pgd 59 | *.rsp 60 | *.sbr 61 | *.tlb 62 | *.tli 63 | *.tlh 64 | *.tmp 65 | *.tmp_proj 66 | *.log 67 | *.vspscc 68 | *.vssscc 69 | .builds 70 | *.pidb 71 | *.svclog 72 | *.scc 73 | 74 | # Chutzpah Test files 75 | _Chutzpah* 76 | 77 | # Visual C++ cache files 78 | ipch/ 79 | *.aps 80 | *.ncb 81 | *.opendb 82 | *.opensdf 83 | *.sdf 84 | *.cachefile 85 | 86 | # Visual Studio profiler 87 | *.psess 88 | *.vsp 89 | *.vspx 90 | *.sap 91 | 92 | # TFS 2012 Local Workspace 93 | $tf/ 94 | 95 | # Guidance Automation Toolkit 96 | *.gpState 97 | 98 | # ReSharper is a .NET coding add-in 99 | _ReSharper*/ 100 | *.[Rr]e[Ss]harper 101 | *.DotSettings.user 102 | 103 | # JustCode is a .NET coding add-in 104 | .JustCode 105 | 106 | # TeamCity is a build add-in 107 | _TeamCity* 108 | 109 | # DotCover is a Code Coverage Tool 110 | *.dotCover 111 | 112 | # NCrunch 113 | _NCrunch_* 114 | .*crunch*.local.xml 115 | nCrunchTemp_* 116 | 117 | # MightyMoose 118 | *.mm.* 119 | AutoTest.Net/ 120 | 121 | # Web workbench (sass) 122 | .sass-cache/ 123 | 124 | # Installshield output folder 125 
| [Ee]xpress/ 126 | 127 | # DocProject is a documentation generator add-in 128 | DocProject/buildhelp/ 129 | DocProject/Help/*.HxT 130 | DocProject/Help/*.HxC 131 | DocProject/Help/*.hhc 132 | DocProject/Help/*.hhk 133 | DocProject/Help/*.hhp 134 | DocProject/Help/Html2 135 | DocProject/Help/html 136 | 137 | # Click-Once directory 138 | publish/ 139 | 140 | # Publish Web Output 141 | *.[Pp]ublish.xml 142 | *.azurePubxml 143 | # TODO: Comment the next line if you want to checkin your web deploy settings 144 | # but database connection strings (with potential passwords) will be unencrypted 145 | *.pubxml 146 | *.publishproj 147 | 148 | # NuGet Packages 149 | *.nupkg 150 | # The packages folder can be ignored because of Package Restore 151 | **/packages/* 152 | # except build/, which is used as an MSBuild target. 153 | !**/packages/build/ 154 | # Uncomment if necessary however generally it will be regenerated when needed 155 | #!**/packages/repositories.config 156 | # NuGet v3's project.json files produces more ignoreable files 157 | *.nuget.props 158 | *.nuget.targets 159 | 160 | # Microsoft Azure Build Output 161 | csx/ 162 | *.build.csdef 163 | 164 | # Microsoft Azure Emulator 165 | ecf/ 166 | rcf/ 167 | 168 | # Microsoft Azure ApplicationInsights config file 169 | ApplicationInsights.config 170 | 171 | # Windows Store app package directory 172 | AppPackages/ 173 | BundleArtifacts/ 174 | 175 | # Visual Studio cache files 176 | # files ending in .cache can be ignored 177 | *.[Cc]ache 178 | # but keep track of directories ending in .cache 179 | !*.[Cc]ache/ 180 | 181 | # Others 182 | ClientBin/ 183 | ~$* 184 | *~ 185 | *.dbmdl 186 | *.dbproj.schemaview 187 | *.pfx 188 | *.publishsettings 189 | node_modules/ 190 | orleans.codegen.cs 191 | 192 | # RIA/Silverlight projects 193 | Generated_Code/ 194 | 195 | # Backup & report files from converting an old project file 196 | # to a newer Visual Studio version. 
Backup files are not needed, 197 | # because we have git ;-) 198 | _UpgradeReport_Files/ 199 | Backup*/ 200 | UpgradeLog*.XML 201 | UpgradeLog*.htm 202 | 203 | # SQL Server files 204 | *.mdf 205 | *.ldf 206 | 207 | # Business Intelligence projects 208 | *.rdl.data 209 | *.bim.layout 210 | *.bim_*.settings 211 | 212 | # Microsoft Fakes 213 | FakesAssemblies/ 214 | 215 | # GhostDoc plugin setting file 216 | *.GhostDoc.xml 217 | 218 | # Node.js Tools for Visual Studio 219 | .ntvs_analysis.dat 220 | 221 | # Visual Studio 6 build log 222 | *.plg 223 | 224 | # Visual Studio 6 workspace options file 225 | *.opt 226 | 227 | # Visual Studio LightSwitch build output 228 | **/*.HTMLClient/GeneratedArtifacts 229 | **/*.DesktopClient/GeneratedArtifacts 230 | **/*.DesktopClient/ModelManifest.xml 231 | **/*.Server/GeneratedArtifacts 232 | **/*.Server/ModelManifest.xml 233 | _Pvt_Extensions 234 | 235 | # Paket dependency manager 236 | .paket/paket.exe 237 | 238 | # FAKE - F# Make 239 | .fake/ 240 | -------------------------------------------------------------------------------- /NickBuhro.Translit.Benchmark/v13/Rules.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace NickBuhro.Translit.Benchmark.v13 4 | { 5 | internal sealed class Rules 6 | { 7 | private const string _crules = "IEYJ"; 8 | 9 | private readonly Dictionary _langIndex = new Dictionary 10 | { 11 | { Language.Russian, 0 }, 12 | { Language.Belorussian, 1 }, 13 | { Language.Ukrainian, 2 }, 14 | { Language.Bulgarian, 3 }, 15 | { Language.Macedonian, 4 }, 16 | }; 17 | 18 | private readonly Dictionary _data = new Dictionary() 19 | { 20 | { "а", new [] { "a", "a", "a", "a", "a" } }, 21 | { "б", new [] { "b", "b", "b", "b", "b" } }, 22 | { "в", new [] { "v", "v", "v", "v", "v" } }, 23 | { "г", new [] { "g", "h", "h", "g", "g" } }, 24 | { "ѓ", new [] { null, null, null, null, "g`" } }, 25 | { "ґ", new [] { null, null, "g`", null, null } }, 
26 | { "д", new [] { "d", "d", "d", "d", "d" } }, 27 | { "е", new [] { "e", "e", "e", "e", "e" } }, 28 | { "ё", new [] { "yo", "yo", null, null, null } }, 29 | { "є", new [] { null, null, "ye", null, null } }, 30 | { "ж", new [] { "zh", "zh", "zh", "zh", "zh" } }, 31 | { "з", new [] { "z", "z", "z", "z", "z" } }, 32 | { "s", new [] { null, null, null, null, "z`" } }, 33 | { "и", new [] { "i", null, "y`", "i", "i" } }, 34 | { "й", new [] { "j", "j", "j", "j", null } }, 35 | { "j", new [] { null, null, null, null, "j" } }, 36 | { "i", new [] { "i", "i", "i", "i", null } }, 37 | { "ї", new [] { null, null, "yi", null, null } }, 38 | { "к", new [] { "k", "k", "k", "k", "k" } }, 39 | { "ќ", new [] { null, null, null, null, "k`" } }, 40 | { "л", new [] { "l", "l", "l", "l", "l" } }, 41 | { "љ", new [] { null, null, null, null, "l`" } }, 42 | { "м", new [] { "m", "m", "m", "m", "m" } }, 43 | { "н", new [] { "n", "n", "n", "n", "n" } }, /* every value must be a Latin string; a Cyrillic look-alike would leak into the Latin output */ 44 | { "њ", new [] { null, null, null, null, "n`" } }, 45 | { "о", new [] { "o", "o", "o", "o", "o" } }, 46 | { "п", new [] { "p", "p", "p", "p", "p" } }, 47 | { "р", new [] { "r", "r", "r", "r", "r" } }, 48 | { "с", new [] { "s", "s", "s", "s", "s" } }, 49 | { "т", new [] { "t", "t", "t", "t", "t" } }, 50 | { "у", new [] { "u", "u", "u", "u", "u" } }, 51 | { "ў", new [] { null, "u`", null, null, null } }, 52 | { "ф", new [] { "f", "f", "f", "f", "f" } }, 53 | { "х", new [] { "x", "x", "x", "x", "x" } }, 54 | { "ц", new [] { "cz", "cz", "cz", "cz", "cz" } }, 55 | { "ч", new [] { "ch", "ch", "ch", "ch", "ch" } }, 56 | { "џ", new [] { null, null, null, null, "dh" } }, 57 | { "ш", new [] { "sh", "sh", "sh", "sh", "sh" } }, 58 | { "щ", new [] { "shh", null, "shh", "sht", null } }, 59 | { "ъ", new [] { "``", null, null, "a`", null } }, 60 | { "ы", new [] { "y`", "y`", null, null, null } }, 61 | { "ь", new [] { "`", "`", "`", "`", null } }, 62 | { "э", new [] { "e`", "e`", null, null, null } }, 63 | { "ю", new [] { "yu", "yu", "yu", "yu", null } 
}, 64 | { "я", new [] { "ya", "ya", "ya", "ya", null } }, 65 | { "’", new [] { "'", "'", "'", "'", "'" } }, 66 | { "ѣ", new [] { "ye", null, null, "ye", null } }, 67 | { "ѳ", new [] { "fh", null, null, "fh", null } }, 68 | { "ѵ", new [] { "yh", null, null, "yh", null } }, 69 | { "ѫ", new [] { null, null, null, "о`", null } }, 70 | { "№", new [] { "#", "#", "#", "#", "#" } } 71 | }; 72 | 73 | 74 | public Dictionary CreateCyrillicToLatinDictionary(Language lang) 75 | { 76 | var result = new Dictionary(); 77 | 78 | foreach (var p in _data) 79 | { 80 | var loCyrillic = p.Key; 81 | var loLatin = p.Value[_langIndex[lang]]; 82 | 83 | if (loLatin == null) continue; 84 | if ((loCyrillic == loLatin) && (loCyrillic.Length == 1)) 85 | continue; 86 | 87 | var upCyrillic = loCyrillic.ToUpper(); 88 | var upLatin = char.ToUpper(loLatin[0]) + loLatin.Substring(1); 89 | 90 | result.Add(loCyrillic, loLatin); 91 | if (loCyrillic != upCyrillic) 92 | { 93 | result.Add(upCyrillic, upLatin); 94 | if (_crules.IndexOf(upLatin[0]) >= 0) 95 | { 96 | result["ц" + loCyrillic] = "c" + loLatin; 97 | result["Ц" + loCyrillic] = "C" + loLatin; 98 | result["ц" + upCyrillic] = "c" + upLatin; 99 | result["Ц" + upCyrillic] = "C" + upLatin; 100 | } 101 | } 102 | } 103 | 104 | return result; 105 | } 106 | 107 | public Dictionary CreateLatinToCyrillicDictionary(Language lang) 108 | { 109 | var result = new Dictionary(); 110 | 111 | foreach (var p in _data) 112 | { 113 | var loCyrillic = p.Key; 114 | var loLatin = p.Value[_langIndex[lang]]; 115 | 116 | if (loLatin == null) continue; 117 | 118 | var upCyrillic = loCyrillic.ToUpper(); 119 | var upLatin = char.ToUpper(loLatin[0]) + loLatin.Substring(1); 120 | 121 | if (!result.ContainsKey(loLatin)) 122 | { 123 | result.Add(loLatin, loCyrillic); 124 | if (!result.ContainsKey(upLatin)) 125 | { 126 | result.Add(upLatin, upCyrillic); 127 | } 128 | } 129 | } 130 | 131 | result.Add("c", "ц"); 132 | result.Add("C", "Ц"); 133 | 134 | return result; 135 | } 136 | } 137 
}
--------------------------------------------------------------------------------
/NickBuhro.Translit.Benchmark/v13/FSMTranslit.cs:
--------------------------------------------------------------------------------
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;

[assembly: InternalsVisibleTo("NickBuhro.Translit.Benchmark")]

namespace NickBuhro.Translit.Benchmark.v13
{
    /// <summary>
    /// Table-driven transliterator: each language/direction pair gets a finite-state
    /// machine built once (in the static constructor) from the replacement dictionaries
    /// produced by <c>Rules</c>, and <see cref="Convert"/> walks that machine over the input.
    /// </summary>
    internal static class FSMTranslit
    {
        // One machine per language. A machine is an array of states; index 0 is the start state.
        // Each state carries its name (the input prefix consumed so far), the text to flush when
        // the state is abandoned, and its transitions keyed by input character.
        private static readonly Dictionary<Language, (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[]> _c2l;
        private static readonly Dictionary<Language, (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[]> _l2c;

        static FSMTranslit()
        {
            var rules = new Rules();
            _c2l = new Dictionary<Language, (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[]>
            {
                { Language.Russian, BuildFSM(rules.CreateCyrillicToLatinDictionary(Language.Russian)) },
                { Language.Belorussian, BuildFSM(rules.CreateCyrillicToLatinDictionary(Language.Belorussian)) },
                { Language.Ukrainian, BuildFSM(rules.CreateCyrillicToLatinDictionary(Language.Ukrainian)) },
                { Language.Bulgarian, BuildFSM(rules.CreateCyrillicToLatinDictionary(Language.Bulgarian)) },
                { Language.Macedonian, BuildFSM(rules.CreateCyrillicToLatinDictionary(Language.Macedonian)) },
            };
            _l2c = new Dictionary<Language, (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[]>
            {
                { Language.Russian, BuildFSM(rules.CreateLatinToCyrillicDictionary(Language.Russian)) },
                { Language.Belorussian, BuildFSM(rules.CreateLatinToCyrillicDictionary(Language.Belorussian)) },
                { Language.Ukrainian, BuildFSM(rules.CreateLatinToCyrillicDictionary(Language.Ukrainian)) },
                { Language.Bulgarian, BuildFSM(rules.CreateLatinToCyrillicDictionary(Language.Bulgarian)) },
                { Language.Macedonian, BuildFSM(rules.CreateLatinToCyrillicDictionary(Language.Macedonian)) },
            };
        }

        /// <summary>Transliterates <paramref name="text"/> with the Cyrillic-to-Latin machine of <paramref name="language"/>.</summary>
        public static string CyrillicToLatin(string text, Language language)
        {
            var fsm = _c2l[language];
            return Convert(text, fsm);
        }

        /// <summary>Transliterates <paramref name="text"/> with the Latin-to-Cyrillic machine of <paramref name="language"/>.</summary>
        public static string LatinToCyrillic(string text, Language language)
        {
            var fsm = _l2c[language];
            return Convert(text, fsm);
        }

        /// <summary>
        /// Runs <paramref name="text"/> through <paramref name="fsm"/>, starting in state 0.
        /// </summary>
        /// <remarks>
        /// For each character: if the current state has a transition for it, the transition's
        /// text is appended and its target state becomes current; otherwise the state's
        /// fallback text and the character itself are appended and the machine returns to
        /// state 0. After the last character the fallback text of the final state is flushed.
        /// </remarks>
        /// <param name="text">Input text.</param>
        /// <param name="fsm">State table as produced by <see cref="BuildFSM"/>.</param>
        /// <returns>The converted text.</returns>
        internal static string Convert(string text, (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[] fsm)
        {
            var sb = new StringBuilder(text.Length);

            var state = 0;
            for (var i = 0; i < text.Length; i++)
            {
                var c = text[i];
                var tc = fsm[state];
                if (tc.transitions.TryGetValue(c, out (int state, string text) output))
                {
                    state = output.state;
                    sb.Append(output.text);
                }
                else
                {
                    state = 0;
                    sb.Append(tc.fallbackText);
                    sb.Append(c);
                }
            }

            // Flush whatever the last state was still holding back.
            {
                var tc = fsm[state];
                sb.Append(tc.fallbackText);
            }

            return sb.ToString();
        }



        /// <summary>
        /// Builds the state table for a set of textual replacements.
        /// </summary>
        /// <remarks>
        /// States are the empty string plus every proper prefix of every key, ordered by length
        /// and then by text, so the empty-string state is always index 0.
        /// </remarks>
        /// <param name="replacements">Source text to replacement text.</param>
        /// <returns>The state table; element 0 is the start state.</returns>
        internal static (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[] BuildFSM(Dictionary<string, string> replacements)
        {
            // Find all states

            var result = GetStates(replacements)
                .OrderBy(s => s.Length)
                .ThenBy(s => s)
                .Select(s => (stateName: s, fallbackText: "", transitions: new Dictionary<char, (int state, string text)>()))
                .ToArray();

            var stateLookup = new Dictionary<string, int>(result.Length);
            for (var i = 0; i < result.Length; i++)
            {
                stateLookup.Add(result[i].stateName, i);
            }

            // Generate simple transitions without output:
            // prefix(i-1) --k[i-1]--> prefix(i) for every proper prefix of every key.

            foreach (var k in replacements.Keys)
            {
                for (var i = 1; i < k.Length; i++)
                {
                    var state = stateLookup[k.Substring(0, i - 1)];
                    var input = k[i - 1];
                    var output = (stateLookup[k.Substring(0, i)], "");
                    if (!result[state].transitions.ContainsKey(input))
                    {
                        result[state].transitions.Add(input, output);
                    }
                }
            }

            // Generate transitions for completed replacements: the last character of a key
            // emits the replacement and returns to state 0. Skipped when the key is also a
            // prefix of a longer key (the prefix transition added above wins).

            foreach (var p in replacements)
            {
                var state = stateLookup[p.Key.Substring(0, p.Key.Length - 1)];
                var input = p.Key[p.Key.Length - 1];
                var output = (0, p.Value);
                if (!result[state].transitions.ContainsKey(input))
                {
                    result[state].transitions.Add(input, output);
                }
            }

            // Generate transitions to go back: for every non-initial state work out what has to
            // be emitted for the already-consumed prefix when the next character does not
            // continue any key, and which state the machine should continue from.

            for (var state = 1; state < result.Length; state++)
            {
                var tail = result[state].stateName;
                var outputText = "";
                for (; ; )
                {
                    // Longest prefix of the tail that is a key; otherwise the first
                    // character stands for itself.
                    string replSource = tail[0].ToString();
                    string replTarget = replSource;
                    {
                        var s = tail;
                        while (s.Length > 0)
                        {
                            if (replacements.TryGetValue(s, out string value))
                            {
                                replSource = s;
                                replTarget = value;
                                break;
                            }
                            s = s.Substring(0, s.Length - 1);
                        }
                    }

                    outputText += replTarget;
                    tail = tail.Substring(replSource.Length);

                    if (stateLookup.TryGetValue(tail, out int outputState))
                    {
                        foreach (var t in result[outputState].transitions)
                        {
                            if (!result[state].transitions.ContainsKey(t.Key))
                            {
                                var output = (t.Value.state, outputText + t.Value.text);
                                result[state].transitions.Add(t.Key, output);
                            }
                        }

                        // The remaining tail may itself be a non-empty state that is still
                        // holding text back; include its fallback so nothing is dropped.
                        // The tail is shorter than this state's name, so (states being sorted
                        // by length) its entry has already been completed.
                        result[state].fallbackText = outputText + result[outputState].fallbackText;
                        break;
                    }
                }
            }

            //

            return result;
        }

        /// <summary>
        /// Returns the set of state names: the empty string plus every proper, non-empty
        /// prefix of every key in <paramref name="replacements"/>.
        /// </summary>
        private static IEnumerable<string> GetStates(Dictionary<string, string> replacements)
        {
            var result = new HashSet<string> { "" };
            foreach (var k in replacements.Keys)
            {
                for (var i = 1; i < k.Length; i++)
                {
                    result.Add(k.Substring(0, i));
                }
            }
            return result;
        }
    }
}

--------------------------------------------------------------------------------
/NickBuhro.Translit.Benchmark/v12/CyrillycToLatinConverter.generated.cs:
--------------------------------------------------------------------------------
using System.Collections.Generic;

// ------------------------------------------------------------------------------
// 
// This code was generated
by a tool. 6 | // Generated at 2018-09-20 21:34:57Z 7 | // 8 | // Changes to this file may cause incorrect behavior and will be lost if 9 | // the code is regenerated. 10 | // 11 | // ------------------------------------------------------------------------------ 12 | namespace NickBuhro.Translit.Benchmark.v12 13 | { 14 | partial struct CyrillicToLatinConverter 15 | { 16 | private static readonly Dictionary[] Rules = 17 | { 18 | new Dictionary // 19 | { 20 | {'а', @"a"}, 21 | {'А', @"A"}, 22 | {'б', @"b"}, 23 | {'Б', @"B"}, 24 | {'в', @"v"}, 25 | {'В', @"V"}, 26 | {'г', @"g"}, 27 | {'Г', @"G"}, 28 | {'ѓ', @"g`"}, 29 | {'Ѓ', @"G`"}, 30 | {'ґ', @"g`"}, 31 | {'Ґ', @"G`"}, 32 | {'д', @"d"}, 33 | {'Д', @"D"}, 34 | {'е', @"e"}, 35 | {'Е', @"E"}, 36 | {'ё', @"yo"}, 37 | {'Ё', @"Yo"}, 38 | {'є', @"ye"}, 39 | {'Є', @"Ye"}, 40 | {'ж', @"zh"}, 41 | {'Ж', @"Zh"}, 42 | {'з', @"z"}, 43 | {'З', @"Z"}, 44 | {'s', @"z`"}, 45 | {'S', @"Z`"}, 46 | {'и', @"i"}, 47 | {'И', @"I"}, 48 | {'й', @"j"}, 49 | {'Й', @"J"}, 50 | {'j', @"j"}, 51 | {'J', @"J"}, 52 | {'i', @"i"}, 53 | {'I', @"I"}, 54 | {'ї', @"yi"}, 55 | {'Ї', @"Yi"}, 56 | {'к', @"k"}, 57 | {'К', @"K"}, 58 | {'ќ', @"k`"}, 59 | {'Ќ', @"K`"}, 60 | {'л', @"l"}, 61 | {'Л', @"L"}, 62 | {'љ', @"l`"}, 63 | {'Љ', @"L`"}, 64 | {'м', @"m"}, 65 | {'М', @"M"}, 66 | {'н', @"n"}, 67 | {'Н', @"N"}, 68 | {'њ', @"n`"}, 69 | {'Њ', @"N`"}, 70 | {'о', @"o"}, 71 | {'О', @"O"}, 72 | {'п', @"p"}, 73 | {'П', @"P"}, 74 | {'р', @"r"}, 75 | {'Р', @"R"}, 76 | {'с', @"s"}, 77 | {'С', @"S"}, 78 | {'т', @"t"}, 79 | {'Т', @"T"}, 80 | {'у', @"u"}, 81 | {'У', @"U"}, 82 | {'ў', @"u`"}, 83 | {'Ў', @"U`"}, 84 | {'ф', @"f"}, 85 | {'Ф', @"F"}, 86 | {'х', @"x"}, 87 | {'Х', @"X"}, 88 | {'ц', @"cz"}, 89 | {'Ц', @"Cz"}, 90 | {'ч', @"ch"}, 91 | {'Ч', @"Ch"}, 92 | {'џ', @"dh"}, 93 | {'Џ', @"Dh"}, 94 | {'ш', @"sh"}, 95 | {'Ш', @"Sh"}, 96 | {'щ', @"shh"}, 97 | {'Щ', @"Shh"}, 98 | {'ъ', @"``"}, 99 | {'Ъ', @"``"}, 100 | {'ы', @"y`"}, 101 | {'Ы', @"Y`"}, 102 | {'ь', @"`"}, 103 | 
{'Ь', @"`"}, 104 | {'э', @"e`"}, 105 | {'Э', @"E`"}, 106 | {'ю', @"yu"}, 107 | {'Ю', @"Yu"}, 108 | {'я', @"ya"}, 109 | {'Я', @"Ya"}, 110 | {'’', @"'"}, 111 | {'ѣ', @"ye"}, 112 | {'Ѣ', @"Ye"}, 113 | {'ѳ', @"fh"}, 114 | {'Ѳ', @"Fh"}, 115 | {'ѵ', @"yh"}, 116 | {'Ѵ', @"Yh"}, 117 | {'ѫ', @"о`"}, 118 | {'Ѫ', @"О`"}, 119 | {'№', @"#"}, 120 | }, 121 | new Dictionary // ru-RU 122 | { 123 | {'а', @"a"}, 124 | {'А', @"A"}, 125 | {'б', @"b"}, 126 | {'Б', @"B"}, 127 | {'в', @"v"}, 128 | {'В', @"V"}, 129 | {'г', @"g"}, 130 | {'Г', @"G"}, 131 | {'д', @"d"}, 132 | {'Д', @"D"}, 133 | {'е', @"e"}, 134 | {'Е', @"E"}, 135 | {'ё', @"yo"}, 136 | {'Ё', @"Yo"}, 137 | {'ж', @"zh"}, 138 | {'Ж', @"Zh"}, 139 | {'з', @"z"}, 140 | {'З', @"Z"}, 141 | {'и', @"i"}, 142 | {'И', @"I"}, 143 | {'й', @"j"}, 144 | {'Й', @"J"}, 145 | {'i', @"i"}, 146 | {'I', @"I"}, 147 | {'к', @"k"}, 148 | {'К', @"K"}, 149 | {'л', @"l"}, 150 | {'Л', @"L"}, 151 | {'м', @"m"}, 152 | {'М', @"M"}, 153 | {'н', @"n"}, 154 | {'Н', @"N"}, 155 | {'о', @"o"}, 156 | {'О', @"O"}, 157 | {'п', @"p"}, 158 | {'П', @"P"}, 159 | {'р', @"r"}, 160 | {'Р', @"R"}, 161 | {'с', @"s"}, 162 | {'С', @"S"}, 163 | {'т', @"t"}, 164 | {'Т', @"T"}, 165 | {'у', @"u"}, 166 | {'У', @"U"}, 167 | {'ф', @"f"}, 168 | {'Ф', @"F"}, 169 | {'х', @"x"}, 170 | {'Х', @"X"}, 171 | {'ц', @"cz"}, 172 | {'Ц', @"Cz"}, 173 | {'ч', @"ch"}, 174 | {'Ч', @"Ch"}, 175 | {'ш', @"sh"}, 176 | {'Ш', @"Sh"}, 177 | {'щ', @"shh"}, 178 | {'Щ', @"Shh"}, 179 | {'ъ', @"``"}, 180 | {'Ъ', @"``"}, 181 | {'ы', @"y`"}, 182 | {'Ы', @"Y`"}, 183 | {'ь', @"`"}, 184 | {'Ь', @"`"}, 185 | {'э', @"e`"}, 186 | {'Э', @"E`"}, 187 | {'ю', @"yu"}, 188 | {'Ю', @"Yu"}, 189 | {'я', @"ya"}, 190 | {'Я', @"Ya"}, 191 | {'’', @"'"}, 192 | {'ѣ', @"ye"}, 193 | {'Ѣ', @"Ye"}, 194 | {'ѳ', @"fh"}, 195 | {'Ѳ', @"Fh"}, 196 | {'ѵ', @"yh"}, 197 | {'Ѵ', @"Yh"}, 198 | {'№', @"#"}, 199 | }, 200 | new Dictionary // be-BY 201 | { 202 | {'а', @"a"}, 203 | {'А', @"A"}, 204 | {'б', @"b"}, 205 | {'Б', @"B"}, 206 | {'в', @"v"}, 207 | 
{'В', @"V"}, 208 | {'г', @"h"}, 209 | {'Г', @"H"}, 210 | {'д', @"d"}, 211 | {'Д', @"D"}, 212 | {'е', @"e"}, 213 | {'Е', @"E"}, 214 | {'ё', @"yo"}, 215 | {'Ё', @"Yo"}, 216 | {'ж', @"zh"}, 217 | {'Ж', @"Zh"}, 218 | {'з', @"z"}, 219 | {'З', @"Z"}, 220 | {'й', @"j"}, 221 | {'Й', @"J"}, 222 | {'i', @"i"}, 223 | {'I', @"I"}, 224 | {'к', @"k"}, 225 | {'К', @"K"}, 226 | {'л', @"l"}, 227 | {'Л', @"L"}, 228 | {'м', @"m"}, 229 | {'М', @"M"}, 230 | {'н', @"n"}, 231 | {'Н', @"N"}, 232 | {'о', @"o"}, 233 | {'О', @"O"}, 234 | {'п', @"p"}, 235 | {'П', @"P"}, 236 | {'р', @"r"}, 237 | {'Р', @"R"}, 238 | {'с', @"s"}, 239 | {'С', @"S"}, 240 | {'т', @"t"}, 241 | {'Т', @"T"}, 242 | {'у', @"u"}, 243 | {'У', @"U"}, 244 | {'ў', @"u`"}, 245 | {'Ў', @"U`"}, 246 | {'ф', @"f"}, 247 | {'Ф', @"F"}, 248 | {'х', @"x"}, 249 | {'Х', @"X"}, 250 | {'ц', @"cz"}, 251 | {'Ц', @"Cz"}, 252 | {'ч', @"ch"}, 253 | {'Ч', @"Ch"}, 254 | {'ш', @"sh"}, 255 | {'Ш', @"Sh"}, 256 | {'ы', @"y`"}, 257 | {'Ы', @"Y`"}, 258 | {'ь', @"`"}, 259 | {'Ь', @"`"}, 260 | {'э', @"e`"}, 261 | {'Э', @"E`"}, 262 | {'ю', @"yu"}, 263 | {'Ю', @"Yu"}, 264 | {'я', @"ya"}, 265 | {'Я', @"Ya"}, 266 | {'’', @"'"}, 267 | {'№', @"#"}, 268 | }, 269 | new Dictionary // uk-UA 270 | { 271 | {'а', @"a"}, 272 | {'А', @"A"}, 273 | {'б', @"b"}, 274 | {'Б', @"B"}, 275 | {'в', @"v"}, 276 | {'В', @"V"}, 277 | {'г', @"h"}, 278 | {'Г', @"H"}, 279 | {'ґ', @"g`"}, 280 | {'Ґ', @"G`"}, 281 | {'д', @"d"}, 282 | {'Д', @"D"}, 283 | {'е', @"e"}, 284 | {'Е', @"E"}, 285 | {'є', @"ye"}, 286 | {'Є', @"Ye"}, 287 | {'ж', @"zh"}, 288 | {'Ж', @"Zh"}, 289 | {'з', @"z"}, 290 | {'З', @"Z"}, 291 | {'и', @"y`"}, 292 | {'И', @"Y`"}, 293 | {'й', @"j"}, 294 | {'Й', @"J"}, 295 | {'i', @"i"}, 296 | {'I', @"I"}, 297 | {'ї', @"yi"}, 298 | {'Ї', @"Yi"}, 299 | {'к', @"k"}, 300 | {'К', @"K"}, 301 | {'л', @"l"}, 302 | {'Л', @"L"}, 303 | {'м', @"m"}, 304 | {'М', @"M"}, 305 | {'н', @"n"}, 306 | {'Н', @"N"}, 307 | {'о', @"o"}, 308 | {'О', @"O"}, 309 | {'п', @"p"}, 310 | {'П', @"P"}, 311 | 
{'р', @"r"}, 312 | {'Р', @"R"}, 313 | {'с', @"s"}, 314 | {'С', @"S"}, 315 | {'т', @"t"}, 316 | {'Т', @"T"}, 317 | {'у', @"u"}, 318 | {'У', @"U"}, 319 | {'ф', @"f"}, 320 | {'Ф', @"F"}, 321 | {'х', @"x"}, 322 | {'Х', @"X"}, 323 | {'ц', @"cz"}, 324 | {'Ц', @"Cz"}, 325 | {'ч', @"ch"}, 326 | {'Ч', @"Ch"}, 327 | {'ш', @"sh"}, 328 | {'Ш', @"Sh"}, 329 | {'щ', @"shh"}, 330 | {'Щ', @"Shh"}, 331 | {'ь', @"`"}, 332 | {'Ь', @"`"}, 333 | {'ю', @"yu"}, 334 | {'Ю', @"Yu"}, 335 | {'я', @"ya"}, 336 | {'Я', @"Ya"}, 337 | {'’', @"'"}, 338 | {'№', @"#"}, 339 | }, 340 | new Dictionary // bg-BG 341 | { 342 | {'а', @"a"}, 343 | {'А', @"A"}, 344 | {'б', @"b"}, 345 | {'Б', @"B"}, 346 | {'в', @"v"}, 347 | {'В', @"V"}, 348 | {'г', @"g"}, 349 | {'Г', @"G"}, 350 | {'д', @"d"}, 351 | {'Д', @"D"}, 352 | {'е', @"e"}, 353 | {'Е', @"E"}, 354 | {'ж', @"zh"}, 355 | {'Ж', @"Zh"}, 356 | {'з', @"z"}, 357 | {'З', @"Z"}, 358 | {'и', @"i"}, 359 | {'И', @"I"}, 360 | {'й', @"j"}, 361 | {'Й', @"J"}, 362 | {'i', @"i"}, 363 | {'I', @"I"}, 364 | {'к', @"k"}, 365 | {'К', @"K"}, 366 | {'л', @"l"}, 367 | {'Л', @"L"}, 368 | {'м', @"m"}, 369 | {'М', @"M"}, 370 | {'н', @"n"}, 371 | {'Н', @"N"}, 372 | {'о', @"o"}, 373 | {'О', @"O"}, 374 | {'п', @"p"}, 375 | {'П', @"P"}, 376 | {'р', @"r"}, 377 | {'Р', @"R"}, 378 | {'с', @"s"}, 379 | {'С', @"S"}, 380 | {'т', @"t"}, 381 | {'Т', @"T"}, 382 | {'у', @"u"}, 383 | {'У', @"U"}, 384 | {'ф', @"f"}, 385 | {'Ф', @"F"}, 386 | {'х', @"x"}, 387 | {'Х', @"X"}, 388 | {'ц', @"cz"}, 389 | {'Ц', @"Cz"}, 390 | {'ч', @"ch"}, 391 | {'Ч', @"Ch"}, 392 | {'ш', @"sh"}, 393 | {'Ш', @"Sh"}, 394 | {'щ', @"sht"}, 395 | {'Щ', @"Sht"}, 396 | {'ъ', @"a`"}, 397 | {'Ъ', @"A`"}, 398 | {'ь', @"`"}, 399 | {'Ь', @"`"}, 400 | {'ю', @"yu"}, 401 | {'Ю', @"Yu"}, 402 | {'я', @"ya"}, 403 | {'Я', @"Ya"}, 404 | {'’', @"'"}, 405 | {'ѣ', @"ye"}, 406 | {'Ѣ', @"Ye"}, 407 | {'ѳ', @"fh"}, 408 | {'Ѳ', @"Fh"}, 409 | {'ѵ', @"yh"}, 410 | {'Ѵ', @"Yh"}, 411 | {'ѫ', @"о`"}, 412 | {'Ѫ', @"О`"}, 413 | {'№', @"#"}, 414 | }, 415 | new 
Dictionary // mk-MK 416 | { 417 | {'а', @"a"}, 418 | {'А', @"A"}, 419 | {'б', @"b"}, 420 | {'Б', @"B"}, 421 | {'в', @"v"}, 422 | {'В', @"V"}, 423 | {'г', @"g"}, 424 | {'Г', @"G"}, 425 | {'ѓ', @"g`"}, 426 | {'Ѓ', @"G`"}, 427 | {'д', @"d"}, 428 | {'Д', @"D"}, 429 | {'е', @"e"}, 430 | {'Е', @"E"}, 431 | {'ж', @"zh"}, 432 | {'Ж', @"Zh"}, 433 | {'з', @"z"}, 434 | {'З', @"Z"}, 435 | {'s', @"z`"}, 436 | {'S', @"Z`"}, 437 | {'и', @"i"}, 438 | {'И', @"I"}, 439 | {'j', @"j"}, 440 | {'J', @"J"}, 441 | {'к', @"k"}, 442 | {'К', @"K"}, 443 | {'ќ', @"k`"}, 444 | {'Ќ', @"K`"}, 445 | {'л', @"l"}, 446 | {'Л', @"L"}, 447 | {'љ', @"l`"}, 448 | {'Љ', @"L`"}, 449 | {'м', @"m"}, 450 | {'М', @"M"}, 451 | {'н', @"п"}, 452 | {'Н', @"П"}, 453 | {'њ', @"n`"}, 454 | {'Њ', @"N`"}, 455 | {'о', @"o"}, 456 | {'О', @"O"}, 457 | {'п', @"p"}, 458 | {'П', @"P"}, 459 | {'р', @"r"}, 460 | {'Р', @"R"}, 461 | {'с', @"s"}, 462 | {'С', @"S"}, 463 | {'т', @"t"}, 464 | {'Т', @"T"}, 465 | {'у', @"u"}, 466 | {'У', @"U"}, 467 | {'ф', @"f"}, 468 | {'Ф', @"F"}, 469 | {'х', @"x"}, 470 | {'Х', @"X"}, 471 | {'ц', @"cz"}, 472 | {'Ц', @"Cz"}, 473 | {'ч', @"ch"}, 474 | {'Ч', @"Ch"}, 475 | {'џ', @"dh"}, 476 | {'Џ', @"Dh"}, 477 | {'ш', @"sh"}, 478 | {'Ш', @"Sh"}, 479 | {'’', @"'"}, 480 | {'№', @"#"}, 481 | }, 482 | }; 483 | } 484 | } 485 | -------------------------------------------------------------------------------- /NickBuhro.Translit/TransliterationT4.tt: -------------------------------------------------------------------------------- 1 | <#@ template language="C#" #> 2 | <#@ assembly name="System.Core" #> 3 | <#@ import namespace="System.Linq" #> 4 | <#@ import namespace="System" #> 5 | <#@ import namespace="System.CodeDom" #> 6 | <#@ import namespace="System.CodeDom.Compiler" #> 7 | <#@ import namespace="System.IO" #> 8 | <#@ import namespace="System.Text" #> 9 | <#@ import namespace="System.Globalization" #> 10 | <#@ import namespace="System.Collections.Generic" #> 11 | <#@ output extension=".generated.cs" #> 12 | 
<#
    // Language names in the column order of Rules._data; the index is passed to Rules
    // and the name becomes part of the generated method name.
    var langCollection = new string[] 
    {
        "Russian",
        "Belorussian",
        "Ukrainian",
        "Bulgarian",
        "Macedonian"
    };

    // One generated method per (direction, language) pair.
    var definitions = new List<(string name, Dictionary<string, string> replacements)>();

    var rules = new Rules();
    for (var i = 0; i < langCollection.Length; i++)
    {
        definitions.Add((name: "CyrillicToLatin" + langCollection[i], replacements: rules.CreateCyrillicToLatinDictionary(i)));
        definitions.Add((name: "LatinToCyrillic" + langCollection[i], replacements: rules.CreateLatinToCyrillicDictionary(i)));
    }
#>
// ------------------------------------------------------------------------------
// 
// This code was generated by a tool.
// Generated at <#=DateTime.UtcNow.ToString("u")#>
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// 
// ------------------------------------------------------------------------------
namespace NickBuhro.Translit
{
    partial class Transliteration
    {
<# foreach (var md in definitions) { var fsm = BuildFSM(md.replacements); #>
        internal static string <#=md.name#>(string text)
        {
            using (var sb = new CustomStringBuilder(text.Length<#= md.name[0] == 'C' ? " * 3" : "" #>))
            {

                var state = 0;
                for (var i = 0; i < text.Length; i++)
                {
                    var c = text[i];
                    switch (state)
                    {
<# for (var s = 0; s < fsm.Length; s++) { #>
                        case <#=s#>:    // <#=ToLiteral(fsm[s].stateName)#>
                            switch (c)
                            {
<# foreach (var t in fsm[s].transitions.OrderBy(tr => tr.Key)) { #>
                                case <#=ToLiteral(t.Key)#>:
<# foreach (var o in t.Value.text) { #>
                                    sb.Append(<#=ToLiteral(o)#>);
<# } #>
<# if (s != t.Value.state) { #>
                                    state = <#=t.Value.state#>; // <#=ToLiteral(fsm[t.Value.state].stateName)#>
<# } #>
                                    break;
<# } #>
                                default:
<# foreach (var o in fsm[s].fallbackText) { #>
                                    sb.Append(<#=ToLiteral(o)#>);
<# } #>
                                    sb.Append(c);
<# if (s != 0) { #>
                                    state = 0; // ""
<# } #>
                                    break;
                            }
                            break;
<# } #>
                    }
                }
<# if (fsm.Length > 1) { #>

                switch (state)
                {
<# for (var s = 1; s < fsm.Length; s++) { #>
                    case <#=s#>:    // <#=ToLiteral(fsm[s].stateName)#>
<# foreach (var o in fsm[s].fallbackText) { #>
                        sb.Append(<#=ToLiteral(o)#>);
<# } #>
                        break;
<# } #>
                }
<# } #>
                return sb.ToString();
            }
        }

<# } #>
    }
}
<#+
    //----------------------------------------------------------------
    // Helpers
    //----------------------------------------------------------------

    // Renders a primitive value (here: a string, char or int) as a C# source literal
    // using the CodeDom C# provider.
    private static string ToLiteral(object input)
    {
        using (var writer = new StringWriter())
        using (var provider = CodeDomProvider.CreateProvider("CSharp"))
        {
            provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
            return writer.ToString();
        }
    }

    //----------------------------------------------------------------
    // Transliteration rules
    //----------------------------------------------------------------

    // Source of truth for the replacement tables. Each _data row is a lower-case source
    // letter followed by one Latin value per language (null = letter not used), in the
    // column order Russian, Belorussian, Ukrainian, Bulgarian, Macedonian.
    internal sealed class Rules
    {
        // First letters (capitalised Latin) that trigger the "ц + letter -> c + letter" digraph rule.
        private const string _crules = "IEYJ";

        private readonly Dictionary<string, string[]> _data = new Dictionary<string, string[]>()
        {
            { "а", new [] { "a", "a", "a", "a", "a" } },
            { "б", new [] { "b", "b", "b", "b", "b" } },
            { "в", new [] { "v", "v", "v", "v", "v" } },
            { "г", new [] { "g", "h", "h", "g", "g" } },
            { "ѓ", new [] { null, null, null, null, "g`" } },
            { "ґ", new [] { null, null, "g`", null, null } },
            { "д", new [] { "d", "d", "d", "d", "d" } },
            { "е", new [] { "e", "e", "e", "e", "e" } },
            { "ё", new [] { "yo", "yo", null, null, null } },
            { "є", new [] { null, null, "ye", null, null } },
            { "ж", new [] { "zh", "zh", "zh", "zh", "zh" } },
            { "з", new [] { "z", "z", "z", "z", "z" } },
            { "s", new [] { null, null, null, null, "z`" } },
            { "и", new [] { "i", null, "y`", "i", "i" } },
            { "й", new [] { "j", "j", "j", "j", null } },
            { "j", new [] { null, null, null, null, "j" } },
            { "і", new [] { null, null, "i", null, null } }, // 0x456
            { "i", new [] { "i", "i", null, "i", null } }, // 0x69
            { "ї", new [] { null, null, "yi", null, null } },
            { "к", new [] { "k", "k", "k", "k", "k" } },
            { "ќ", new [] { null, null, null, null, "k`" } },
            { "л", new [] { "l", "l", "l", "l", "l" } },
            { "љ", new [] { null, null, null, null, "l`" } },
            { "м", new [] { "m", "m", "m", "m", "m" } },
            // All five values are Latin "n"; a Cyrillic "п" in the Macedonian column would
            // leak a Cyrillic letter into the Latin output.
            { "н", new [] { "n", "n", "n", "n", "n" } },
            { "њ", new [] { null, null, null, null, "n`" } },
            { "о", new [] { "o", "o", "o", "o", "o" } },
            { "п", new [] { "p", "p", "p", "p", "p" } },
            { "р", new [] { "r", "r", "r", "r", "r" } },
            { "с", new [] { "s", "s", "s", "s", "s" } },
            { "т", new [] { "t", "t", "t", "t", "t" } },
            { "у", new [] { "u", "u", "u", "u", "u" } },
            { "ў", new [] { null, "u`", null, null, null } },
            { "ф", new [] { "f", "f", "f", "f", "f" } },
            { "х", new [] { "x", "x", "x", "x", "x" } },
            { "ц", new [] { "cz", "cz", "cz", "cz", "cz" } },
            { "ч", new [] { "ch", "ch", "ch", "ch", "ch" } },
            { "џ", new [] { null, null, null, null, "dh" } },
            { "ш", new [] { "sh", "sh", "sh", "sh", "sh" } },
            { "щ", new [] { "shh", null, "shh", "sht", null } },
            { "ъ", new [] { "``", null, null, "a`", null } },
            { "ы", new [] { "y`", "y`", null, null, null } },
            { "ь", new [] { "`", "`", "`", "`", null } },
            { "э", new [] { "e`", "e`", null, null, null } },
            { "ю", new [] { "yu", "yu", "yu", "yu", null } },
            { "я", new [] { "ya", "ya", "ya", "ya", null } },
            { "’", new [] { "'", "'", "'", "'", "'" } },
            { "ѣ", new [] { "ye", null, null, "ye", null } },
            { "ѳ", new [] { "fh", null, null, "fh", null } },
            { "ѵ", new [] { "yh", null, null, "yh", null } },
            // Latin "o" followed by a grave accent.
            { "ѫ", new [] { null, null, null, "o`", null } },
            { "№", new [] { "#", "#", "#", "#", "#" } }
        };


        // Builds the Cyrillic-to-Latin table for the language column `lang`:
        // lower-case pair, upper-case pair (when the key has a distinct upper-case form),
        // and for letters whose capitalised Latin value starts with one of _crules the
        // four "ц"/"Ц" + letter keys mapping to "c"/"C" + the letter's Latin value.
        // Rows with a null value and single-character identity rows are skipped.
        public Dictionary<string, string> CreateCyrillicToLatinDictionary(int lang)
        {
            var result = new Dictionary<string, string>();

            foreach (var p in _data)
            {
                var loCyrillic = p.Key;
                var loLatin = p.Value[lang];

                if (loLatin == null) continue;
                if ((loCyrillic == loLatin) && (loCyrillic.Length == 1))
                    continue;

                // Invariant casing: the generated tables must not depend on the culture of
                // the machine that runs the template.
                var upCyrillic = loCyrillic.ToUpperInvariant();
                var upLatin = char.ToUpperInvariant(loLatin[0]) + loLatin.Substring(1);

                result.Add(loCyrillic, loLatin);
                if (loCyrillic != upCyrillic)
                {
                    result.Add(upCyrillic, upLatin);
                    if (_crules.IndexOf(upLatin[0]) >= 0)
                    {
                        result["ц" + loCyrillic] = "c" + loLatin;
                        result["Ц" + loCyrillic] = "C" + loLatin;
                        result["ц" + upCyrillic] = "c" + upLatin;
                        result["Ц" + upCyrillic] = "C" + upLatin;
                    }
                }
            }

            return result;
        }

        // Builds the Latin-to-Cyrillic table for the language column `lang`.
        // The first row producing a given Latin sequence wins; the capitalised form is only
        // considered when the lower-case form was newly added. Bare "c"/"C" are added last.
        public Dictionary<string, string> CreateLatinToCyrillicDictionary(int lang)
        {
            var result = new Dictionary<string, string>();

            foreach (var p in _data)
            {
                var loCyrillic = p.Key;
                var loLatin = p.Value[lang];

                if (loLatin == null) continue;

                var upCyrillic = loCyrillic.ToUpperInvariant();
                var upLatin = char.ToUpperInvariant(loLatin[0]) + loLatin.Substring(1);

                if (!result.ContainsKey(loLatin))
                {
                    result.Add(loLatin, loCyrillic);
                    if (!result.ContainsKey(upLatin))
                    {
                        result.Add(upLatin, upCyrillic);
                    }
                }
            }

            result.Add("c", "ц");
            result.Add("C", "Ц");

            return result;
        }
    }

    //----------------------------------------------------------------
    // FSM Parser
    //----------------------------------------------------------------

    // Builds the state table for a set of textual replacements. States are the empty string
    // plus every proper prefix of every key, ordered by length and then by text, so the
    // empty-string state is always index 0 (the start state of the generated methods).
    internal static (string stateName, string fallbackText, Dictionary<char, (int state, string text)> transitions)[] BuildFSM(Dictionary<string, string> replacements)
    {
        // Find all states

        var result = GetStates(replacements)
            .OrderBy(s => s.Length)
            .ThenBy(s => s)
            .Select(s => (stateName: s, fallbackText: "", transitions: new Dictionary<char, (int state, string text)>()))
            .ToArray();

        var stateLookup = new Dictionary<string, int>(result.Length);
        for (var i = 0; i < result.Length; i++)
        {
            stateLookup.Add(result[i].stateName, i);
        }

        // Generate simple transitions without output:
        // prefix(i-1) --k[i-1]--> prefix(i) for every proper prefix of every key.

        foreach (var k in replacements.Keys)
        {
            for (var i = 1; i < k.Length; i++)
            {
                var state = stateLookup[k.Substring(0, i - 1)];
                var input = k[i - 1];
                var output = (stateLookup[k.Substring(0, i)], "");
                if (!result[state].transitions.ContainsKey(input))
                {
                    result[state].transitions.Add(input, output);
                }
            }
        }

        // Generate transitions for completed replacements: the last character of a key emits
        // the replacement and returns to state 0. Skipped when the key is also a prefix of a
        // longer key (the prefix transition added above wins).

        foreach (var p in replacements)
        {
            var state = stateLookup[p.Key.Substring(0, p.Key.Length - 1)];
            var input = p.Key[p.Key.Length - 1];
            var output = (0, p.Value);
            if (!result[state].transitions.ContainsKey(input))
            {
                result[state].transitions.Add(input, output);
            }
        }

        // Generate transitions to go back: for every non-initial state work out what has to be
        // emitted for the already-consumed prefix when the next character does not continue
        // any key, and which state the machine should continue from.

        for (var state = 1; state < result.Length; state++)
        {
            var tail = result[state].stateName;
            var outputText = "";
            for (; ; )
            {
                // Longest prefix of the tail that is a key; otherwise the first character
                // stands for itself.
                string replSource = tail[0].ToString();
                string replTarget = replSource;
                {
                    var s = tail;
                    while (s.Length > 0)
                    {
                        if (replacements.TryGetValue(s, out string value))
                        {
                            replSource = s;
                            replTarget = value;
                            break;
                        }
                        s = s.Substring(0, s.Length - 1);
                    }
                }

                outputText += replTarget;
                tail = tail.Substring(replSource.Length);

                if (stateLookup.TryGetValue(tail, out int outputState))
                {
                    foreach (var t in result[outputState].transitions)
                    {
                        if (!result[state].transitions.ContainsKey(t.Key))
                        {
                            var output = (t.Value.state, outputText + t.Value.text);
                            result[state].transitions.Add(t.Key, output);
                        }
                    }

                    // The remaining tail may itself be a non-empty state that is still holding
                    // text back; include its fallback so nothing is dropped. The tail is
                    // shorter than this state's name, so (states being sorted by length) its
                    // entry has already been completed.
                    result[state].fallbackText = outputText + result[outputState].fallbackText;
                    break;
                }
            }
        }

        //

        return result;
    }

    // Returns the set of state names: the empty string plus every proper, non-empty prefix
    // of every key in `replacements`.
    private static IEnumerable<string> GetStates(Dictionary<string, string> replacements)
    {
        var result = new HashSet<string> { "" };
        foreach (var k in replacements.Keys)
        {
            for (var i = 1; i < k.Length; i++)
            {
                result.Add(k.Substring(0, i));
            }
        }
        return result;
    }
#>
--------------------------------------------------------------------------------
/NickBuhro.Translit.Benchmark/v12/LatinToCyrillicConverter.generated.cs:
--------------------------------------------------------------------------------

// ------------------------------------------------------------------------------
// 
// This code was generated by a tool.
5 | // Generated at 2018-09-20 21:34:57Z 6 | // 7 | // Changes to this file may cause incorrect behavior and will be lost if 8 | // the code is regenerated. 9 | // 10 | // ------------------------------------------------------------------------------ 11 | namespace NickBuhro.Translit.Benchmark.v12 12 | { 13 | partial struct LatinToCyrillicConverter 14 | { 15 | private struct ConvertRule 16 | { 17 | public readonly string Latin; 18 | public readonly string Cyrillic; 19 | 20 | public ConvertRule(string cyrillic, string latin) 21 | { 22 | Cyrillic = cyrillic; 23 | Latin = latin; 24 | } 25 | } 26 | 27 | private static readonly ConvertRule[][] Rules = new [] 28 | { 29 | new [] // 30 | { 31 | new ConvertRule("щ", @"shh"), 32 | new ConvertRule("Щ", @"Shh"), 33 | new ConvertRule("ѓ", @"g`"), 34 | new ConvertRule("Ѓ", @"G`"), 35 | new ConvertRule("ґ", @"g`"), 36 | new ConvertRule("Ґ", @"G`"), 37 | new ConvertRule("ё", @"yo"), 38 | new ConvertRule("Ё", @"Yo"), 39 | new ConvertRule("є", @"ye"), 40 | new ConvertRule("Є", @"Ye"), 41 | new ConvertRule("ж", @"zh"), 42 | new ConvertRule("Ж", @"Zh"), 43 | new ConvertRule("s", @"z`"), 44 | new ConvertRule("S", @"Z`"), 45 | new ConvertRule("ї", @"yi"), 46 | new ConvertRule("Ї", @"Yi"), 47 | new ConvertRule("ќ", @"k`"), 48 | new ConvertRule("Ќ", @"K`"), 49 | new ConvertRule("љ", @"l`"), 50 | new ConvertRule("Љ", @"L`"), 51 | new ConvertRule("њ", @"n`"), 52 | new ConvertRule("Њ", @"N`"), 53 | new ConvertRule("ў", @"u`"), 54 | new ConvertRule("Ў", @"U`"), 55 | new ConvertRule("ц", @"cz"), 56 | new ConvertRule("Ц", @"Cz"), 57 | new ConvertRule("ч", @"ch"), 58 | new ConvertRule("Ч", @"Ch"), 59 | new ConvertRule("џ", @"dh"), 60 | new ConvertRule("Џ", @"Dh"), 61 | new ConvertRule("ш", @"sh"), 62 | new ConvertRule("Ш", @"Sh"), 63 | new ConvertRule("ъ", @"``"), 64 | new ConvertRule("Ъ", @"``"), 65 | new ConvertRule("ы", @"y`"), 66 | new ConvertRule("Ы", @"Y`"), 67 | new ConvertRule("э", @"e`"), 68 | new ConvertRule("Э", @"E`"), 69 | new 
ConvertRule("ю", @"yu"), 70 | new ConvertRule("Ю", @"Yu"), 71 | new ConvertRule("я", @"ya"), 72 | new ConvertRule("Я", @"Ya"), 73 | new ConvertRule("ѣ", @"ye"), 74 | new ConvertRule("Ѣ", @"Ye"), 75 | new ConvertRule("ѳ", @"fh"), 76 | new ConvertRule("Ѳ", @"Fh"), 77 | new ConvertRule("ѵ", @"yh"), 78 | new ConvertRule("Ѵ", @"Yh"), 79 | new ConvertRule("ѫ", @"о`"), 80 | new ConvertRule("Ѫ", @"О`"), 81 | new ConvertRule("а", @"a"), 82 | new ConvertRule("А", @"A"), 83 | new ConvertRule("б", @"b"), 84 | new ConvertRule("Б", @"B"), 85 | new ConvertRule("в", @"v"), 86 | new ConvertRule("В", @"V"), 87 | new ConvertRule("г", @"g"), 88 | new ConvertRule("Г", @"G"), 89 | new ConvertRule("д", @"d"), 90 | new ConvertRule("Д", @"D"), 91 | new ConvertRule("е", @"e"), 92 | new ConvertRule("Е", @"E"), 93 | new ConvertRule("з", @"z"), 94 | new ConvertRule("З", @"Z"), 95 | new ConvertRule("и", @"i"), 96 | new ConvertRule("И", @"I"), 97 | new ConvertRule("й", @"j"), 98 | new ConvertRule("Й", @"J"), 99 | new ConvertRule("j", @"j"), 100 | new ConvertRule("J", @"J"), 101 | new ConvertRule("i", @"i"), 102 | new ConvertRule("I", @"I"), 103 | new ConvertRule("к", @"k"), 104 | new ConvertRule("К", @"K"), 105 | new ConvertRule("л", @"l"), 106 | new ConvertRule("Л", @"L"), 107 | new ConvertRule("м", @"m"), 108 | new ConvertRule("М", @"M"), 109 | new ConvertRule("н", @"n"), 110 | new ConvertRule("Н", @"N"), 111 | new ConvertRule("о", @"o"), 112 | new ConvertRule("О", @"O"), 113 | new ConvertRule("п", @"p"), 114 | new ConvertRule("П", @"P"), 115 | new ConvertRule("р", @"r"), 116 | new ConvertRule("Р", @"R"), 117 | new ConvertRule("с", @"s"), 118 | new ConvertRule("С", @"S"), 119 | new ConvertRule("т", @"t"), 120 | new ConvertRule("Т", @"T"), 121 | new ConvertRule("у", @"u"), 122 | new ConvertRule("У", @"U"), 123 | new ConvertRule("ф", @"f"), 124 | new ConvertRule("Ф", @"F"), 125 | new ConvertRule("х", @"x"), 126 | new ConvertRule("Х", @"X"), 127 | new ConvertRule("ц", @"c"), 128 | new 
ConvertRule("Ц", @"C"), 129 | new ConvertRule("ь", @"`"), 130 | new ConvertRule("Ь", @"`"), 131 | new ConvertRule("’", @"'"), 132 | new ConvertRule("№", @"#"), 133 | }, 134 | new [] // ru-RU 135 | { 136 | new ConvertRule("щ", @"shh"), 137 | new ConvertRule("Щ", @"Shh"), 138 | new ConvertRule("ё", @"yo"), 139 | new ConvertRule("Ё", @"Yo"), 140 | new ConvertRule("ж", @"zh"), 141 | new ConvertRule("Ж", @"Zh"), 142 | new ConvertRule("ц", @"cz"), 143 | new ConvertRule("Ц", @"Cz"), 144 | new ConvertRule("ч", @"ch"), 145 | new ConvertRule("Ч", @"Ch"), 146 | new ConvertRule("ш", @"sh"), 147 | new ConvertRule("Ш", @"Sh"), 148 | new ConvertRule("ъ", @"``"), 149 | new ConvertRule("Ъ", @"``"), 150 | new ConvertRule("ы", @"y`"), 151 | new ConvertRule("Ы", @"Y`"), 152 | new ConvertRule("э", @"e`"), 153 | new ConvertRule("Э", @"E`"), 154 | new ConvertRule("ю", @"yu"), 155 | new ConvertRule("Ю", @"Yu"), 156 | new ConvertRule("я", @"ya"), 157 | new ConvertRule("Я", @"Ya"), 158 | new ConvertRule("ѣ", @"ye"), 159 | new ConvertRule("Ѣ", @"Ye"), 160 | new ConvertRule("ѳ", @"fh"), 161 | new ConvertRule("Ѳ", @"Fh"), 162 | new ConvertRule("ѵ", @"yh"), 163 | new ConvertRule("Ѵ", @"Yh"), 164 | new ConvertRule("а", @"a"), 165 | new ConvertRule("А", @"A"), 166 | new ConvertRule("б", @"b"), 167 | new ConvertRule("Б", @"B"), 168 | new ConvertRule("в", @"v"), 169 | new ConvertRule("В", @"V"), 170 | new ConvertRule("г", @"g"), 171 | new ConvertRule("Г", @"G"), 172 | new ConvertRule("д", @"d"), 173 | new ConvertRule("Д", @"D"), 174 | new ConvertRule("е", @"e"), 175 | new ConvertRule("Е", @"E"), 176 | new ConvertRule("з", @"z"), 177 | new ConvertRule("З", @"Z"), 178 | new ConvertRule("и", @"i"), 179 | new ConvertRule("И", @"I"), 180 | new ConvertRule("й", @"j"), 181 | new ConvertRule("Й", @"J"), 182 | new ConvertRule("i", @"i"), 183 | new ConvertRule("I", @"I"), 184 | new ConvertRule("к", @"k"), 185 | new ConvertRule("К", @"K"), 186 | new ConvertRule("л", @"l"), 187 | new ConvertRule("Л", @"L"), 
188 | new ConvertRule("м", @"m"), 189 | new ConvertRule("М", @"M"), 190 | new ConvertRule("н", @"n"), 191 | new ConvertRule("Н", @"N"), 192 | new ConvertRule("о", @"o"), 193 | new ConvertRule("О", @"O"), 194 | new ConvertRule("п", @"p"), 195 | new ConvertRule("П", @"P"), 196 | new ConvertRule("р", @"r"), 197 | new ConvertRule("Р", @"R"), 198 | new ConvertRule("с", @"s"), 199 | new ConvertRule("С", @"S"), 200 | new ConvertRule("т", @"t"), 201 | new ConvertRule("Т", @"T"), 202 | new ConvertRule("у", @"u"), 203 | new ConvertRule("У", @"U"), 204 | new ConvertRule("ф", @"f"), 205 | new ConvertRule("Ф", @"F"), 206 | new ConvertRule("х", @"x"), 207 | new ConvertRule("Х", @"X"), 208 | new ConvertRule("ц", @"c"), 209 | new ConvertRule("Ц", @"C"), 210 | new ConvertRule("ь", @"`"), 211 | new ConvertRule("Ь", @"`"), 212 | new ConvertRule("’", @"'"), 213 | new ConvertRule("№", @"#"), 214 | }, 215 | new [] // be-BY 216 | { 217 | new ConvertRule("ё", @"yo"), 218 | new ConvertRule("Ё", @"Yo"), 219 | new ConvertRule("ж", @"zh"), 220 | new ConvertRule("Ж", @"Zh"), 221 | new ConvertRule("ў", @"u`"), 222 | new ConvertRule("Ў", @"U`"), 223 | new ConvertRule("ц", @"cz"), 224 | new ConvertRule("Ц", @"Cz"), 225 | new ConvertRule("ч", @"ch"), 226 | new ConvertRule("Ч", @"Ch"), 227 | new ConvertRule("ш", @"sh"), 228 | new ConvertRule("Ш", @"Sh"), 229 | new ConvertRule("ы", @"y`"), 230 | new ConvertRule("Ы", @"Y`"), 231 | new ConvertRule("э", @"e`"), 232 | new ConvertRule("Э", @"E`"), 233 | new ConvertRule("ю", @"yu"), 234 | new ConvertRule("Ю", @"Yu"), 235 | new ConvertRule("я", @"ya"), 236 | new ConvertRule("Я", @"Ya"), 237 | new ConvertRule("а", @"a"), 238 | new ConvertRule("А", @"A"), 239 | new ConvertRule("б", @"b"), 240 | new ConvertRule("Б", @"B"), 241 | new ConvertRule("в", @"v"), 242 | new ConvertRule("В", @"V"), 243 | new ConvertRule("г", @"h"), 244 | new ConvertRule("Г", @"H"), 245 | new ConvertRule("д", @"d"), 246 | new ConvertRule("Д", @"D"), 247 | new ConvertRule("е", @"e"), 
248 | new ConvertRule("Е", @"E"), 249 | new ConvertRule("з", @"z"), 250 | new ConvertRule("З", @"Z"), 251 | new ConvertRule("й", @"j"), 252 | new ConvertRule("Й", @"J"), 253 | new ConvertRule("i", @"i"), 254 | new ConvertRule("I", @"I"), 255 | new ConvertRule("к", @"k"), 256 | new ConvertRule("К", @"K"), 257 | new ConvertRule("л", @"l"), 258 | new ConvertRule("Л", @"L"), 259 | new ConvertRule("м", @"m"), 260 | new ConvertRule("М", @"M"), 261 | new ConvertRule("н", @"n"), 262 | new ConvertRule("Н", @"N"), 263 | new ConvertRule("о", @"o"), 264 | new ConvertRule("О", @"O"), 265 | new ConvertRule("п", @"p"), 266 | new ConvertRule("П", @"P"), 267 | new ConvertRule("р", @"r"), 268 | new ConvertRule("Р", @"R"), 269 | new ConvertRule("с", @"s"), 270 | new ConvertRule("С", @"S"), 271 | new ConvertRule("т", @"t"), 272 | new ConvertRule("Т", @"T"), 273 | new ConvertRule("у", @"u"), 274 | new ConvertRule("У", @"U"), 275 | new ConvertRule("ф", @"f"), 276 | new ConvertRule("Ф", @"F"), 277 | new ConvertRule("х", @"x"), 278 | new ConvertRule("Х", @"X"), 279 | new ConvertRule("ц", @"c"), 280 | new ConvertRule("Ц", @"C"), 281 | new ConvertRule("ь", @"`"), 282 | new ConvertRule("Ь", @"`"), 283 | new ConvertRule("’", @"'"), 284 | new ConvertRule("№", @"#"), 285 | }, 286 | new [] // uk-UA 287 | { 288 | new ConvertRule("щ", @"shh"), 289 | new ConvertRule("Щ", @"Shh"), 290 | new ConvertRule("ґ", @"g`"), 291 | new ConvertRule("Ґ", @"G`"), 292 | new ConvertRule("є", @"ye"), 293 | new ConvertRule("Є", @"Ye"), 294 | new ConvertRule("ж", @"zh"), 295 | new ConvertRule("Ж", @"Zh"), 296 | new ConvertRule("и", @"y`"), 297 | new ConvertRule("И", @"Y`"), 298 | new ConvertRule("ї", @"yi"), 299 | new ConvertRule("Ї", @"Yi"), 300 | new ConvertRule("ц", @"cz"), 301 | new ConvertRule("Ц", @"Cz"), 302 | new ConvertRule("ч", @"ch"), 303 | new ConvertRule("Ч", @"Ch"), 304 | new ConvertRule("ш", @"sh"), 305 | new ConvertRule("Ш", @"Sh"), 306 | new ConvertRule("ю", @"yu"), 307 | new ConvertRule("Ю", @"Yu"), 
308 | new ConvertRule("я", @"ya"), 309 | new ConvertRule("Я", @"Ya"), 310 | new ConvertRule("а", @"a"), 311 | new ConvertRule("А", @"A"), 312 | new ConvertRule("б", @"b"), 313 | new ConvertRule("Б", @"B"), 314 | new ConvertRule("в", @"v"), 315 | new ConvertRule("В", @"V"), 316 | new ConvertRule("г", @"h"), 317 | new ConvertRule("Г", @"H"), 318 | new ConvertRule("д", @"d"), 319 | new ConvertRule("Д", @"D"), 320 | new ConvertRule("е", @"e"), 321 | new ConvertRule("Е", @"E"), 322 | new ConvertRule("з", @"z"), 323 | new ConvertRule("З", @"Z"), 324 | new ConvertRule("й", @"j"), 325 | new ConvertRule("Й", @"J"), 326 | new ConvertRule("i", @"i"), 327 | new ConvertRule("I", @"I"), 328 | new ConvertRule("к", @"k"), 329 | new ConvertRule("К", @"K"), 330 | new ConvertRule("л", @"l"), 331 | new ConvertRule("Л", @"L"), 332 | new ConvertRule("м", @"m"), 333 | new ConvertRule("М", @"M"), 334 | new ConvertRule("н", @"n"), 335 | new ConvertRule("Н", @"N"), 336 | new ConvertRule("о", @"o"), 337 | new ConvertRule("О", @"O"), 338 | new ConvertRule("п", @"p"), 339 | new ConvertRule("П", @"P"), 340 | new ConvertRule("р", @"r"), 341 | new ConvertRule("Р", @"R"), 342 | new ConvertRule("с", @"s"), 343 | new ConvertRule("С", @"S"), 344 | new ConvertRule("т", @"t"), 345 | new ConvertRule("Т", @"T"), 346 | new ConvertRule("у", @"u"), 347 | new ConvertRule("У", @"U"), 348 | new ConvertRule("ф", @"f"), 349 | new ConvertRule("Ф", @"F"), 350 | new ConvertRule("х", @"x"), 351 | new ConvertRule("Х", @"X"), 352 | new ConvertRule("ц", @"c"), 353 | new ConvertRule("Ц", @"C"), 354 | new ConvertRule("ь", @"`"), 355 | new ConvertRule("Ь", @"`"), 356 | new ConvertRule("’", @"'"), 357 | new ConvertRule("№", @"#"), 358 | }, 359 | new [] // bg-BG 360 | { 361 | new ConvertRule("щ", @"sht"), 362 | new ConvertRule("Щ", @"Sht"), 363 | new ConvertRule("ж", @"zh"), 364 | new ConvertRule("Ж", @"Zh"), 365 | new ConvertRule("ц", @"cz"), 366 | new ConvertRule("Ц", @"Cz"), 367 | new ConvertRule("ч", @"ch"), 368 | new 
ConvertRule("Ч", @"Ch"), 369 | new ConvertRule("ш", @"sh"), 370 | new ConvertRule("Ш", @"Sh"), 371 | new ConvertRule("ъ", @"a`"), 372 | new ConvertRule("Ъ", @"A`"), 373 | new ConvertRule("ю", @"yu"), 374 | new ConvertRule("Ю", @"Yu"), 375 | new ConvertRule("я", @"ya"), 376 | new ConvertRule("Я", @"Ya"), 377 | new ConvertRule("ѣ", @"ye"), 378 | new ConvertRule("Ѣ", @"Ye"), 379 | new ConvertRule("ѳ", @"fh"), 380 | new ConvertRule("Ѳ", @"Fh"), 381 | new ConvertRule("ѵ", @"yh"), 382 | new ConvertRule("Ѵ", @"Yh"), 383 | new ConvertRule("ѫ", @"о`"), 384 | new ConvertRule("Ѫ", @"О`"), 385 | new ConvertRule("а", @"a"), 386 | new ConvertRule("А", @"A"), 387 | new ConvertRule("б", @"b"), 388 | new ConvertRule("Б", @"B"), 389 | new ConvertRule("в", @"v"), 390 | new ConvertRule("В", @"V"), 391 | new ConvertRule("г", @"g"), 392 | new ConvertRule("Г", @"G"), 393 | new ConvertRule("д", @"d"), 394 | new ConvertRule("Д", @"D"), 395 | new ConvertRule("е", @"e"), 396 | new ConvertRule("Е", @"E"), 397 | new ConvertRule("з", @"z"), 398 | new ConvertRule("З", @"Z"), 399 | new ConvertRule("и", @"i"), 400 | new ConvertRule("И", @"I"), 401 | new ConvertRule("й", @"j"), 402 | new ConvertRule("Й", @"J"), 403 | new ConvertRule("i", @"i"), 404 | new ConvertRule("I", @"I"), 405 | new ConvertRule("к", @"k"), 406 | new ConvertRule("К", @"K"), 407 | new ConvertRule("л", @"l"), 408 | new ConvertRule("Л", @"L"), 409 | new ConvertRule("м", @"m"), 410 | new ConvertRule("М", @"M"), 411 | new ConvertRule("н", @"n"), 412 | new ConvertRule("Н", @"N"), 413 | new ConvertRule("о", @"o"), 414 | new ConvertRule("О", @"O"), 415 | new ConvertRule("п", @"p"), 416 | new ConvertRule("П", @"P"), 417 | new ConvertRule("р", @"r"), 418 | new ConvertRule("Р", @"R"), 419 | new ConvertRule("с", @"s"), 420 | new ConvertRule("С", @"S"), 421 | new ConvertRule("т", @"t"), 422 | new ConvertRule("Т", @"T"), 423 | new ConvertRule("у", @"u"), 424 | new ConvertRule("У", @"U"), 425 | new ConvertRule("ф", @"f"), 426 | new 
ConvertRule("Ф", @"F"), 427 | new ConvertRule("х", @"x"), 428 | new ConvertRule("Х", @"X"), 429 | new ConvertRule("ц", @"c"), 430 | new ConvertRule("Ц", @"C"), 431 | new ConvertRule("ь", @"`"), 432 | new ConvertRule("Ь", @"`"), 433 | new ConvertRule("’", @"'"), 434 | new ConvertRule("№", @"#"), 435 | }, 436 | new [] // mk-MK 437 | { 438 | new ConvertRule("ѓ", @"g`"), 439 | new ConvertRule("Ѓ", @"G`"), 440 | new ConvertRule("ж", @"zh"), 441 | new ConvertRule("Ж", @"Zh"), 442 | new ConvertRule("s", @"z`"), 443 | new ConvertRule("S", @"Z`"), 444 | new ConvertRule("ќ", @"k`"), 445 | new ConvertRule("Ќ", @"K`"), 446 | new ConvertRule("љ", @"l`"), 447 | new ConvertRule("Љ", @"L`"), 448 | new ConvertRule("њ", @"n`"), 449 | new ConvertRule("Њ", @"N`"), 450 | new ConvertRule("ц", @"cz"), 451 | new ConvertRule("Ц", @"Cz"), 452 | new ConvertRule("ч", @"ch"), 453 | new ConvertRule("Ч", @"Ch"), 454 | new ConvertRule("џ", @"dh"), 455 | new ConvertRule("Џ", @"Dh"), 456 | new ConvertRule("ш", @"sh"), 457 | new ConvertRule("Ш", @"Sh"), 458 | new ConvertRule("а", @"a"), 459 | new ConvertRule("А", @"A"), 460 | new ConvertRule("б", @"b"), 461 | new ConvertRule("Б", @"B"), 462 | new ConvertRule("в", @"v"), 463 | new ConvertRule("В", @"V"), 464 | new ConvertRule("г", @"g"), 465 | new ConvertRule("Г", @"G"), 466 | new ConvertRule("д", @"d"), 467 | new ConvertRule("Д", @"D"), 468 | new ConvertRule("е", @"e"), 469 | new ConvertRule("Е", @"E"), 470 | new ConvertRule("з", @"z"), 471 | new ConvertRule("З", @"Z"), 472 | new ConvertRule("и", @"i"), 473 | new ConvertRule("И", @"I"), 474 | new ConvertRule("j", @"j"), 475 | new ConvertRule("J", @"J"), 476 | new ConvertRule("к", @"k"), 477 | new ConvertRule("К", @"K"), 478 | new ConvertRule("л", @"l"), 479 | new ConvertRule("Л", @"L"), 480 | new ConvertRule("м", @"m"), 481 | new ConvertRule("М", @"M"), 482 | new ConvertRule("н", @"п"), 483 | new ConvertRule("Н", @"П"), 484 | new ConvertRule("о", @"o"), 485 | new ConvertRule("О", @"O"), 486 | new 
ConvertRule("п", @"p"), 487 | new ConvertRule("П", @"P"), 488 | new ConvertRule("р", @"r"), 489 | new ConvertRule("Р", @"R"), 490 | new ConvertRule("с", @"s"), 491 | new ConvertRule("С", @"S"), 492 | new ConvertRule("т", @"t"), 493 | new ConvertRule("Т", @"T"), 494 | new ConvertRule("у", @"u"), 495 | new ConvertRule("У", @"U"), 496 | new ConvertRule("ф", @"f"), 497 | new ConvertRule("Ф", @"F"), 498 | new ConvertRule("х", @"x"), 499 | new ConvertRule("Х", @"X"), 500 | new ConvertRule("ц", @"c"), 501 | new ConvertRule("Ц", @"C"), 502 | new ConvertRule("’", @"'"), 503 | new ConvertRule("№", @"#"), 504 | }, 505 | 506 | }; 507 | } 508 | } 509 | --------------------------------------------------------------------------------