├── .github └── workflows │ └── dotnet.yml ├── .gitignore ├── LICENSE ├── LinkDotNet.Benchmarks ├── 1000Words.txt ├── Benchmarks.cs ├── LinkDotNet.Benchmarks.csproj ├── RopeConcatTests.cs ├── SearchTests.cs └── TrieVsHashSet.cs ├── LinkDotNet.StringOperations.UnitTests ├── Compression │ └── LempelZivWelchTests.cs ├── DataStructure │ ├── RopeTests.cs │ └── TrieTests.cs ├── EditDistance │ └── EditDistancesTests.cs ├── LinkDotNet.StringOperations.UnitTests.csproj └── Search │ ├── BoyerMooreTests.cs │ ├── KnuthMorrisPrattTests.cs │ └── ZAlgorithmTests.cs ├── LinkDotNet.StringOperations.sln ├── LinkDotNet.StringOperations ├── Compression │ └── LempelZivWelch.cs ├── DataStructure │ ├── Rope.cs │ └── Trie.cs ├── EditDistance │ ├── HammingDistance.cs │ ├── Levenshtein.cs │ ├── LongestCommonSubsequence.cs │ └── LongestCommonSubstring.cs ├── LinkDotNet.StringOperations.csproj └── Search │ ├── BoyerMoore.cs │ ├── KnuthMorrisPratt.cs │ └── ZAlgorithm.cs └── README.md /.github/workflows/dotnet.yml: -------------------------------------------------------------------------------- 1 | name: .NET 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Setup .NET 17 | uses: actions/setup-dotnet@v1 18 | with: 19 | dotnet-version: 6.0.x 20 | - name: Restore dependencies 21 | run: dotnet restore 22 | - name: Build 23 | run: dotnet build --no-restore 24 | - name: Test 25 | run: dotnet test --no-build --verbosity normal 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | obj/ 3 | /packages/ 4 | riderModule.iml 5 | /_ReSharper.Caches/ 6 | .idea/ 7 | *.DotSettings.user 8 | .vs/ -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Steven Giesel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LinkDotNet.Benchmarks/1000Words.txt: -------------------------------------------------------------------------------- 1 | a 2 | ability 3 | able 4 | about 5 | above 6 | accept 7 | according 8 | account 9 | across 10 | act 11 | action 12 | activity 13 | actually 14 | add 15 | address 16 | administration 17 | admit 18 | adult 19 | affect 20 | after 21 | again 22 | against 23 | age 24 | agency 25 | agent 26 | ago 27 | agree 28 | agreement 29 | ahead 30 | air 31 | all 32 | allow 33 | almost 34 | alone 35 | along 36 | already 37 | also 38 | although 39 | always 40 | American 41 | among 42 | amount 43 | analysis 44 | and 45 | animal 46 | another 47 | answer 48 | any 49 | anyone 50 | anything 51 | appear 52 | apply 53 | approach 54 | area 55 | argue 56 | arm 57 | around 58 | arrive 59 | art 60 | article 61 | artist 62 | as 63 | ask 64 | assume 65 | at 66 | attack 67 | attention 68 | attorney 69 | audience 70 | author 71 | authority 72 | available 73 | avoid 74 | away 75 | baby 76 | back 77 | bad 78 | bag 79 | ball 80 | bank 81 | bar 82 | base 83 | be 84 | beat 85 | beautiful 86 | because 87 | become 88 | bed 89 | before 90 | begin 91 | behavior 92 | behind 93 | believe 94 | benefit 95 | best 96 | better 97 | between 98 | beyond 99 | big 100 | bill 101 | billion 102 | bit 103 | black 104 | blood 105 | blue 106 | board 107 | body 108 | book 109 | born 110 | both 111 | box 112 | boy 113 | break 114 | bring 115 | brother 116 | budget 117 | build 118 | building 119 | business 120 | but 121 | buy 122 | by 123 | call 124 | camera 125 | campaign 126 | can 127 | cancer 128 | candidate 129 | capital 130 | car 131 | card 132 | care 133 | career 134 | carry 135 | case 136 | catch 137 | cause 138 | cell 139 | center 140 | central 141 | century 142 | certain 143 | certainly 144 | chair 145 | challenge 146 | chance 147 | change 148 | character 149 | charge 150 | check 151 | child 152 | choice 153 | 
choose 154 | church 155 | citizen 156 | city 157 | civil 158 | claim 159 | class 160 | clear 161 | clearly 162 | close 163 | coach 164 | cold 165 | collection 166 | college 167 | color 168 | come 169 | commercial 170 | common 171 | community 172 | company 173 | compare 174 | computer 175 | concern 176 | condition 177 | conference 178 | Congress 179 | consider 180 | consumer 181 | contain 182 | continue 183 | control 184 | cost 185 | could 186 | country 187 | couple 188 | course 189 | court 190 | cover 191 | create 192 | crime 193 | cultural 194 | culture 195 | cup 196 | current 197 | customer 198 | cut 199 | dark 200 | data 201 | daughter 202 | day 203 | dead 204 | deal 205 | death 206 | debate 207 | decade 208 | decide 209 | decision 210 | deep 211 | defense 212 | degree 213 | Democrat 214 | democratic 215 | describe 216 | design 217 | despite 218 | detail 219 | determine 220 | develop 221 | development 222 | die 223 | difference 224 | different 225 | difficult 226 | dinner 227 | direction 228 | director 229 | discover 230 | discuss 231 | discussion 232 | disease 233 | do 234 | doctor 235 | dog 236 | door 237 | down 238 | draw 239 | dream 240 | drive 241 | drop 242 | drug 243 | during 244 | each 245 | early 246 | east 247 | easy 248 | eat 249 | economic 250 | economy 251 | edge 252 | education 253 | effect 254 | effort 255 | eight 256 | either 257 | election 258 | else 259 | employee 260 | end 261 | energy 262 | enjoy 263 | enough 264 | enter 265 | entire 266 | environment 267 | environmental 268 | especially 269 | establish 270 | even 271 | evening 272 | event 273 | ever 274 | every 275 | everybody 276 | everyone 277 | everything 278 | evidence 279 | exactly 280 | example 281 | executive 282 | exist 283 | expect 284 | experience 285 | expert 286 | explain 287 | eye 288 | face 289 | fact 290 | factor 291 | fail 292 | fall 293 | family 294 | far 295 | fast 296 | father 297 | fear 298 | federal 299 | feel 300 | feeling 301 | few 302 | field 303 | fight 304 | figure 
305 | fill 306 | film 307 | final 308 | finally 309 | financial 310 | find 311 | fine 312 | finger 313 | finish 314 | fire 315 | firm 316 | first 317 | fish 318 | five 319 | floor 320 | fly 321 | focus 322 | follow 323 | food 324 | foot 325 | for 326 | force 327 | foreign 328 | forget 329 | form 330 | former 331 | forward 332 | four 333 | free 334 | friend 335 | from 336 | front 337 | full 338 | fund 339 | future 340 | game 341 | garden 342 | gas 343 | general 344 | generation 345 | get 346 | girl 347 | give 348 | glass 349 | go 350 | goal 351 | good 352 | government 353 | great 354 | green 355 | ground 356 | group 357 | grow 358 | growth 359 | guess 360 | gun 361 | guy 362 | hair 363 | half 364 | hand 365 | hang 366 | happen 367 | happy 368 | hard 369 | have 370 | he 371 | head 372 | health 373 | hear 374 | heart 375 | heat 376 | heavy 377 | help 378 | her 379 | here 380 | herself 381 | high 382 | him 383 | himself 384 | his 385 | history 386 | hit 387 | hold 388 | home 389 | hope 390 | hospital 391 | hot 392 | hotel 393 | hour 394 | house 395 | how 396 | however 397 | huge 398 | human 399 | hundred 400 | husband 401 | I 402 | idea 403 | identify 404 | if 405 | image 406 | imagine 407 | impact 408 | important 409 | improve 410 | in 411 | include 412 | including 413 | increase 414 | indeed 415 | indicate 416 | individual 417 | industry 418 | information 419 | inside 420 | instead 421 | institution 422 | interest 423 | interesting 424 | international 425 | interview 426 | into 427 | investment 428 | involve 429 | issue 430 | it 431 | item 432 | its 433 | itself 434 | job 435 | join 436 | just 437 | keep 438 | key 439 | kid 440 | kill 441 | kind 442 | kitchen 443 | know 444 | knowledge 445 | land 446 | language 447 | large 448 | last 449 | late 450 | later 451 | laugh 452 | law 453 | lawyer 454 | lay 455 | lead 456 | leader 457 | learn 458 | least 459 | leave 460 | left 461 | leg 462 | legal 463 | less 464 | let 465 | letter 466 | level 467 | lie 468 | life 469 | 
light 470 | like 471 | likely 472 | line 473 | list 474 | listen 475 | little 476 | live 477 | local 478 | long 479 | look 480 | lose 481 | loss 482 | lot 483 | love 484 | low 485 | machine 486 | magazine 487 | main 488 | maintain 489 | major 490 | majority 491 | make 492 | man 493 | manage 494 | management 495 | manager 496 | many 497 | market 498 | marriage 499 | material 500 | matter 501 | may 502 | maybe 503 | me 504 | mean 505 | measure 506 | media 507 | medical 508 | meet 509 | meeting 510 | member 511 | memory 512 | mention 513 | message 514 | method 515 | middle 516 | might 517 | military 518 | million 519 | mind 520 | minute 521 | miss 522 | mission 523 | model 524 | modern 525 | moment 526 | money 527 | month 528 | more 529 | morning 530 | most 531 | mother 532 | mouth 533 | move 534 | movement 535 | movie 536 | Mr 537 | Mrs 538 | much 539 | music 540 | must 541 | my 542 | myself 543 | name 544 | nation 545 | national 546 | natural 547 | nature 548 | near 549 | nearly 550 | necessary 551 | need 552 | network 553 | never 554 | new 555 | news 556 | newspaper 557 | next 558 | nice 559 | night 560 | no 561 | none 562 | nor 563 | north 564 | not 565 | note 566 | nothing 567 | notice 568 | now 569 | n't 570 | number 571 | occur 572 | of 573 | off 574 | offer 575 | office 576 | officer 577 | official 578 | often 579 | oh 580 | oil 581 | ok 582 | old 583 | on 584 | once 585 | one 586 | only 587 | onto 588 | open 589 | operation 590 | opportunity 591 | option 592 | or 593 | order 594 | organization 595 | other 596 | others 597 | our 598 | out 599 | outside 600 | over 601 | own 602 | owner 603 | page 604 | pain 605 | painting 606 | paper 607 | parent 608 | part 609 | participant 610 | particular 611 | particularly 612 | partner 613 | party 614 | pass 615 | past 616 | patient 617 | pattern 618 | pay 619 | peace 620 | people 621 | per 622 | perform 623 | performance 624 | perhaps 625 | period 626 | person 627 | personal 628 | phone 629 | physical 630 | pick 631 | 
picture 632 | piece 633 | place 634 | plan 635 | plant 636 | play 637 | player 638 | PM 639 | point 640 | police 641 | policy 642 | political 643 | politics 644 | poor 645 | popular 646 | population 647 | position 648 | positive 649 | possible 650 | power 651 | practice 652 | prepare 653 | present 654 | president 655 | pressure 656 | pretty 657 | prevent 658 | price 659 | private 660 | probably 661 | problem 662 | process 663 | produce 664 | product 665 | production 666 | professional 667 | professor 668 | program 669 | project 670 | property 671 | protect 672 | prove 673 | provide 674 | public 675 | pull 676 | purpose 677 | push 678 | put 679 | quality 680 | question 681 | quickly 682 | quite 683 | race 684 | radio 685 | raise 686 | range 687 | rate 688 | rather 689 | reach 690 | read 691 | ready 692 | real 693 | reality 694 | realize 695 | really 696 | reason 697 | receive 698 | recent 699 | recently 700 | recognize 701 | record 702 | red 703 | reduce 704 | reflect 705 | region 706 | relate 707 | relationship 708 | religious 709 | remain 710 | remember 711 | remove 712 | report 713 | represent 714 | Republican 715 | require 716 | research 717 | resource 718 | respond 719 | response 720 | responsibility 721 | rest 722 | result 723 | return 724 | reveal 725 | rich 726 | right 727 | rise 728 | risk 729 | road 730 | rock 731 | role 732 | room 733 | rule 734 | run 735 | safe 736 | same 737 | save 738 | say 739 | scene 740 | school 741 | science 742 | scientist 743 | score 744 | sea 745 | season 746 | seat 747 | second 748 | section 749 | security 750 | see 751 | seek 752 | seem 753 | sell 754 | send 755 | senior 756 | sense 757 | series 758 | serious 759 | serve 760 | service 761 | set 762 | seven 763 | several 764 | sex 765 | sexual 766 | shake 767 | share 768 | she 769 | shoot 770 | short 771 | shot 772 | should 773 | shoulder 774 | show 775 | side 776 | sign 777 | significant 778 | similar 779 | simple 780 | simply 781 | since 782 | sing 783 | single 784 | sister 
785 | sit 786 | site 787 | situation 788 | six 789 | size 790 | skill 791 | skin 792 | small 793 | smile 794 | so 795 | social 796 | society 797 | soldier 798 | some 799 | somebody 800 | someone 801 | something 802 | sometimes 803 | son 804 | song 805 | soon 806 | sort 807 | sound 808 | source 809 | south 810 | southern 811 | space 812 | speak 813 | special 814 | specific 815 | speech 816 | spend 817 | sport 818 | spring 819 | staff 820 | stage 821 | stand 822 | standard 823 | star 824 | start 825 | state 826 | statement 827 | station 828 | stay 829 | step 830 | still 831 | stock 832 | stop 833 | store 834 | story 835 | strategy 836 | street 837 | strong 838 | structure 839 | student 840 | study 841 | stuff 842 | style 843 | subject 844 | success 845 | successful 846 | such 847 | suddenly 848 | suffer 849 | suggest 850 | summer 851 | support 852 | sure 853 | surface 854 | system 855 | table 856 | take 857 | talk 858 | task 859 | tax 860 | teach 861 | teacher 862 | team 863 | technology 864 | television 865 | tell 866 | ten 867 | tend 868 | term 869 | test 870 | than 871 | thank 872 | that 873 | the 874 | their 875 | them 876 | themselves 877 | then 878 | theory 879 | there 880 | these 881 | they 882 | thing 883 | think 884 | third 885 | this 886 | those 887 | though 888 | thought 889 | thousand 890 | threat 891 | three 892 | through 893 | throughout 894 | throw 895 | thus 896 | time 897 | to 898 | today 899 | together 900 | tonight 901 | too 902 | top 903 | total 904 | tough 905 | toward 906 | town 907 | trade 908 | traditional 909 | training 910 | travel 911 | treat 912 | treatment 913 | tree 914 | trial 915 | trip 916 | trouble 917 | true 918 | truth 919 | try 920 | turn 921 | TV 922 | two 923 | type 924 | under 925 | understand 926 | unit 927 | until 928 | up 929 | upon 930 | us 931 | use 932 | usually 933 | value 934 | various 935 | very 936 | victim 937 | view 938 | violence 939 | visit 940 | voice 941 | vote 942 | wait 943 | walk 944 | wall 945 | want 946 | 
war 947 | watch 948 | water 949 | way 950 | we 951 | weapon 952 | wear 953 | week 954 | weight 955 | well 956 | west 957 | western 958 | what 959 | whatever 960 | when 961 | where 962 | whether 963 | which 964 | while 965 | white 966 | who 967 | whole 968 | whom 969 | whose 970 | why 971 | wide 972 | wife 973 | will 974 | win 975 | wind 976 | window 977 | wish 978 | with 979 | within 980 | without 981 | woman 982 | wonder 983 | word 984 | work 985 | worker 986 | world 987 | worry 988 | would 989 | write 990 | writer 991 | wrong 992 | yard 993 | yeah 994 | year 995 | yes 996 | yet 997 | you 998 | young 999 | your 1000 | yourself -------------------------------------------------------------------------------- /LinkDotNet.Benchmarks/Benchmarks.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Running; 2 | 3 | namespace LinkDotNet.Benchmarks; 4 | 5 | internal static class Benchmarks 6 | { 7 | internal static void Main() 8 | { 9 | BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(); 10 | } 11 | } -------------------------------------------------------------------------------- /LinkDotNet.Benchmarks/LinkDotNet.Benchmarks.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | PreserveNewest 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /LinkDotNet.Benchmarks/RopeConcatTests.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | using BenchmarkDotNet.Attributes; 3 | using LinkDotNet.StringOperations.DataStructure; 4 | 5 | namespace LinkDotNet.Benchmarks; 6 | 7 | public class RopeConcatTests 8 | { 9 | [Benchmark(Baseline = true)] 10 | public void ConcatenateCLRStrings() 11 | { 12 | var clrString = "Test"; 13 | for (var i = 0; i < 10000; i++) 14 | { 15 | 
clrString += $"some string{i}"; 16 | } 17 | } 18 | 19 | [Benchmark] 20 | public void ConcatenateStringBuilder() 21 | { 22 | var stringBuilder = new StringBuilder(); 23 | for (var i = 0; i < 10000; i++) 24 | { 25 | stringBuilder.Append($"some string{i}"); 26 | } 27 | } 28 | 29 | [Benchmark] 30 | public void ConcatenateRope() 31 | { 32 | var rope = Rope.Create("Test"); 33 | for (var i = 0; i < 10000; i++) 34 | { 35 | rope += $"some string{i}"; 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /LinkDotNet.Benchmarks/SearchTests.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | using BenchmarkDotNet.Attributes; 4 | using LinkDotNet.StringOperations.Search; 5 | 6 | namespace LinkDotNet.Benchmarks; 7 | /// <summary>Benchmarks KnuthMorrisPratt, BoyerMoore and ZAlgorithm on the same text and pattern: once as a containment check (HasPattern) and once materializing every hit (FindAll).</summary> 8 | public class SearchTests 9 | { 10 | private const string Text = "The quick brown fox jumps over the lazy dog maybe also a cat a sheep and another dog"; 11 | private const string Word = "dog"; 12 | 13 | [Benchmark] 14 | public bool KnuthMorrisPrattContains() => KnuthMorrisPratt.HasPattern(Text, Word); 15 | 16 | [Benchmark] 17 | public bool BoyerMooreContains() => BoyerMoore.HasPattern(Text, Word); 18 | 19 | [Benchmark] 20 | public bool ZAlgorithmContains() => ZAlgorithm.HasPattern(Text, Word); 21 | 22 | [Benchmark] 23 | public IList<int> KnuthMorrisPrattPrattFindAll() => KnuthMorrisPratt.FindAll(Text, Word).ToList(); // NOTE(review): element type restored as int (unit tests compare hits to integer positions) - confirm against FindAll's signature 24 | 25 | [Benchmark] 26 | public IList<int> BoyerMooreFindAll() => BoyerMoore.FindAll(Text, Word).ToList(); 27 | 28 | [Benchmark] 29 | public IList<int> ZAlgorithmFindAll() => ZAlgorithm.FindAll(Text, Word).ToList(); 30 | 31 | 32 | } -------------------------------------------------------------------------------- /LinkDotNet.Benchmarks/TrieVsHashSet.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.IO; 3 | using System.Linq; 4 | 
using BenchmarkDotNet.Attributes; 5 | using LinkDotNet.StringOperations.DataStructure; 6 | 7 | namespace LinkDotNet.Benchmarks; 8 | /// <summary>Benchmarks prefix queries and exact-word lookups on a Trie against the same queries on a HashSet filled with the same words.</summary> 9 | [MemoryDiagnoser] 10 | public class TrieVsHashSet 11 | { 12 | private readonly HashSet<string> _hashSet = new(); 13 | private readonly Trie _trie = new(); 14 | /// <summary>Reads the word list once and adds every line to both collections.</summary> 15 | [GlobalSetup] 16 | public void Setup() 17 | { 18 | var wordsToAdd = File.ReadAllLines("1000Words.txt"); // casing must match the shipped file name exactly, otherwise the read fails on case-sensitive file systems 19 | 20 | foreach (var word in wordsToAdd) 21 | { 22 | _hashSet.Add(word); 23 | _trie.Add(word); 24 | } 25 | } 26 | 27 | [Benchmark] 28 | public IList<string> FindAllInHashSet() => _hashSet.Where(h => h.StartsWith("Hel")).ToList(); 29 | 30 | [Benchmark] 31 | public IList<string> FindAllInTrie() => _trie.GetWordsWithPrefix("Hel").ToList(); 32 | /// <summary>Exact-word lookup through the set's own Contains instead of a LINQ scan over every element.</summary> 33 | [Benchmark] 34 | public bool FindOneInHashSet() => _hashSet.Contains("happy"); 35 | 36 | [Benchmark] 37 | public bool FindOneInTrie() => _trie.Find("happy"); 38 | } -------------------------------------------------------------------------------- /LinkDotNet.StringOperations.UnitTests/Compression/LempelZivWelchTests.cs: -------------------------------------------------------------------------------- 1 | using LinkDotNet.StringOperations.Compression; 2 | using Xunit; 3 | 4 | namespace LinkDotNet.StringOperations.UnitTests.Compression; 5 | 6 | public class LempelZivWelchTests 7 | { 8 | [Fact] 9 | public void ShouldEncodeAndDecode() 10 | { 11 | const string sentence = "Hey my name is Steven"; 12 | var encoded = LempelZivWelch.Encode(sentence); 13 | 14 | var output = LempelZivWelch.Decode(encoded); 15 | 16 | Assert.Equal(sentence, output); 17 | } 18 | 19 | [Fact] 20 | public void ShouldCompressText() 21 | { 22 | const string sentence = "Here is your text, which consists out of multiple words. 
They, the words, can appear again"; 23 | var output = LempelZivWelch.Encode(sentence); 24 | 25 | Assert.True(output.Length < sentence.Length); 26 | } 27 | } -------------------------------------------------------------------------------- /LinkDotNet.StringOperations.UnitTests/DataStructure/RopeTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using LinkDotNet.StringOperations.DataStructure; 3 | using Xunit; 4 | 5 | namespace LinkDotNet.StringOperations.UnitTests.DataStructure; 6 | 7 | public class RopeTests 8 | { 9 | [Fact] 10 | public void ShouldCreateAndDisplayRope() 11 | { 12 | const string sentence = "Hello_my_name_is_Simon"; 13 | var rope = Rope.Create(sentence, 4); 14 | 15 | var output = rope.ToString(); 16 | 17 | Assert.Equal(sentence, output); 18 | } 19 | 20 | [Fact] 21 | public void ShouldConcat() 22 | { 23 | var left = Rope.Create("Hello"); 24 | var right = Rope.Create("World"); 25 | 26 | var concat = (left + right).ToString(); 27 | 28 | Assert.Equal("HelloWorld", concat); 29 | } 30 | 31 | [Fact] 32 | public void ShouldConcatWithStrings() 33 | { 34 | var left = Rope.Create("Hello"); 35 | var right = Rope.Create("World"); 36 | 37 | var first = left + "World"; 38 | var second = "Hello" + right; 39 | 40 | Assert.Equal("HelloWorld", first.ToString()); 41 | Assert.Equal("HelloWorld", second.ToString()); 42 | } 43 | 44 | [Fact] 45 | public void ShouldGetIndex() 46 | { 47 | const string text = "0123456789"; 48 | var rope = Rope.Create(text, 2); 49 | 50 | Assert.Equal(text[5], rope[5]); 51 | } 52 | 53 | [Fact] 54 | public void ShouldGetIndexAfterRebalance() 55 | { 56 | var rope1 = Rope.Create("012"); 57 | var rope2 = Rope.Create("345"); 58 | var rope = rope1 + rope2; 59 | 60 | var index = rope[3]; 61 | 62 | Assert.Equal('3', index); 63 | } 64 | 65 | [Theory] 66 | [InlineData("HelloWorld", 4, "Hello", "World")] 67 | [InlineData("HelloWorld", 5, "HelloW", "orld")] 68 | [InlineData("HelloWorld", 6, 
"HelloWo", "rld")] 69 | [InlineData("0123456789", 2, "012", "3456789")] 70 | [InlineData("0123456789", 8, "012345678", "9")] 71 | [InlineData("0123456789", 0, "0", "123456789")] 72 | public void ShouldSplitRope(string word, int indexToSplit, string expectedLeftSide, string expectedRightSide) 73 | { 74 | var rope = Rope.Create(word); 75 | 76 | var splitPair = rope.Split(indexToSplit); 77 | 78 | Assert.Equal(expectedLeftSide, splitPair.Item1.ToString()); 79 | Assert.Equal(expectedRightSide, splitPair.Item2.ToString()); 80 | } 81 | 82 | [Fact] 83 | public void ShouldThrowExceptionWhenNegativeIndex() 84 | { 85 | Assert.Throws(() => Rope.Create("a").Split(-1)); 86 | } 87 | 88 | [Fact] 89 | public void ShouldReturnLeftPartWhenCompleteLength() 90 | { 91 | var rope = Rope.Create("01234567"); 92 | 93 | var pair = rope.Split(7); 94 | 95 | Assert.Equal("01234567", pair.Item1.ToString()); 96 | Assert.Null(pair.Item2); 97 | } 98 | 99 | [Fact] 100 | public void ShouldInsertRope() 101 | { 102 | var rope1 = Rope.Create("Hello World"); 103 | var rope2 = Rope.Create(" dear"); 104 | 105 | var newRope = rope1.Insert(rope2, 4); 106 | 107 | Assert.Equal("Hello dear World", newRope.ToString()); 108 | } 109 | 110 | [Fact] 111 | public void ShouldSplitAfterConcat() 112 | { 113 | var split = (Rope.Create("Hello") + "World").Split(6); 114 | 115 | Assert.Equal("HelloWo", split.Item1.ToString()); 116 | Assert.Equal("rld", split.Item2.ToString()); 117 | } 118 | 119 | [Fact] 120 | public void ShouldDelete() 121 | { 122 | var rope = Rope.Create("0123456789"); 123 | 124 | var newRope = rope.Delete(3, 3); 125 | 126 | Assert.Equal("0126789", newRope.ToString()); 127 | } 128 | 129 | [Fact] 130 | public void ShouldHavePositiveIndexAndLength() 131 | { 132 | var rope = Rope.Create("1"); 133 | 134 | Assert.Throws(() => rope.Delete(-1, 2)); 135 | Assert.Throws(() => rope.Delete(1, 0)); 136 | } 137 | } -------------------------------------------------------------------------------- 
/LinkDotNet.StringOperations.UnitTests/DataStructure/TrieTests.cs: -------------------------------------------------------------------------------- 1 | using System.Linq; 2 | using LinkDotNet.StringOperations.DataStructure; 3 | using Xunit; 4 | 5 | namespace LinkDotNet.StringOperations.UnitTests.DataStructure; 6 | 7 | public class TrieTests 8 | { 9 | [Theory] 10 | [InlineData("csharp", "csharp", false, true)] 11 | [InlineData("cccc", "ccccc", false, false)] 12 | [InlineData("words", "word", false, false)] 13 | [InlineData("WOrd", "word", true, true)] 14 | [InlineData("Word", "", true, false)] 15 | [InlineData("Word", null, true, false)] 16 | public void ShouldFindEntries(string wordToAdd, string wordToSearch, bool ignoreCase, bool expectedHit) 17 | { 18 | var trie = new Trie(ignoreCase); 19 | trie.Add(wordToAdd); 20 | 21 | var actualHit = trie.Find(wordToSearch); 22 | 23 | Assert.Equal(expectedHit, actualHit); 24 | } 25 | 26 | [Fact] 27 | public void GivenMultipleWords_ShouldFindNotSubstring() 28 | { 29 | var trie = new Trie(); 30 | trie.Add("abcde"); 31 | trie.Add("abcdefg"); 32 | trie.Add("efgh"); 33 | 34 | var hasHit = trie.Find("efg"); 35 | 36 | Assert.False(hasHit); 37 | } 38 | 39 | [Theory] 40 | [InlineData("text", "te", false, true)] 41 | [InlineData("Text", "tE", true, true)] 42 | [InlineData("Word", "", true, false)] 43 | [InlineData("Word", null, true, false)] 44 | [InlineData("word", "word", false, true)] 45 | [InlineData("word", "words", false, false)] 46 | [InlineData("word", "odr", false, false)] 47 | public void ShouldStartsWithEntries(string wordToAdd, string wordToSearch, bool ignoreCase, bool expectedHit) 48 | { 49 | var trie = new Trie(ignoreCase); 50 | trie.Add(wordToAdd); 51 | 52 | var actualHit = trie.StartsWith(wordToSearch); 53 | 54 | Assert.Equal(expectedHit, actualHit); 55 | } 56 | 57 | [Fact] 58 | public void GivenMultipleWords_ShouldNotFindStartsWithSubstring() 59 | { 60 | var trie = new Trie(); 61 | trie.Add("abcde"); 62 | 
trie.Add("abcdefg"); 63 | trie.Add("efgh"); 64 | 65 | var hasHit = trie.StartsWith("def"); 66 | 67 | Assert.False(hasHit); 68 | } 69 | 70 | [Fact] 71 | public void ShouldReturnAllWordsWithStartingPrefix() 72 | { 73 | var trie = new Trie(); 74 | trie.Add("Hello"); 75 | trie.Add("Helsinki"); 76 | 77 | var hits = trie.GetWordsWithPrefix("Hel").ToList(); 78 | 79 | Assert.Equal(2, hits.Count); 80 | } 81 | } -------------------------------------------------------------------------------- /LinkDotNet.StringOperations.UnitTests/EditDistance/EditDistancesTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | using LinkDotNet.StringOperations.EditDistance; 4 | using Xunit; 5 | 6 | namespace LinkDotNet.StringOperations.UnitTests.EditDistance; 7 | 8 | public class EditDistancesTests 9 | { 10 | [Theory] 11 | [InlineData("Hello", "Hallo", false, "Hllo")] 12 | [InlineData("HeLlO", "hallo", true, "HLlO")] 13 | [InlineData("Hello", "hel", true, "Hel")] 14 | [InlineData("abc", "cbe", false, "b")] 15 | [InlineData("", "", false, "")] 16 | [InlineData("Test", "", false, "")] 17 | public void CheckLongestCommonSubsequent(string one, string two, bool ignoreCase, string expected) 18 | { 19 | var actual = one.GetLongestCommonSubsequence(two, ignoreCase); 20 | 21 | Assert.Equal(expected, actual); 22 | } 23 | 24 | [Fact] 25 | public void ReturnNullOnNullWhen_WhenCallingGetLongestCommonSubsequent() 26 | { 27 | Assert.Null("string".GetLongestCommonSubsequence(null)); 28 | } 29 | 30 | [Fact] 31 | public void ShouldReturnNull_WhenNullValueForLongestCommonSubsequence() 32 | { 33 | Assert.Null("test".GetLongestCommonSubsequence(null)); 34 | Assert.Null(((string)null).GetLongestCommonSubsequence("null")); 35 | } 36 | 37 | [Theory] 38 | [InlineData("Hallo", "Hello", false, 1)] 39 | [InlineData("hALLO", "Hello", true, 1)] 40 | [InlineData("", "Hello", false, 5)] 41 | [InlineData("Hallo", "", false, 5)] 42 | 
[InlineData("olleH", "Hello", false, 4)] 43 | [InlineData("ABCDEF", "abcdef", false, 6)] 44 | public void CheckLevenshteinDistance(string one, string two, bool ignoreCase, int expectedDistance) 45 | { 46 | var actual = one.GetLevenshteinDistance(two, ignoreCase); 47 | 48 | Assert.Equal(expectedDistance, actual); 49 | } 50 | // The distance calculation is expected to stop and report abortCost once that cost is reached. 51 | [Fact] 52 | public void ShouldReturn_WhenAbortCostHasReached() 53 | { 54 | const int abortCost = 3; 55 | var cost = "ABCDEFGHIKLMN".GetLevenshteinDistance("abcdefghijlkm", abortCost: abortCost); 56 | 57 | Assert.Equal(abortCost, cost); // expected value first, actual second, so a failure message labels the two values correctly 58 | } 59 | 60 | [Fact] 61 | public void ShouldThrow_WhenNullValueForLevenshtein() 62 | { 63 | Assert.Throws(() => "test".GetLevenshteinDistance(null)); // NOTE(review): the generic exception type argument of Assert.Throws is missing in this listing - restore it from the repository 64 | Assert.Throws(() => ((string) null).GetLevenshteinDistance("Test")); 65 | } 66 | 67 | [Theory] 68 | [InlineData("ThatIsAWord", "Word", false, "Word")] 69 | [InlineData("WordLonger", "LongerWord", false, "Longer")] 70 | public void CheckLongestSubstring(string one, string two, bool ignoreCase, string expectedSubstring) 71 | { 72 | var longestCommonSubstring = one.GetLongestCommonSubstring(two, ignoreCase); 73 | 74 | Assert.Equal(expectedSubstring, longestCommonSubstring); 75 | } 76 | 77 | [Fact] 78 | public void ShouldReturnNull_WhenNullValueForLongestCommonSubstring() 79 | { 80 | Assert.Null("test".GetLongestCommonSubstring(null)); 81 | Assert.Null(((string)null).GetLongestCommonSubstring("null")); 82 | } 83 | 84 | [Theory] 85 | [InlineData("Hallo", "Hello", false, 1)] 86 | [InlineData("a", "abc", false, 0)] 87 | [InlineData("abc", "a", false, 2)] 88 | [InlineData("ABC", "abc", true, 0)] 89 | [InlineData("ABC", "abc", false, 3)] 90 | public void ShouldCalculateHammingDistance(string one, string two, bool ignoreCase, int expectedCost) 91 | { 92 | var actualCost = one.GetHammingDistance(two, ignoreCase); 93 | 94 | Assert.Equal(expectedCost, actualCost); 95 | } 96 | 97 | [Fact] 98 | public void ShouldThrow_WhenNullValueForHammingDistance() 99 | { 100 | 
Assert.Throws(() => "test".GetHammingDistance(null)); 101 | Assert.Throws(() => ((string) null).GetHammingDistance("Test")); 102 | } 103 | 104 | [Fact] 105 | public void ShouldGetClosestWords() 106 | { 107 | var actual = "Hallo".GetClosestWords(2, false, "Hallo", "Auto", "Something else", "Haribo"); 108 | 109 | Assert.NotNull(actual); 110 | var collection = actual.ToArray(); 111 | Assert.NotEmpty(collection); 112 | Assert.Equal(2, collection.Length); 113 | Assert.Equal("Hallo", collection[0]); 114 | Assert.Equal("Haribo", collection[1]); 115 | } 116 | 117 | [Fact] 118 | public void ShouldReturnEmptyArrayWhenNoInput() 119 | { 120 | var actual = ((string) null).GetClosestWords(1, false, "H"); 121 | 122 | Assert.Empty(actual); 123 | } 124 | 125 | [Fact] 126 | public void ShouldReturnEmptyArrayWhenWordsEmpty() 127 | { 128 | var actual = "Test".GetClosestWords(1, false); 129 | 130 | Assert.Empty(actual); 131 | } 132 | 133 | [Fact] 134 | public void ShouldCheckIfWordIsNull() 135 | { 136 | var actual = "Hallo".GetClosestWords(2, false, "Hallo", null).ToArray(); 137 | 138 | Assert.Single(actual); 139 | Assert.Equal("Hallo", actual[0]); 140 | } 141 | 142 | [Fact] 143 | public void ShouldGetClosestWord() 144 | { 145 | var actual = "Hallo".GetClosestWord(false, "Hello", "Helbo"); 146 | 147 | Assert.Equal("Hello", actual); 148 | } 149 | } -------------------------------------------------------------------------------- /LinkDotNet.StringOperations.UnitTests/LinkDotNet.StringOperations.UnitTests.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net6.0 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | runtime; build; native; contentfiles; analyzers; buildtransitive 14 | all 15 | 16 | 17 | runtime; build; native; contentfiles; analyzers; buildtransitive 18 | all 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- 
/LinkDotNet.StringOperations.UnitTests/Search/BoyerMooreTests.cs:
--------------------------------------------------------------------------------
using System.Linq;
using LinkDotNet.StringOperations.Search;
using Xunit;

namespace LinkDotNet.StringOperations.UnitTests.Search;

/// <summary>
/// Exercises <see cref="BoyerMoore"/>: all hits, first hit only, and the empty-result cases.
/// </summary>
public class BoyerMooreTests
{
    // Sample sentence containing "text" (lower case) at offsets 11, 30 and 56.
    private const string Sentence = "That is my text with the word text 3 times. That is why text again";
    private const string MixedCasePattern = "Text";

    [Fact]
    public void ShouldFindAllOccurrences()
    {
        var hits = BoyerMoore.FindAll(Sentence, MixedCasePattern, true).ToArray();

        Assert.Equal(new[] { 11, 30, 56 }, hits);
    }

    [Fact]
    public void DoNotGoOutOfBounds()
    {
        const string haystack = "The quick brown fox jumps over the lazy dog maybe also a cat a sheep and another dog";

        // The second "dog" is the very end of the haystack.
        var hits = BoyerMoore.FindAll(haystack, "dog").ToArray();

        Assert.Equal(2, hits.Length);
    }

    [Fact]
    public void ShouldAbortOnFirstOccurence()
    {
        var hits = BoyerMoore.FindAll(Sentence, MixedCasePattern, true, true).ToArray();

        Assert.Equal(11, Assert.Single(hits));
    }

    [Theory]
    [InlineData(null, "null")]
    [InlineData("null", null)]
    [InlineData("", "null")]
    [InlineData("null", "")]
    public void ShouldReturnEmptyOccurrences_WhenGivenNullOrEmpty(string text, string pattern)
    {
        Assert.Empty(BoyerMoore.FindAll(text, pattern));
    }

    [Fact]
    public void GivenNoHit_ThenEmptyArray()
    {
        Assert.Empty(BoyerMoore.FindAll("Word", "Text"));
    }

    [Fact]
    public void GivenPatternLongerThanText_EmptyArray()
    {
        var hits = BoyerMoore.FindAll("t", "longer").ToArray();

        Assert.Empty(hits);
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/Search/KnuthMorrisPrattTests.cs:
--------------------------------------------------------------------------------
using System.Linq;
using LinkDotNet.StringOperations.Search;
using Xunit;

namespace LinkDotNet.StringOperations.UnitTests.Search;

/// <summary>
/// Exercises <see cref="KnuthMorrisPratt"/>: all hits, first hit only, existence check and the empty-result cases.
/// </summary>
public class KnuthMorrisPrattTests
{
    // Sample sentence containing "text" (lower case) at offsets 11, 30 and 56.
    private const string Sentence = "That is my text with the word text 3 times. That is why text again";
    private const string MixedCasePattern = "Text";

    [Fact]
    public void ShouldFindAllOccurrences()
    {
        var hits = KnuthMorrisPratt.FindAll(Sentence, MixedCasePattern, true).ToArray();

        Assert.Equal(new[] { 11, 30, 56 }, hits);
    }

    [Fact]
    public void ShouldAbortOnFirstOccurence()
    {
        var hits = KnuthMorrisPratt.FindAll(Sentence, MixedCasePattern, true, true).ToArray();

        Assert.Equal(11, Assert.Single(hits));
    }

    [Theory]
    [InlineData(null, "null")]
    [InlineData("null", null)]
    [InlineData("", "null")]
    [InlineData("null", "")]
    public void ShouldReturnEmptyOccurrences_WhenGivenNullOrEmpty(string text, string pattern)
    {
        Assert.Empty(KnuthMorrisPratt.FindAll(text, pattern));
    }

    [Fact]
    public void ShouldReturnIfOccurrenceInText()
    {
        Assert.True(KnuthMorrisPratt.HasPattern("KnuthMorrisPratt", "t"));
    }

    [Fact]
    public void GivenNoHit_ThenEmptyArray()
    {
        Assert.Empty(KnuthMorrisPratt.FindAll("Word", "Text"));
    }

    [Fact]
    public void GivenPatternLongerThanText_EmptyArray()
    {
        Assert.False(KnuthMorrisPratt.HasPattern("t", "longer"));
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/Search/ZAlgorithmTests.cs:
--------------------------------------------------------------------------------
using System.Linq;
using LinkDotNet.StringOperations.Search;
using Xunit;

namespace LinkDotNet.StringOperations.UnitTests.Search;

/// <summary>
/// Exercises <see cref="ZAlgorithm"/>: all hits, first hit only, existence check and the empty-result cases.
/// </summary>
public class ZAlgorithmTests
{
    // Sample sentence containing "text" (lower case) at offsets 11, 30 and 56.
    private const string Sentence = "That is my text with the word text 3 times. That is why text again";
    private const string MixedCasePattern = "Text";

    [Fact]
    public void ShouldFindAllOccurrences()
    {
        var hits = ZAlgorithm.FindAll(Sentence, MixedCasePattern, true).ToArray();

        Assert.Equal(new[] { 11, 30, 56 }, hits);
    }

    [Fact]
    public void ShouldAbortOnFirstOccurence()
    {
        var hits = ZAlgorithm.FindAll(Sentence, MixedCasePattern, true, true).ToArray();

        Assert.Equal(11, Assert.Single(hits));
    }

    [Theory]
    [InlineData(null, "null")]
    [InlineData("null", null)]
    [InlineData("", "null")]
    [InlineData("null", "")]
    public void ShouldReturnEmptyOccurrences_WhenGivenNullOrEmpty(string text, string pattern)
    {
        Assert.Empty(ZAlgorithm.FindAll(text, pattern));
    }

    [Fact]
    public void ShouldReturnIfOccurrenceInText()
    {
        Assert.True(ZAlgorithm.HasPattern("KnuthMorrisPratt", "t"));
    }

    [Fact]
    public void GivenNoHit_ThenEmptyArray()
    {
        Assert.Empty(ZAlgorithm.FindAll("Word", "Text"));
    }

    [Fact]
    public void GivenPatternLongerThanText_EmptyArray()
    {
        Assert.False(ZAlgorithm.HasPattern("t", "longer"));
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.sln:
--------------------------------------------------------------------------------
 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkDotNet.StringOperations", "LinkDotNet.StringOperations\LinkDotNet.StringOperations.csproj",
"{B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}" 4 | EndProject 5 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkDotNet.StringOperations.UnitTests", "LinkDotNet.StringOperations.UnitTests\LinkDotNet.StringOperations.UnitTests.csproj", "{CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}" 6 | EndProject 7 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkDotNet.Benchmarks", "LinkDotNet.Benchmarks\LinkDotNet.Benchmarks.csproj", "{AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}" 8 | EndProject 9 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{9F4AB6BB-F2AA-4C44-B99D-86CB1FFEFDF2}" 10 | ProjectSection(SolutionItems) = preProject 11 | README.md = README.md 12 | EndProjectSection 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Release|Any CPU = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Release|Any CPU.Build.0 = Release|Any CPU 24 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 25 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Debug|Any CPU.Build.0 = Debug|Any CPU 26 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Release|Any CPU.ActiveCfg = Release|Any CPU 27 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Release|Any CPU.Build.0 = Release|Any CPU 28 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 29 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Debug|Any CPU.Build.0 = Debug|Any CPU 30 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Release|Any CPU.Build.0 = Release|Any CPU 32 | 
EndGlobalSection 33 | EndGlobal 34 |
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/Compression/LempelZivWelch.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Text;

namespace LinkDotNet.StringOperations.Compression;

/// <summary>
/// Lempel-Ziv-Welch compression over an initial dictionary of the first 256 character codes.
/// </summary>
public static class LempelZivWelch
{
    private const int AlphabetSize = 256;

    /// <summary>
    /// Encodes <paramref name="text"/> into a sequence of dictionary codes.
    /// </summary>
    /// <param name="text">Text to compress.</param>
    /// <returns>The emitted codes; an empty (initialized) array for empty input.</returns>
    /// <exception cref="KeyNotFoundException">
    /// A character is not part of the initial dictionary (character code 256 or above).
    /// </exception>
    public static ImmutableArray<int> Encode(ReadOnlySpan<char> text)
    {
        if (text.IsEmpty)
        {
            // "new ImmutableArray<int>()" would be the uninitialized default instance,
            // which throws as soon as it is enumerated or its length is read.
            return ImmutableArray<int>.Empty;
        }

        var table = CreateEncodeTable();
        var nextCode = AlphabetSize;
        var output = new List<int>();
        var phrase = text[0].ToString();

        for (var i = 1; i < text.Length; i++)
        {
            var symbol = text[i].ToString();
            var extended = phrase + symbol;

            if (table.ContainsKey(extended))
            {
                // Keep growing the phrase while it is still known.
                phrase = extended;
                continue;
            }

            // Emit the longest known phrase and learn the extended one.
            output.Add(table[phrase]);
            table[extended] = nextCode;
            nextCode++;
            phrase = symbol;
        }

        output.Add(table[phrase]);

        return output.ToImmutableArray();
    }

    /// <summary>
    /// Rebuilds the text from the codes produced by <see cref="Encode"/>.
    /// </summary>
    /// <param name="decodedText">The code sequence to decode (the parameter name is kept for compatibility).</param>
    /// <returns>The decoded text; an empty string for a default or empty array.</returns>
    public static string Decode(ImmutableArray<int> decodedText)
    {
        if (decodedText.IsDefaultOrEmpty)
        {
            return string.Empty;
        }

        var table = CreateDecodeTable();
        var codes = decodedText.AsSpan();
        var previousCode = codes[0];
        var entry = table[previousCode];
        var output = new StringBuilder(entry);
        var firstCharacter = entry[0];
        var nextCode = AlphabetSize;

        for (var i = 1; i < codes.Length; i++)
        {
            var code = codes[i];

            // An unknown code can only be the entry that is about to be created:
            // the previous phrase followed by its own first character.
            entry = table.TryGetValue(code, out var known)
                ? known
                : table[previousCode] + firstCharacter;

            output.Append(entry);
            firstCharacter = entry[0];
            table[nextCode] = table[previousCode] + firstCharacter;
            nextCode++;
            previousCode = code;
        }

        return output.ToString();
    }

    // Maps every single character of the alphabet to its own code.
    private static Dictionary<string, int> CreateEncodeTable()
    {
        var dictionary = new Dictionary<string, int>();

        for (var i = 0; i < AlphabetSize; i++)
        {
            dictionary[((char)i).ToString()] = i;
        }

        return dictionary;
    }

    // Inverse of the encode table: code to single-character string.
    private static Dictionary<int, string> CreateDecodeTable()
    {
        var dictionary = new Dictionary<int, string>();

        for (var i = 0; i < AlphabetSize; i++)
        {
            dictionary[i] = ((char)i).ToString();
        }

        return dictionary;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/DataStructure/Rope.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;

namespace LinkDotNet.StringOperations.DataStructure;

/// <summary>
/// Binary tree of string fragments. Leaves hold text, inner nodes hold two children.
/// Nodes are never modified after creation, so sub-ropes can be shared between ropes.
/// </summary>
public class Rope
{
    // Text of a leaf; null for inner nodes.
    private string _fragment;
    private Rope _left;
    private Rope _right;

    // Leaf: length of the fragment. Inner node: number of characters in the left subtree.
    private int _weight;

    // Number of characters in the whole subtree. Stored on every node so that
    // concatenation can set the weight in constant time instead of walking the tree.
    private int _length;

    private Rope() {}

    /// <summary>Gets the character at the zero-based <paramref name="index"/>.</summary>
    public char this[int index] => GetIndex(index);

    /// <summary>
    /// Splits the rope after <paramref name="index"/>: Item1 holds the characters up to and
    /// including <paramref name="index"/>, Item2 the remainder (null when nothing remains).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or beyond the text.</exception>
    public Tuple<Rope, Rope> Split(int index)
    {
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(index), "Index can't be negative");
        }

        return SplitRope(this, index);

        static Tuple<Rope, Rope> SplitRope(Rope node, int index)
        {
            if (node._left == null)
            {
                if (index == node._weight - 1)
                {
                    return new Tuple<Rope, Rope>(node, null);
                }

                // Slicing the span avoids copying the fragment into a temporary array.
                var fragment = node._fragment.AsSpan();
                var head = Create(fragment[..(index + 1)]);
                var tail = Create(fragment[(index + 1)..]);
                return new Tuple<Rope, Rope>(head, tail);
            }

            if (index == node._weight - 1)
            {
                return new Tuple<Rope, Rope>(node._left, node._right);
            }

            if (index < node._weight)
            {
                var splitLeftSide = SplitRope(node._left, index);
                return new Tuple<Rope, Rope>(splitLeftSide.Item1, splitLeftSide.Item2 + node._right);
            }

            var splitRightSide = SplitRope(node._right, index - node._weight);
            return new Tuple<Rope, Rope>(node._left + splitRightSide.Item1, splitRightSide.Item2);
        }
    }

    /// <summary>Returns a new rope with <paramref name="other"/> inserted after <paramref name="index"/>.</summary>
    public Rope Insert(Rope other, int index)
    {
        var pair = Split(index);
        var left = pair.Item1 + other;
        return pair.Item2 != null ? left + pair.Item2 : left;
    }

    /// <summary>
    /// Returns a new rope without the <paramref name="length"/> characters starting at <paramref name="startIndex"/>.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="startIndex"/> is negative, <paramref name="length"/> is not positive,
    /// or the range reaches beyond the text.
    /// </exception>
    public Rope Delete(int startIndex, int length)
    {
        if (startIndex < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(startIndex), "Starting index can't be negative");
        }

        if (length <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), "Length has to be bigger 0");
        }

        // Nothing precedes index 0, and Split(-1) would throw.
        var beforeStartIndex = startIndex == 0 ? null : Split(startIndex - 1).Item1;
        var afterStartPlusLength = Split(startIndex + length - 1).Item2;

        return beforeStartIndex + afterStartPlusLength;
    }

    /// <summary>Concatenates all fragments from left to right.</summary>
    public override string ToString()
    {
        var stringBuilder = new StringBuilder(_length);

        // Explicit stack instead of recursion: long concatenation chains produce deep trees.
        var pending = new Stack<Rope>();
        pending.Push(this);

        while (pending.Count > 0)
        {
            var node = pending.Pop();
            if (node._left == null)
            {
                stringBuilder.Append(node._fragment);
                continue;
            }

            // Right first so that the left child is processed first.
            pending.Push(node._right);
            pending.Push(node._left);
        }

        return stringBuilder.ToString();
    }

    public static Rope operator +(Rope left, Rope right)
    {
        return Concat(left, right);
    }

    public static Rope operator +(Rope left, ReadOnlySpan<char> right)
    {
        var rightRope = Create(right);

        return left + rightRope;
    }

    public static Rope operator +(ReadOnlySpan<char> left, Rope right)
    {
        var leftRope = Create(left);

        return leftRope + right;
    }

    /// <summary>
    /// Joins two ropes without copying their text.
    /// </summary>
    /// <param name="left">Leading part; may be null.</param>
    /// <param name="right">Trailing part; may be null.</param>
    /// <param name="recalculateWeights">
    /// Kept for backward compatibility; weights are now always set when the node is created.
    /// </param>
    /// <returns>
    /// A new node over both operands. If one operand is null the other one is returned as is;
    /// if both are null an empty rope is returned.
    /// </returns>
    public static Rope Concat(Rope left, Rope right, bool recalculateWeights = false)
    {
        if (left == null || right == null)
        {
            // Never build an inner node with a missing child: every traversal relies on
            // inner nodes having both children.
            return left ?? right ?? Create(ReadOnlySpan<char>.Empty);
        }

        return new Rope
        {
            _left = left,
            _right = right,
            _weight = left._length,
            _length = left._length + right._length,
        };
    }

    /// <summary>
    /// Builds a rope from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Source characters.</param>
    /// <param name="leafLength">Threshold above which a range is split into two children; at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="leafLength"/> is smaller than 1.</exception>
    public static Rope Create(ReadOnlySpan<char> text, int leafLength = 8)
    {
        if (leafLength < 1)
        {
            // With a threshold below 1 the left half never shrinks and the recursion never ends.
            throw new ArgumentOutOfRangeException(nameof(leafLength), "Leaf length has to be at least 1");
        }

        return CreateInternal(text, leafLength, 0 , text.Length - 1);
    }

    // Builds the subtree for the inclusive range [leftIndex, rightIndex].
    private static Rope CreateInternal(ReadOnlySpan<char> text, int leafLength, int leftIndex, int rightIndex)
    {
        var node = new Rope();

        if (rightIndex - leftIndex > leafLength)
        {
            var center = (rightIndex + leftIndex + 1) / 2;
            node._left = CreateInternal(text, leafLength, leftIndex, center);
            node._right = CreateInternal(text, leafLength, center + 1, rightIndex);
            node._weight = node._left._length;
            node._length = node._left._length + node._right._length;
        }
        else
        {
            var rightIndexInclusiveUpperBound = rightIndex + 1;
            node._fragment = text[leftIndex .. rightIndexInclusiveUpperBound].ToString();
            node._weight = node._fragment.Length;
            node._length = node._fragment.Length;
        }

        return node;
    }

    // Walks down to the leaf that contains the index, subtracting the weight whenever it goes right.
    private char GetIndex(int index)
    {
        var node = this;

        while (node._left != null)
        {
            if (node._weight <= index)
            {
                index -= node._weight;
                node = node._right;
            }
            else
            {
                node = node._left;
            }
        }

        return node._fragment[index];
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/DataStructure/Trie.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;

namespace LinkDotNet.StringOperations.DataStructure;

/// <summary>
/// Prefix tree over characters. With <c>ignoreCase</c> every character is stored and
/// looked up in its invariant upper-case form.
/// </summary>
public class Trie
{
    private IDictionary<char, Trie> Children { get; set; } = new Dictionary<char, Trie>();

    // True when a stored word ends at this node.
    private bool _isLeaf;
    private readonly bool _ignoreCase;

    public Trie() : this(false)
    {
    }

    public Trie(bool ignoreCase)
    {
        _ignoreCase = ignoreCase;
    }

    /// <summary>Stores <paramref name="word"/>; an empty word adds nothing.</summary>
    public void Add(ReadOnlySpan<char> word)
    {
        var current = Children;
        for (var i = 0; i < word.Length; i++)
        {
            var currentCharacter = _ignoreCase ?
char.ToUpperInvariant(word[i]) : word[i];

            var node = CreateOrGetNode(currentCharacter, current);
            current = node.Children;

            if (i == word.Length - 1)
            {
                node._isLeaf = true;
            }
        }
    }

    /// <summary>Returns true when exactly <paramref name="word"/> has been added.</summary>
    public bool Find(ReadOnlySpan<char> word)
    {
        if (word.IsEmpty)
        {
            return false;
        }

        var node = FindNode(word);

        return node != null && node._isLeaf;
    }

    /// <summary>Returns true when at least one stored word starts with <paramref name="word"/>.</summary>
    public bool StartsWith(ReadOnlySpan<char> word)
    {
        if (word.IsEmpty)
        {
            return false;
        }

        return FindNode(word) != null;
    }

    /// <summary>
    /// Enumerates every stored word that starts with <paramref name="prefix"/>, including the
    /// prefix itself when it is a stored word. Nothing is returned for a null or empty prefix.
    /// </summary>
    /// <remarks>
    /// The returned words begin with <paramref name="prefix"/> exactly as passed in, followed by
    /// the characters as they are stored in the trie.
    /// </remarks>
    public IEnumerable<string> GetWordsWithPrefix(string prefix)
    {
        var node = FindNode(prefix);
        if (node == null)
        {
            yield break;
        }

        foreach (var word in Collect(node, prefix.ToList()))
        {
            yield return word;
        }

        static IEnumerable<string> Collect(Trie node, List<char> prefix)
        {
            // A word ends wherever the end marker is set, not only at nodes without children;
            // otherwise "car" would be lost as soon as "cart" is stored as well.
            if (node._isLeaf)
            {
                yield return new string(prefix.ToArray());
            }

            foreach (var child in node.Children)
            {
                prefix.Add(child.Key);
                foreach (var t in Collect(child.Value, prefix))
                {
                    yield return t;
                }
                prefix.RemoveAt(prefix.Count - 1);
            }
        }
    }

    // Returns the child for the character, creating and registering it when missing.
    private static Trie CreateOrGetNode(char currentCharacter, IDictionary<char, Trie> children)
    {
        if (!children.TryGetValue(currentCharacter, out var trie))
        {
            trie = new Trie();
            children.Add(currentCharacter, trie);
        }

        return trie;
    }

    // Follows the characters of the word from the root; null when the path does not exist
    // or the word is empty.
    private Trie FindNode(ReadOnlySpan<char> word)
    {
        var children = Children;
        Trie currentTrie = null;

        foreach (var character in word)
        {
            var currentCharacter = _ignoreCase ? char.ToUpperInvariant(character) : character;
            if (!children.TryGetValue(currentCharacter, out currentTrie))
            {
                return null;
            }

            children = currentTrie.Children;
        }

        return currentTrie;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/HammingDistance.cs:
--------------------------------------------------------------------------------
using System;

namespace LinkDotNet.StringOperations.EditDistance;

public static partial class EditDistances
{
    /// <summary>
    /// Counts the positions of <paramref name="one"/> whose character differs from
    /// <paramref name="two"/>; positions beyond the end of <paramref name="two"/> count as differences.
    /// </summary>
    /// <exception cref="ArgumentNullException">Either string is null or empty.</exception>
    public static int GetHammingDistance(this string one, string two, bool ignoreCase = false)
    {
        if (string.IsNullOrEmpty(one))
        {
            throw new ArgumentNullException(nameof(one));
        }

        if (string.IsNullOrEmpty(two))
        {
            throw new ArgumentNullException(nameof(two));
        }

        var cost = 0;
        for (var i = 0; i < one.Length; i++)
        {
            if (i >= two.Length)
            {
                cost++;
                continue;
            }

            var characterEqual = ignoreCase
                ?
char.ToUpperInvariant(one[i]) == char.ToUpperInvariant(two[i])
                : one[i] == two[i];

            if (!characterEqual)
            {
                cost++;
            }
        }

        return cost;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/Levenshtein.cs:
--------------------------------------------------------------------------------
using System;

namespace LinkDotNet.StringOperations.EditDistance;

public static partial class EditDistances
{
    /// <summary>
    /// Calculates the Levenshtein distance between <paramref name="one"/> and <paramref name="two"/>.
    /// Insertions and deletions cost 1, a substitution costs <paramref name="substitutionCost"/>.
    /// </summary>
    /// <param name="one">First string.</param>
    /// <param name="two">Second string.</param>
    /// <param name="ignoreCase">Compare characters by their invariant upper-case form.</param>
    /// <param name="substitutionCost">Cost of replacing one character by another.</param>
    /// <param name="abortCost">
    /// Upper bound of interest: as soon as the distance is known to be at least this value the
    /// calculation stops and this value is returned.
    /// </param>
    /// <returns>The distance, capped at <paramref name="abortCost"/>.</returns>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public static int GetLevenshteinDistance(this string one, string two, bool ignoreCase = false, int substitutionCost = 1, int abortCost = int.MaxValue)
    {
        AssertValuesNotNull(one, two);
        if (one == string.Empty)
        {
            return Math.Min(two.Length, abortCost);
        }

        if (two == string.Empty)
        {
            return Math.Min(one.Length, abortCost);
        }

        var matrix = CreateLevenshteinMatrix(one, two);

        for (var i = 1; i <= one.Length; i++)
        {
            var cheapestInRow = matrix[i, 0];

            for (var j = 1; j <= two.Length; j++)
            {
                var characterEqual = CheckCharacterEqual(one, two, ignoreCase, i, j);

                var substituteCost = characterEqual ? 0 : substitutionCost;
                var deleteCost = matrix[i - 1, j] + 1;
                var insertCost = matrix[i, j - 1] + 1;
                var completeSubstitutionCost = matrix[i - 1, j - 1] + substituteCost;
                matrix[i, j] = Math.Min(Math.Min(deleteCost, insertCost), completeSubstitutionCost);

                cheapestInRow = Math.Min(cheapestInRow, matrix[i, j]);
            }

            // Every edit path crosses each row, so the final distance can't be lower than the
            // cheapest cell of a row. A single expensive cell proves nothing: cells far from the
            // diagonal are large even for identical strings.
            if (cheapestInRow >= abortCost)
            {
                return abortCost;
            }
        }

        return Math.Min(matrix[one.Length, two.Length], abortCost);
    }

    // Throws ArgumentNullException naming the first argument that is null.
    private static void AssertValuesNotNull(string one, string two)
    {
        if (one == null)
        {
            throw new ArgumentNullException(nameof(one));
        }

        if (two == null)
        {
            throw new ArgumentNullException(nameof(two));
        }
    }

    // Creates the (one.Length + 1) x (two.Length + 1) matrix with the first column and first row
    // initialised to the cost of building the prefix from an empty string.
    private static int[,] CreateLevenshteinMatrix(string one, string two)
    {
        var matrix = new int[one.Length + 1, two.Length + 1];

        for (var i = 0; i <= one.Length; i++)
        {
            matrix[i, 0] = i;
        }

        for (var j = 0; j <= two.Length; j++)
        {
            matrix[0, j] = j;
        }

        return matrix;
    }

    // Compares one[i - 1] with two[j - 1]; i and j are one-based matrix coordinates.
    private static bool CheckCharacterEqual(string one, string two, bool ignoreCase, int i, int j)
    {
        var characterEqual = ignoreCase
            ?
char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
            : one[i - 1] == two[j - 1];
        return characterEqual;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/LongestCommonSubsequence.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;

namespace LinkDotNet.StringOperations.EditDistance;

public static partial class EditDistances
{
    /// <summary>
    /// Returns the candidate most similar to <paramref name="input"/>, or null when there is none.
    /// </summary>
    public static string GetClosestWord(this string input, bool ignoreCase, params string[] words) =>
        input.GetClosestWords(1, ignoreCase, words).FirstOrDefault();

    /// <summary>
    /// Returns up to <paramref name="count"/> distinct candidates ordered by the length of their
    /// longest common subsequence with <paramref name="input"/>, longest first. Candidates with
    /// the same length keep the order in which they were passed.
    /// </summary>
    /// <param name="input">Word to compare against; null yields an empty result.</param>
    /// <param name="count">Maximum number of words to return.</param>
    /// <param name="ignoreCase">Compare characters by their invariant upper-case form.</param>
    /// <param name="words">Candidates; null entries are skipped.</param>
    public static IEnumerable<string> GetClosestWords(this string input, int count, bool ignoreCase,
        params string[] words)
    {
        if (input == null || words == null || words.Length == 0)
        {
            return Array.Empty<string>();
        }

        // OrderByDescending is a stable sort, so ties are resolved by input order.
        return words
            .Where(word => word != null)
            .Distinct()
            .Select(word => (Word: word, Similarity: word.GetLongestCommonSubsequence(input, ignoreCase).Length))
            .OrderByDescending(candidate => candidate.Similarity)
            .Select(candidate => candidate.Word)
            .Take(count)
            .ToList();
    }

    /// <summary>
    /// Returns the longest sequence of characters that appears in both strings in the same
    /// order, not necessarily contiguous. The characters are taken from <paramref name="one"/>.
    /// </summary>
    /// <returns>The subsequence, or null when either string is null.</returns>
    public static string GetLongestCommonSubsequence(this string one, string two, bool ignoreCase = false)
    {
        if (one == null || two == null)
        {
            return null;
        }

        var lcsMatrix = CreateLongestCommonSubsequenceMatrix(one, two, ignoreCase);
        return GetLongestCommonSubsequenceBackTrack(lcsMatrix, one, two, one.Length, two.Length, ignoreCase);
    }

    // Cell [i, j] holds the length of the longest common subsequence of the first i characters
    // of "one" and the first j characters of "two".
    private static int[,] CreateLongestCommonSubsequenceMatrix(string one, string two, bool ignoreCase)
    {
        var lcsMatrix = new int[one.Length + 1, two.Length + 1];

        for (var i = 1; i <= one.Length; i++)
        {
            for (var j = 1; j <= two.Length; j++)
            {
                var characterEqual = ignoreCase
                    ? char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
                    : one[i - 1] == two[j - 1];
                if (characterEqual)
                {
                    lcsMatrix[i, j] = lcsMatrix[i - 1, j - 1] + 1;
                }
                else
                {
                    lcsMatrix[i, j] = Math.Max(lcsMatrix[i - 1, j], lcsMatrix[i, j - 1]);
                }
            }
        }

        return lcsMatrix;
    }

    // Walks the matrix from [oneLength, twoLength] back towards the origin and collects the
    // matching characters. Iterative with a pre-sized buffer: the recursive variant needed one
    // stack frame and one string concatenation per step.
    private static string GetLongestCommonSubsequenceBackTrack(int[,] lcsMatrix, string one, string two,
        int oneLength, int twoLength, bool ignoreCase)
    {
        var buffer = new char[lcsMatrix[oneLength, twoLength]];
        var writePosition = buffer.Length;
        var i = oneLength;
        var j = twoLength;

        while (i > 0 && j > 0)
        {
            var characterEqual = ignoreCase
                ? char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
                : one[i - 1] == two[j - 1];

            if (characterEqual)
            {
                // The walk runs backwards, so the buffer is filled from its end.
                writePosition--;
                buffer[writePosition] = one[i - 1];
                i--;
                j--;
            }
            else if (lcsMatrix[i, j - 1] > lcsMatrix[i - 1, j])
            {
                j--;
            }
            else
            {
                i--;
            }
        }

        return new string(buffer, writePosition, buffer.Length - writePosition);
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/LongestCommonSubstring.cs:
--------------------------------------------------------------------------------
namespace LinkDotNet.StringOperations.EditDistance;

public static partial class EditDistances
{
    /// <summary>
    /// Returns the longest contiguous run of characters shared by both strings, taken from
    /// <paramref name="one"/>. When several runs have the same length, the first one found
    /// while scanning <paramref name="one"/> from the start is returned.
    /// </summary>
    /// <returns>The substring, an empty string when nothing is shared, or null when either string is null.</returns>
    public static string GetLongestCommonSubstring(this string one, string two, bool ignoreCase = false)
    {
        if (one == null || two == null)
        {
            return null;
        }

        var lcsMatrix = CreateLongestCommonSubstringMatrix(one, two, ignoreCase);

        var length = 0;
        var index = 0;
        for (var i = 1; i <= one.Length; i++)
        {
            for (var j = 1; j <= two.Length; j++)
            {
                if (length < lcsMatrix[i, j])
                {
                    length = lcsMatrix[i, j];

                    // The run ends at position i (exclusive) of "one".
                    index = i - length;
                }
            }
        }

        return length > 0 ? one.Substring(index, length) : string.Empty;
    }

    // Cell [i, j] holds the length of the common run that ends at one[i - 1] and two[j - 1].
    private static int[,] CreateLongestCommonSubstringMatrix(string one, string two, bool ignoreCase)
    {
        var lcsMatrix = new int[one.Length + 1, two.Length + 1];

        for (var i = 1; i <= one.Length; i++)
        {
            for (var j = 1; j <= two.Length; j++)
            {
                var characterEqual = ignoreCase
                    ? char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
                    : one[i - 1] == two[j - 1];
                if (characterEqual)
                {
                    lcsMatrix[i, j] = lcsMatrix[i - 1, j - 1] + 1;
                }
                else
                {
                    lcsMatrix[i, j] = 0;
                }
            }
        }

        return lcsMatrix;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/LinkDotNet.StringOperations.csproj:
--------------------------------------------------------------------------------
 1 | 2 | 3 | 4 | net6.0 5 | 6 | 7 |
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/Search/BoyerMoore.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;

namespace LinkDotNet.StringOperations.Search;

/// <summary>
/// Boyer-Moore string search using the bad-character rule.
/// </summary>
public static class BoyerMoore
{
    private const int AlphabetSize = 256;

    /// <summary>Returns true when <paramref name="word"/> occurs in <paramref name="text"/>.</summary>
    public static bool HasPattern(string text, string word, bool ignoreCase = false) =>
        FindAll(text, word, ignoreCase, true).Any();

    /// <summary>
    /// Lazily enumerates the zero-based start offsets of <paramref name="word"/> in
    /// <paramref name="text"/>. Null or empty arguments produce no results.
    /// </summary>
    public static IEnumerable<int> FindAll(string text, string word, bool ignoreCase = false, bool abortOnFirstOccurrence =
false) 15 | { 16 | if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(word)) 17 | { 18 | yield break; 19 | } 20 | 21 | if (text.Length < word.Length) 22 | { 23 | yield break; 24 | } 25 | 26 | var wordLength = word.Length; 27 | var textLength = text.Length; 28 | 29 | var badCharacterTable = GetBadCharacterTable(word, ignoreCase); 30 | 31 | var shift = 0; 32 | while (shift <= textLength - wordLength) 33 | { 34 | var index = word.Length - 1; 35 | 36 | index = ReduceIndexWhileMatchAtShift(text, word, ignoreCase, index, shift); 37 | 38 | if (index < 0) 39 | { 40 | yield return shift; 41 | if (abortOnFirstOccurrence) 42 | { 43 | yield break; 44 | } 45 | 46 | shift = ShiftPatternToNextCharacterWithLastOccurrenceOfPattern(text, shift, wordLength, textLength, badCharacterTable, ignoreCase); 47 | } 48 | else 49 | { 50 | shift = ShiftPatternAfterBadCharacter(text, shift, index, badCharacterTable, ignoreCase); 51 | } 52 | } 53 | } 54 | 55 | private static int[] GetBadCharacterTable(string text, bool ignoreCase) 56 | { 57 | var table = new int[AlphabetSize]; 58 | Array.Fill(table, -1); 59 | 60 | for (var i = 0; i < text.Length; i++) 61 | { 62 | var character = ignoreCase ? char.ToUpperInvariant(text[i]) : text[i]; 63 | table[character] = i; 64 | } 65 | 66 | return table; 67 | } 68 | 69 | private static int ReduceIndexWhileMatchAtShift(string text, string word, bool ignoreCase, int index, int shift) 70 | { 71 | while (index >= 0 && CharacterEqual(text, word, ignoreCase, shift + index, index)) 72 | { 73 | index--; 74 | } 75 | 76 | return index; 77 | } 78 | 79 | private static int ShiftPatternToNextCharacterWithLastOccurrenceOfPattern(string text, int shift, 80 | int wordLength, int textLength, Span badCharacterTable, bool ignoreCase) 81 | { 82 | return shift + (shift + wordLength < textLength 83 | ? wordLength - badCharacterTable[GetCharacter()] 84 | : 1); 85 | 86 | char GetCharacter() 87 | { 88 | return ignoreCase ? 
/// <summary>
/// Substring search based on the Knuth-Morris-Pratt algorithm.
/// </summary>
public static class KnuthMorrisPratt
{
    /// <summary>
    /// Checks whether <paramref name="word"/> occurs at least once in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search in.</param>
    /// <param name="word">The word to look for.</param>
    /// <param name="ignoreCase">If <c>true</c>, characters are compared via their invariant upper-case form.</param>
    /// <returns><c>true</c> if at least one occurrence exists; otherwise <c>false</c>.</returns>
    public static bool HasPattern(string text, string word, bool ignoreCase = false) =>
        FindAll(text, word, ignoreCase, true).Any();

    /// <summary>
    /// Lazily enumerates the zero-based start indexes of every (possibly overlapping)
    /// occurrence of <paramref name="pattern"/> in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search in.</param>
    /// <param name="pattern">The pattern to look for.</param>
    /// <param name="ignoreCase">If <c>true</c>, characters are compared via their invariant upper-case form.</param>
    /// <param name="abortOnFirstOccurence">If <c>true</c>, the enumeration ends after the first hit.</param>
    /// <returns>
    /// The start indexes in ascending order. The sequence is empty when either argument is
    /// <c>null</c> or empty, or when the pattern is longer than the text.
    /// </returns>
    public static IEnumerable<int> FindAll(string text, string pattern,
        bool ignoreCase = false, bool abortOnFirstOccurence = false)
    {
        if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern))
        {
            yield break;
        }

        if (text.Length < pattern.Length)
        {
            yield break;
        }

        var positionInText = 0;
        var positionInPattern = 0;

        // The failure table describes the PATTERN (not the text): for every pattern position it
        // tells where to resume comparing after a mismatch.
        var knuthMorrisPrattTable = CreateTable(pattern, ignoreCase);

        while (positionInText < text.Length)
        {
            if (CharacterEqual(text, pattern, ignoreCase, positionInText, positionInPattern))
            {
                positionInText++;
                positionInPattern++;

                if (positionInPattern == pattern.Length)
                {
                    yield return positionInText - positionInPattern;

                    if (abortOnFirstOccurence)
                    {
                        yield break;
                    }

                    // The extra table slot at index pattern.Length allows overlapping matches.
                    positionInPattern = knuthMorrisPrattTable[positionInPattern];
                }
            }
            else
            {
                positionInPattern = knuthMorrisPrattTable[positionInPattern];
                if (positionInPattern < 0)
                {
                    // -1 means: no prefix can be reused, move on to the next text character.
                    positionInText++;
                    positionInPattern++;
                }
            }
        }
    }

    /// <summary>
    /// Builds the failure table for <paramref name="pattern"/>. The table has
    /// <c>pattern.Length + 1</c> entries; the last one is the resume position after a full match.
    /// </summary>
    private static int[] CreateTable(string pattern, bool ignoreCase)
    {
        var table = new int[pattern.Length + 1];
        table[0] = -1;
        var position = 1;
        var candidate = 0;

        while (position < pattern.Length)
        {
            if (CharacterEqual(pattern, pattern, ignoreCase, position, candidate))
            {
                table[position] = table[candidate];
            }
            else
            {
                table[position] = candidate;
                while (candidate >= 0 && !CharacterEqual(pattern, pattern, ignoreCase, position, candidate))
                {
                    candidate = table[candidate];
                }
            }

            position++;
            candidate++;
        }

        // position == pattern.Length here; this slot is only read after a complete match.
        table[position] = candidate;
        return table;
    }

    /// <summary>
    /// Compares one character of <paramref name="text"/> with one character of
    /// <paramref name="pattern"/>, optionally ignoring case.
    /// </summary>
    private static bool CharacterEqual(string text, string pattern, bool ignoreCase, int positionInText,
        int positionInPattern)
    {
        return ignoreCase
            ? char.ToUpperInvariant(text[positionInText]) == char.ToUpperInvariant(pattern[positionInPattern])
            : text[positionInText] == pattern[positionInPattern];
    }
}
/// <summary>
/// Substring search based on the Z-algorithm (Z-array of "pattern + separator + text").
/// </summary>
public static class ZAlgorithm
{
    /// <summary>
    /// Checks whether <paramref name="word"/> occurs at least once in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search in.</param>
    /// <param name="word">The word to look for.</param>
    /// <param name="ignoreCase">If <c>true</c>, both strings are upper-cased (invariant) before comparing.</param>
    /// <returns><c>true</c> if at least one occurrence exists; otherwise <c>false</c>.</returns>
    public static bool HasPattern(string text, string word, bool ignoreCase = false) =>
        FindAll(text, word, ignoreCase, true).Any();

    /// <summary>
    /// Lazily enumerates the zero-based start indexes of every (possibly overlapping)
    /// occurrence of <paramref name="pattern"/> in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search in.</param>
    /// <param name="pattern">The pattern to look for.</param>
    /// <param name="ignoreCase">If <c>true</c>, both strings are upper-cased (invariant) before comparing.</param>
    /// <param name="abortOnFirstOccurence">If <c>true</c>, the enumeration ends after the first hit.</param>
    /// <returns>
    /// The start indexes in ascending order. The sequence is empty when either argument is
    /// <c>null</c> or empty, or when the pattern is longer than the text.
    /// </returns>
    public static IEnumerable<int> FindAll(string text, string pattern,
        bool ignoreCase = false, bool abortOnFirstOccurence = false)
    {
        if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern))
        {
            yield break;
        }

        if (text.Length < pattern.Length)
        {
            yield break;
        }

        // Invariant casing keeps the result independent of the current culture and consistent
        // with the per-character ToUpperInvariant comparison used by the other searchers.
        var concat = ignoreCase
            ? (pattern + "$" + text).ToUpperInvariant()
            : pattern + "$" + text;

        var zArray = CreateZArray(concat.AsSpan());

        // Only positions inside the text part can be matches, so start behind the separator.
        // ">=" (instead of "==") keeps the result correct when the text itself contains '$' and
        // the common prefix therefore runs past the pattern length.
        for (var i = pattern.Length + 1; i < concat.Length; i++)
        {
            if (zArray[i] >= pattern.Length)
            {
                yield return i - pattern.Length - 1;

                if (abortOnFirstOccurence)
                {
                    yield break;
                }
            }
        }
    }

    /// <summary>
    /// Computes the Z-array of <paramref name="concat"/>: entry <c>i</c> (for <c>i &gt; 0</c>) is the
    /// length of the longest common prefix of <paramref name="concat"/> and its suffix starting
    /// at <c>i</c>. Entry 0 is left at 0.
    /// </summary>
    private static int[] CreateZArray(ReadOnlySpan<char> concat)
    {
        var zArray = new int[concat.Length];

        // [left, right] is the right-most window known to match a prefix of concat.
        var left = 0;
        var right = 0;

        for (var current = 1; current < concat.Length; current++)
        {
            if (current > right)
            {
                // Outside the window: compare from scratch.
                left = right = current;

                while (right < concat.Length && concat[right - left] == concat[right])
                {
                    right++;
                }

                zArray[current] = right - left;
                right--;
            }
            else
            {
                var k = current - left;

                if (zArray[k] < right - current + 1)
                {
                    // The mirrored value lies completely inside the window and can be copied.
                    zArray[current] = zArray[k];
                }
                else
                {
                    // The match reaches the window border: extend it by explicit comparison
                    // up to the end of the input.
                    left = current;
                    while (right < concat.Length && concat[right - left] == concat[right])
                    {
                        right++;
                    }

                    zArray[current] = right - left;
                    right--;
                }
            }
        }

        return zArray;
    }
}
5 | 6 | ## Currently implemented algorithms 7 | ### Edit-Distances 8 | * Longest Common Subsequence 9 | * Longest Common Substring 10 | * Levenshtein Distance 11 | * Hamming Distance 12 | 13 | ### Search 14 | * Knuth-Morris-Pratt 15 | * Boyer-Moore 16 | * Z-Algorithm 17 | 18 | ### Data Structure 19 | * Trie 20 | * Rope 21 | 22 | ### Compression 23 | * Lempel-Ziv-Welch --------------------------------------------------------------------------------