├── .github
│   └── workflows
│       └── dotnet.yml
├── .gitignore
├── LICENSE
├── LinkDotNet.Benchmarks
│   ├── 1000Words.txt
│   ├── Benchmarks.cs
│   ├── LinkDotNet.Benchmarks.csproj
│   ├── RopeConcatTests.cs
│   ├── SearchTests.cs
│   └── TrieVsHashSet.cs
├── LinkDotNet.StringOperations.UnitTests
│   ├── Compression
│   │   └── LempelZivWelchTests.cs
│   ├── DataStructure
│   │   ├── RopeTests.cs
│   │   └── TrieTests.cs
│   ├── EditDistance
│   │   └── EditDistancesTests.cs
│   ├── LinkDotNet.StringOperations.UnitTests.csproj
│   └── Search
│       ├── BoyerMooreTests.cs
│       ├── KnuthMorrisPrattTests.cs
│       └── ZAlgorithmTests.cs
├── LinkDotNet.StringOperations.sln
├── LinkDotNet.StringOperations
│   ├── Compression
│   │   └── LempelZivWelch.cs
│   ├── DataStructure
│   │   ├── Rope.cs
│   │   └── Trie.cs
│   ├── EditDistance
│   │   ├── HammingDistance.cs
│   │   ├── Levenshtein.cs
│   │   ├── LongestCommonSubsequence.cs
│   │   └── LongestCommonSubstring.cs
│   ├── LinkDotNet.StringOperations.csproj
│   └── Search
│       ├── BoyerMoore.cs
│       ├── KnuthMorrisPratt.cs
│       └── ZAlgorithm.cs
└── README.md
/.github/workflows/dotnet.yml:
--------------------------------------------------------------------------------
1 | # Continuous integration: restore, build and test the solution for pushes and pull requests targeting master.
2 | name: .NET
3 |
4 | on:
5 |   push:
6 |     branches: [ master ]
7 |   pull_request:
8 |     branches: [ master ]
9 |
10 | jobs:
11 |   build:
12 |
13 |     runs-on: ubuntu-latest
14 |
15 |     steps:
16 |     # Current major versions of both actions; the older v2 / v1 releases target a Node.js runtime
17 |     # that GitHub Actions has deprecated.
18 |     - uses: actions/checkout@v4
19 |     - name: Setup .NET
20 |       uses: actions/setup-dotnet@v4
21 |       with:
22 |         dotnet-version: 6.0.x
23 |     - name: Restore dependencies
24 |       run: dotnet restore
25 |     # --no-restore / --no-build reuse the output of the previous step instead of repeating it.
26 |     - name: Build
27 |       run: dotnet build --no-restore
28 |     - name: Test
29 |       run: dotnet test --no-build --verbosity normal
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/
2 | obj/
3 | /packages/
4 | riderModule.iml
5 | /_ReSharper.Caches/
6 | .idea/
7 | *.DotSettings.user
8 | .vs/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Steven Giesel
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LinkDotNet.Benchmarks/1000Words.txt:
--------------------------------------------------------------------------------
1 | a
2 | ability
3 | able
4 | about
5 | above
6 | accept
7 | according
8 | account
9 | across
10 | act
11 | action
12 | activity
13 | actually
14 | add
15 | address
16 | administration
17 | admit
18 | adult
19 | affect
20 | after
21 | again
22 | against
23 | age
24 | agency
25 | agent
26 | ago
27 | agree
28 | agreement
29 | ahead
30 | air
31 | all
32 | allow
33 | almost
34 | alone
35 | along
36 | already
37 | also
38 | although
39 | always
40 | American
41 | among
42 | amount
43 | analysis
44 | and
45 | animal
46 | another
47 | answer
48 | any
49 | anyone
50 | anything
51 | appear
52 | apply
53 | approach
54 | area
55 | argue
56 | arm
57 | around
58 | arrive
59 | art
60 | article
61 | artist
62 | as
63 | ask
64 | assume
65 | at
66 | attack
67 | attention
68 | attorney
69 | audience
70 | author
71 | authority
72 | available
73 | avoid
74 | away
75 | baby
76 | back
77 | bad
78 | bag
79 | ball
80 | bank
81 | bar
82 | base
83 | be
84 | beat
85 | beautiful
86 | because
87 | become
88 | bed
89 | before
90 | begin
91 | behavior
92 | behind
93 | believe
94 | benefit
95 | best
96 | better
97 | between
98 | beyond
99 | big
100 | bill
101 | billion
102 | bit
103 | black
104 | blood
105 | blue
106 | board
107 | body
108 | book
109 | born
110 | both
111 | box
112 | boy
113 | break
114 | bring
115 | brother
116 | budget
117 | build
118 | building
119 | business
120 | but
121 | buy
122 | by
123 | call
124 | camera
125 | campaign
126 | can
127 | cancer
128 | candidate
129 | capital
130 | car
131 | card
132 | care
133 | career
134 | carry
135 | case
136 | catch
137 | cause
138 | cell
139 | center
140 | central
141 | century
142 | certain
143 | certainly
144 | chair
145 | challenge
146 | chance
147 | change
148 | character
149 | charge
150 | check
151 | child
152 | choice
153 | choose
154 | church
155 | citizen
156 | city
157 | civil
158 | claim
159 | class
160 | clear
161 | clearly
162 | close
163 | coach
164 | cold
165 | collection
166 | college
167 | color
168 | come
169 | commercial
170 | common
171 | community
172 | company
173 | compare
174 | computer
175 | concern
176 | condition
177 | conference
178 | Congress
179 | consider
180 | consumer
181 | contain
182 | continue
183 | control
184 | cost
185 | could
186 | country
187 | couple
188 | course
189 | court
190 | cover
191 | create
192 | crime
193 | cultural
194 | culture
195 | cup
196 | current
197 | customer
198 | cut
199 | dark
200 | data
201 | daughter
202 | day
203 | dead
204 | deal
205 | death
206 | debate
207 | decade
208 | decide
209 | decision
210 | deep
211 | defense
212 | degree
213 | Democrat
214 | democratic
215 | describe
216 | design
217 | despite
218 | detail
219 | determine
220 | develop
221 | development
222 | die
223 | difference
224 | different
225 | difficult
226 | dinner
227 | direction
228 | director
229 | discover
230 | discuss
231 | discussion
232 | disease
233 | do
234 | doctor
235 | dog
236 | door
237 | down
238 | draw
239 | dream
240 | drive
241 | drop
242 | drug
243 | during
244 | each
245 | early
246 | east
247 | easy
248 | eat
249 | economic
250 | economy
251 | edge
252 | education
253 | effect
254 | effort
255 | eight
256 | either
257 | election
258 | else
259 | employee
260 | end
261 | energy
262 | enjoy
263 | enough
264 | enter
265 | entire
266 | environment
267 | environmental
268 | especially
269 | establish
270 | even
271 | evening
272 | event
273 | ever
274 | every
275 | everybody
276 | everyone
277 | everything
278 | evidence
279 | exactly
280 | example
281 | executive
282 | exist
283 | expect
284 | experience
285 | expert
286 | explain
287 | eye
288 | face
289 | fact
290 | factor
291 | fail
292 | fall
293 | family
294 | far
295 | fast
296 | father
297 | fear
298 | federal
299 | feel
300 | feeling
301 | few
302 | field
303 | fight
304 | figure
305 | fill
306 | film
307 | final
308 | finally
309 | financial
310 | find
311 | fine
312 | finger
313 | finish
314 | fire
315 | firm
316 | first
317 | fish
318 | five
319 | floor
320 | fly
321 | focus
322 | follow
323 | food
324 | foot
325 | for
326 | force
327 | foreign
328 | forget
329 | form
330 | former
331 | forward
332 | four
333 | free
334 | friend
335 | from
336 | front
337 | full
338 | fund
339 | future
340 | game
341 | garden
342 | gas
343 | general
344 | generation
345 | get
346 | girl
347 | give
348 | glass
349 | go
350 | goal
351 | good
352 | government
353 | great
354 | green
355 | ground
356 | group
357 | grow
358 | growth
359 | guess
360 | gun
361 | guy
362 | hair
363 | half
364 | hand
365 | hang
366 | happen
367 | happy
368 | hard
369 | have
370 | he
371 | head
372 | health
373 | hear
374 | heart
375 | heat
376 | heavy
377 | help
378 | her
379 | here
380 | herself
381 | high
382 | him
383 | himself
384 | his
385 | history
386 | hit
387 | hold
388 | home
389 | hope
390 | hospital
391 | hot
392 | hotel
393 | hour
394 | house
395 | how
396 | however
397 | huge
398 | human
399 | hundred
400 | husband
401 | I
402 | idea
403 | identify
404 | if
405 | image
406 | imagine
407 | impact
408 | important
409 | improve
410 | in
411 | include
412 | including
413 | increase
414 | indeed
415 | indicate
416 | individual
417 | industry
418 | information
419 | inside
420 | instead
421 | institution
422 | interest
423 | interesting
424 | international
425 | interview
426 | into
427 | investment
428 | involve
429 | issue
430 | it
431 | item
432 | its
433 | itself
434 | job
435 | join
436 | just
437 | keep
438 | key
439 | kid
440 | kill
441 | kind
442 | kitchen
443 | know
444 | knowledge
445 | land
446 | language
447 | large
448 | last
449 | late
450 | later
451 | laugh
452 | law
453 | lawyer
454 | lay
455 | lead
456 | leader
457 | learn
458 | least
459 | leave
460 | left
461 | leg
462 | legal
463 | less
464 | let
465 | letter
466 | level
467 | lie
468 | life
469 | light
470 | like
471 | likely
472 | line
473 | list
474 | listen
475 | little
476 | live
477 | local
478 | long
479 | look
480 | lose
481 | loss
482 | lot
483 | love
484 | low
485 | machine
486 | magazine
487 | main
488 | maintain
489 | major
490 | majority
491 | make
492 | man
493 | manage
494 | management
495 | manager
496 | many
497 | market
498 | marriage
499 | material
500 | matter
501 | may
502 | maybe
503 | me
504 | mean
505 | measure
506 | media
507 | medical
508 | meet
509 | meeting
510 | member
511 | memory
512 | mention
513 | message
514 | method
515 | middle
516 | might
517 | military
518 | million
519 | mind
520 | minute
521 | miss
522 | mission
523 | model
524 | modern
525 | moment
526 | money
527 | month
528 | more
529 | morning
530 | most
531 | mother
532 | mouth
533 | move
534 | movement
535 | movie
536 | Mr
537 | Mrs
538 | much
539 | music
540 | must
541 | my
542 | myself
543 | name
544 | nation
545 | national
546 | natural
547 | nature
548 | near
549 | nearly
550 | necessary
551 | need
552 | network
553 | never
554 | new
555 | news
556 | newspaper
557 | next
558 | nice
559 | night
560 | no
561 | none
562 | nor
563 | north
564 | not
565 | note
566 | nothing
567 | notice
568 | now
569 | n't
570 | number
571 | occur
572 | of
573 | off
574 | offer
575 | office
576 | officer
577 | official
578 | often
579 | oh
580 | oil
581 | ok
582 | old
583 | on
584 | once
585 | one
586 | only
587 | onto
588 | open
589 | operation
590 | opportunity
591 | option
592 | or
593 | order
594 | organization
595 | other
596 | others
597 | our
598 | out
599 | outside
600 | over
601 | own
602 | owner
603 | page
604 | pain
605 | painting
606 | paper
607 | parent
608 | part
609 | participant
610 | particular
611 | particularly
612 | partner
613 | party
614 | pass
615 | past
616 | patient
617 | pattern
618 | pay
619 | peace
620 | people
621 | per
622 | perform
623 | performance
624 | perhaps
625 | period
626 | person
627 | personal
628 | phone
629 | physical
630 | pick
631 | picture
632 | piece
633 | place
634 | plan
635 | plant
636 | play
637 | player
638 | PM
639 | point
640 | police
641 | policy
642 | political
643 | politics
644 | poor
645 | popular
646 | population
647 | position
648 | positive
649 | possible
650 | power
651 | practice
652 | prepare
653 | present
654 | president
655 | pressure
656 | pretty
657 | prevent
658 | price
659 | private
660 | probably
661 | problem
662 | process
663 | produce
664 | product
665 | production
666 | professional
667 | professor
668 | program
669 | project
670 | property
671 | protect
672 | prove
673 | provide
674 | public
675 | pull
676 | purpose
677 | push
678 | put
679 | quality
680 | question
681 | quickly
682 | quite
683 | race
684 | radio
685 | raise
686 | range
687 | rate
688 | rather
689 | reach
690 | read
691 | ready
692 | real
693 | reality
694 | realize
695 | really
696 | reason
697 | receive
698 | recent
699 | recently
700 | recognize
701 | record
702 | red
703 | reduce
704 | reflect
705 | region
706 | relate
707 | relationship
708 | religious
709 | remain
710 | remember
711 | remove
712 | report
713 | represent
714 | Republican
715 | require
716 | research
717 | resource
718 | respond
719 | response
720 | responsibility
721 | rest
722 | result
723 | return
724 | reveal
725 | rich
726 | right
727 | rise
728 | risk
729 | road
730 | rock
731 | role
732 | room
733 | rule
734 | run
735 | safe
736 | same
737 | save
738 | say
739 | scene
740 | school
741 | science
742 | scientist
743 | score
744 | sea
745 | season
746 | seat
747 | second
748 | section
749 | security
750 | see
751 | seek
752 | seem
753 | sell
754 | send
755 | senior
756 | sense
757 | series
758 | serious
759 | serve
760 | service
761 | set
762 | seven
763 | several
764 | sex
765 | sexual
766 | shake
767 | share
768 | she
769 | shoot
770 | short
771 | shot
772 | should
773 | shoulder
774 | show
775 | side
776 | sign
777 | significant
778 | similar
779 | simple
780 | simply
781 | since
782 | sing
783 | single
784 | sister
785 | sit
786 | site
787 | situation
788 | six
789 | size
790 | skill
791 | skin
792 | small
793 | smile
794 | so
795 | social
796 | society
797 | soldier
798 | some
799 | somebody
800 | someone
801 | something
802 | sometimes
803 | son
804 | song
805 | soon
806 | sort
807 | sound
808 | source
809 | south
810 | southern
811 | space
812 | speak
813 | special
814 | specific
815 | speech
816 | spend
817 | sport
818 | spring
819 | staff
820 | stage
821 | stand
822 | standard
823 | star
824 | start
825 | state
826 | statement
827 | station
828 | stay
829 | step
830 | still
831 | stock
832 | stop
833 | store
834 | story
835 | strategy
836 | street
837 | strong
838 | structure
839 | student
840 | study
841 | stuff
842 | style
843 | subject
844 | success
845 | successful
846 | such
847 | suddenly
848 | suffer
849 | suggest
850 | summer
851 | support
852 | sure
853 | surface
854 | system
855 | table
856 | take
857 | talk
858 | task
859 | tax
860 | teach
861 | teacher
862 | team
863 | technology
864 | television
865 | tell
866 | ten
867 | tend
868 | term
869 | test
870 | than
871 | thank
872 | that
873 | the
874 | their
875 | them
876 | themselves
877 | then
878 | theory
879 | there
880 | these
881 | they
882 | thing
883 | think
884 | third
885 | this
886 | those
887 | though
888 | thought
889 | thousand
890 | threat
891 | three
892 | through
893 | throughout
894 | throw
895 | thus
896 | time
897 | to
898 | today
899 | together
900 | tonight
901 | too
902 | top
903 | total
904 | tough
905 | toward
906 | town
907 | trade
908 | traditional
909 | training
910 | travel
911 | treat
912 | treatment
913 | tree
914 | trial
915 | trip
916 | trouble
917 | true
918 | truth
919 | try
920 | turn
921 | TV
922 | two
923 | type
924 | under
925 | understand
926 | unit
927 | until
928 | up
929 | upon
930 | us
931 | use
932 | usually
933 | value
934 | various
935 | very
936 | victim
937 | view
938 | violence
939 | visit
940 | voice
941 | vote
942 | wait
943 | walk
944 | wall
945 | want
946 | war
947 | watch
948 | water
949 | way
950 | we
951 | weapon
952 | wear
953 | week
954 | weight
955 | well
956 | west
957 | western
958 | what
959 | whatever
960 | when
961 | where
962 | whether
963 | which
964 | while
965 | white
966 | who
967 | whole
968 | whom
969 | whose
970 | why
971 | wide
972 | wife
973 | will
974 | win
975 | wind
976 | window
977 | wish
978 | with
979 | within
980 | without
981 | woman
982 | wonder
983 | word
984 | work
985 | worker
986 | world
987 | worry
988 | would
989 | write
990 | writer
991 | wrong
992 | yard
993 | yeah
994 | year
995 | yes
996 | yet
997 | you
998 | young
999 | your
1000 | yourself
--------------------------------------------------------------------------------
/LinkDotNet.Benchmarks/Benchmarks.cs:
--------------------------------------------------------------------------------
1 | using BenchmarkDotNet.Running;
2 |
3 | namespace LinkDotNet.Benchmarks;
4 |
5 | /// <summary>
6 | /// Entry point of the benchmark executable.
7 | /// </summary>
8 | internal static class Benchmarks
9 | {
10 |     /// <summary>
11 |     /// Hands the assembly that contains this class to BenchmarkDotNet's
12 |     /// <see cref="BenchmarkSwitcher"/> and runs it.
13 |     /// </summary>
14 |     internal static void Main()
15 |     {
16 |         var benchmarkAssembly = typeof(Benchmarks).Assembly;
17 |         var switcher = BenchmarkSwitcher.FromAssembly(benchmarkAssembly);
18 |
19 |         switcher.Run();
20 |     }
21 | }
--------------------------------------------------------------------------------
/LinkDotNet.Benchmarks/LinkDotNet.Benchmarks.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net6.0
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | PreserveNewest
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/LinkDotNet.Benchmarks/RopeConcatTests.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 | using BenchmarkDotNet.Attributes;
3 | using LinkDotNet.StringOperations.DataStructure;
4 |
5 | namespace LinkDotNet.Benchmarks;
6 |
7 | /// <summary>
8 | /// Benchmarks appending many short strings to a CLR <see cref="string"/>,
9 | /// a <see cref="StringBuilder"/> and a <see cref="Rope"/>.
10 | /// </summary>
11 | public class RopeConcatTests
12 | {
13 |     // Number of appends performed by each benchmark invocation.
14 |     private const int AppendCount = 10000;
15 |
16 |     // Every variant starts from this text so that all three build exactly the same content.
17 |     private const string Seed = "Test";
18 |
19 |     /// <summary>
20 |     /// Baseline: repeated <c>+=</c> on an ordinary string.
21 |     /// </summary>
22 |     [Benchmark(Baseline = true)]
23 |     public void ConcatenateCLRStrings()
24 |     {
25 |         var clrString = Seed;
26 |         for (var i = 0; i < AppendCount; i++)
27 |         {
28 |             clrString += $"some string{i}";
29 |         }
30 |     }
31 |
32 |     /// <summary>
33 |     /// Repeated <see cref="StringBuilder.Append(string)"/> calls.
34 |     /// </summary>
35 |     [Benchmark]
36 |     public void ConcatenateStringBuilder()
37 |     {
38 |         // Seeded like the other two variants; an empty builder would build different content than they do.
39 |         var stringBuilder = new StringBuilder(Seed);
40 |         for (var i = 0; i < AppendCount; i++)
41 |         {
42 |             stringBuilder.Append($"some string{i}");
43 |         }
44 |     }
45 |
46 |     /// <summary>
47 |     /// Repeated <c>+=</c> of a string onto a <see cref="Rope"/>.
48 |     /// </summary>
49 |     [Benchmark]
50 |     public void ConcatenateRope()
51 |     {
52 |         var rope = Rope.Create(Seed);
53 |         for (var i = 0; i < AppendCount; i++)
54 |         {
55 |             rope += $"some string{i}";
56 |         }
57 |     }
58 | }
--------------------------------------------------------------------------------
/LinkDotNet.Benchmarks/SearchTests.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.Linq;
3 | using BenchmarkDotNet.Attributes;
4 | using LinkDotNet.StringOperations.Search;
5 |
6 | namespace LinkDotNet.Benchmarks;
7 |
8 | /// <summary>
9 | /// Benchmarks the Knuth-Morris-Pratt, Boyer-Moore and Z-algorithm searches against each other
10 | /// on one fixed sentence.
11 | /// </summary>
12 | public class SearchTests
13 | {
14 |     // The sentence that is searched; it contains the pattern twice, the second time at the very end.
15 |     private const string Text = "The quick brown fox jumps over the lazy dog maybe also a cat a sheep and another dog";
16 |
17 |     // The pattern searched for in Text.
18 |     private const string Word = "dog";
19 |
20 |     /// <summary>Existence check with Knuth-Morris-Pratt.</summary>
21 |     [Benchmark]
22 |     public bool KnuthMorrisPrattContains() => KnuthMorrisPratt.HasPattern(Text, Word);
23 |
24 |     /// <summary>Existence check with Boyer-Moore.</summary>
25 |     [Benchmark]
26 |     public bool BoyerMooreContains() => BoyerMoore.HasPattern(Text, Word);
27 |
28 |     /// <summary>Existence check with the Z-algorithm.</summary>
29 |     [Benchmark]
30 |     public bool ZAlgorithmContains() => ZAlgorithm.HasPattern(Text, Word);
31 |
32 |     // The FindAll results are materialised with ToList so that the complete enumeration is part of
33 |     // the measurement. NOTE(review): the element type is assumed to be int (match positions) for
34 |     // all three algorithms - confirm against the FindAll signatures.
35 |
36 |     /// <summary>Collects every match reported by Knuth-Morris-Pratt.</summary>
37 |     [Benchmark]
38 |     public IList<int> KnuthMorrisPrattPrattFindAll() => KnuthMorrisPratt.FindAll(Text, Word).ToList();
39 |
40 |     /// <summary>Collects every match reported by Boyer-Moore.</summary>
41 |     [Benchmark]
42 |     public IList<int> BoyerMooreFindAll() => BoyerMoore.FindAll(Text, Word).ToList();
43 |
44 |     /// <summary>Collects every match reported by the Z-algorithm.</summary>
45 |     [Benchmark]
46 |     public IList<int> ZAlgorithmFindAll() => ZAlgorithm.FindAll(Text, Word).ToList();
47 | }
--------------------------------------------------------------------------------
/LinkDotNet.Benchmarks/TrieVsHashSet.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.IO;
3 | using System.Linq;
4 | using BenchmarkDotNet.Attributes;
5 | using LinkDotNet.StringOperations.DataStructure;
6 |
7 | namespace LinkDotNet.Benchmarks;
8 |
9 | /// <summary>
10 | /// Benchmarks prefix and exact-word lookups on a <see cref="Trie"/> against the same lookups
11 | /// on a <see cref="HashSet{T}"/> filled with the same words.
12 | /// </summary>
13 | [MemoryDiagnoser]
14 | public class TrieVsHashSet
15 | {
16 |     // Must match the on-disk name exactly: on a case-sensitive file system "1000words.txt" is not found.
17 |     private const string WordListFile = "1000Words.txt";
18 |
19 |     private readonly HashSet<string> _hashSet = new();
20 |     private readonly Trie _trie = new();
21 |
22 |     /// <summary>
23 |     /// Reads the word list line by line and adds every line to both collections before the benchmarks run.
24 |     /// </summary>
25 |     [GlobalSetup]
26 |     public void Setup()
27 |     {
28 |         foreach (var word in File.ReadAllLines(WordListFile))
29 |         {
30 |             _hashSet.Add(word);
31 |             _trie.Add(word);
32 |         }
33 |     }
34 |
35 |     /// <summary>
36 |     /// Scans the whole set for words starting with "Hel".
37 |     /// </summary>
38 |     [Benchmark]
39 |     public IList<string> FindAllInHashSet() =>
40 |         // Ordinal comparison: the default StartsWith(string) is culture-sensitive, which is
41 |         // neither needed nor wanted for a plain character-prefix check.
42 |         _hashSet.Where(h => h.StartsWith("Hel", System.StringComparison.Ordinal)).ToList();
43 |
44 |     /// <summary>
45 |     /// Asks the trie for all words starting with "Hel".
46 |     /// </summary>
47 |     [Benchmark]
48 |     public IList<string> FindAllInTrie() => _trie.GetWordsWithPrefix("Hel").ToList();
49 |
50 |     /// <summary>
51 |     /// Exact lookup of "happy" in the set.
52 |     /// </summary>
53 |     [Benchmark]
54 |     // Contains uses the set's hash lookup; Any(h => h == ...) would walk every element instead.
55 |     public bool FindOneInHashSet() => _hashSet.Contains("happy");
56 |
57 |     /// <summary>
58 |     /// Exact lookup of "happy" in the trie.
59 |     /// </summary>
60 |     [Benchmark]
61 |     public bool FindOneInTrie() => _trie.Find("happy");
62 | }
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/Compression/LempelZivWelchTests.cs:
--------------------------------------------------------------------------------
1 | using LinkDotNet.StringOperations.Compression;
2 | using Xunit;
3 |
4 | namespace LinkDotNet.StringOperations.UnitTests.Compression;
5 |
6 | /// <summary>
7 | /// Tests for <see cref="LempelZivWelch"/> encoding and decoding.
8 | /// </summary>
9 | public class LempelZivWelchTests
10 | {
11 |     // Decoding an encoded sentence must give back the original sentence.
12 |     [Fact]
13 |     public void ShouldEncodeAndDecode()
14 |     {
15 |         const string original = "Hey my name is Steven";
16 |
17 |         var roundTripped = LempelZivWelch.Decode(LempelZivWelch.Encode(original));
18 |
19 |         Assert.Equal(original, roundTripped);
20 |     }
21 |
22 |     // The encoded form of a sentence with repeated words must be shorter than the sentence itself.
23 |     [Fact]
24 |     public void ShouldCompressText()
25 |     {
26 |         const string plainText = "Here is your text, which consists out of multiple words. They, the words, can appear again";
27 |
28 |         var encodedLength = LempelZivWelch.Encode(plainText).Length;
29 |
30 |         Assert.True(encodedLength < plainText.Length);
31 |     }
32 | }
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/DataStructure/RopeTests.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using LinkDotNet.StringOperations.DataStructure;
3 | using Xunit;
4 |
5 | namespace LinkDotNet.StringOperations.UnitTests.DataStructure;
6 |
7 | /// <summary>
8 | /// Tests for <see cref="Rope"/>: creation, concatenation, indexing, splitting, inserting and deleting.
9 | /// </summary>
10 | public class RopeTests
11 | {
12 |     // Converting a rope back to a string must reproduce the text it was created from.
13 |     [Fact]
14 |     public void ShouldCreateAndDisplayRope()
15 |     {
16 |         const string text = "Hello_my_name_is_Simon";
17 |
18 |         var rendered = Rope.Create(text, 4).ToString();
19 |
20 |         Assert.Equal(text, rendered);
21 |     }
22 |
23 |     [Fact]
24 |     public void ShouldConcat()
25 |     {
26 |         var hello = Rope.Create("Hello");
27 |         var world = Rope.Create("World");
28 |
29 |         var combined = hello + world;
30 |
31 |         Assert.Equal("HelloWorld", combined.ToString());
32 |     }
33 |
34 |     // Mixing a rope with a plain string on either side of + must still yield the joined text.
35 |     [Fact]
36 |     public void ShouldConcatWithStrings()
37 |     {
38 |         var hello = Rope.Create("Hello");
39 |         var world = Rope.Create("World");
40 |
41 |         var ropeThenString = hello + "World";
42 |         var stringThenRope = "Hello" + world;
43 |
44 |         Assert.Equal("HelloWorld", ropeThenString.ToString());
45 |         Assert.Equal("HelloWorld", stringThenRope.ToString());
46 |     }
47 |
48 |     [Fact]
49 |     public void ShouldGetIndex()
50 |     {
51 |         const string digits = "0123456789";
52 |         const int position = 5;
53 |
54 |         var rope = Rope.Create(digits, 2);
55 |
56 |         Assert.Equal(digits[position], rope[position]);
57 |     }
58 |
59 |     [Fact]
60 |     public void ShouldGetIndexAfterRebalance()
61 |     {
62 |         var joined = Rope.Create("012") + Rope.Create("345");
63 |
64 |         var character = joined[3];
65 |
66 |         Assert.Equal('3', character);
67 |     }
68 |
69 |     // Per the cases below, the character at the split index stays on the left side.
70 |     [Theory]
71 |     [InlineData("HelloWorld", 4, "Hello", "World")]
72 |     [InlineData("HelloWorld", 5, "HelloW", "orld")]
73 |     [InlineData("HelloWorld", 6, "HelloWo", "rld")]
74 |     [InlineData("0123456789", 2, "012", "3456789")]
75 |     [InlineData("0123456789", 8, "012345678", "9")]
76 |     [InlineData("0123456789", 0, "0", "123456789")]
77 |     public void ShouldSplitRope(string word, int indexToSplit, string expectedLeftSide, string expectedRightSide)
78 |     {
79 |         var parts = Rope.Create(word).Split(indexToSplit);
80 |
81 |         Assert.Equal(expectedLeftSide, parts.Item1.ToString());
82 |         Assert.Equal(expectedRightSide, parts.Item2.ToString());
83 |     }
84 |
85 |     [Fact]
86 |     public void ShouldThrowExceptionWhenNegativeIndex()
87 |     {
88 |         Assert.Throws(() => Rope.Create("a").Split(-1));
89 |     }
90 |
91 |     // Splitting at the last index leaves everything on the left and no right part at all.
92 |     [Fact]
93 |     public void ShouldReturnLeftPartWhenCompleteLength()
94 |     {
95 |         const string text = "01234567";
96 |
97 |         var parts = Rope.Create(text).Split(7);
98 |
99 |         Assert.Equal(text, parts.Item1.ToString());
100 |         Assert.Null(parts.Item2);
101 |     }
102 |
103 |     // The inserted rope ends up directly behind the character at the given index.
104 |     [Fact]
105 |     public void ShouldInsertRope()
106 |     {
107 |         var inserted = Rope.Create("Hello World").Insert(Rope.Create(" dear"), 4);
108 |
109 |         Assert.Equal("Hello dear World", inserted.ToString());
110 |     }
111 |
112 |     [Fact]
113 |     public void ShouldSplitAfterConcat()
114 |     {
115 |         var concatenated = Rope.Create("Hello") + "World";
116 |
117 |         var parts = concatenated.Split(6);
118 |
119 |         Assert.Equal("HelloWo", parts.Item1.ToString());
120 |         Assert.Equal("rld", parts.Item2.ToString());
121 |     }
122 |
123 |     // Delete(3, 3) removes the three characters "345".
124 |     [Fact]
125 |     public void ShouldDelete()
126 |     {
127 |         var remaining = Rope.Create("0123456789").Delete(3, 3);
128 |
129 |         Assert.Equal("0126789", remaining.ToString());
130 |     }
131 |
132 |     // Both a negative start index and a zero length must be rejected.
133 |     [Fact]
134 |     public void ShouldHavePositiveIndexAndLength()
135 |     {
136 |         var singleCharRope = Rope.Create("1");
137 |
138 |         Assert.Throws(() => singleCharRope.Delete(-1, 2));
139 |         Assert.Throws(() => singleCharRope.Delete(1, 0));
140 |     }
141 | }
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/DataStructure/TrieTests.cs:
--------------------------------------------------------------------------------
1 | using System.Linq;
2 | using LinkDotNet.StringOperations.DataStructure;
3 | using Xunit;
4 |
5 | namespace LinkDotNet.StringOperations.UnitTests.DataStructure;
6 |
7 | /// <summary>
8 | /// Tests for <see cref="Trie"/>: exact lookup, prefix check and prefix enumeration.
9 | /// </summary>
10 | public class TrieTests
11 | {
12 |     [Theory]
13 |     [InlineData("csharp", "csharp", false, true)]
14 |     [InlineData("cccc", "ccccc", false, false)]
15 |     [InlineData("words", "word", false, false)]
16 |     [InlineData("WOrd", "word", true, true)]
17 |     [InlineData("Word", "", true, false)]
18 |     [InlineData("Word", null, true, false)]
19 |     public void ShouldFindEntries(string wordToAdd, string wordToSearch, bool ignoreCase, bool expectedHit)
20 |     {
21 |         var trie = new Trie(ignoreCase);
22 |         trie.Add(wordToAdd);
23 |
24 |         Assert.Equal(expectedHit, trie.Find(wordToSearch));
25 |     }
26 |
27 |     // "efg" only occurs inside stored words or as the start of one, never as a complete word.
28 |     [Fact]
29 |     public void GivenMultipleWords_ShouldFindNotSubstring()
30 |     {
31 |         var trie = CreateSampleTrie();
32 |
33 |         Assert.False(trie.Find("efg"));
34 |     }
35 |
36 |     [Theory]
37 |     [InlineData("text", "te", false, true)]
38 |     [InlineData("Text", "tE", true, true)]
39 |     [InlineData("Word", "", true, false)]
40 |     [InlineData("Word", null, true, false)]
41 |     [InlineData("word", "word", false, true)]
42 |     [InlineData("word", "words", false, false)]
43 |     [InlineData("word", "odr", false, false)]
44 |     public void ShouldStartsWithEntries(string wordToAdd, string wordToSearch, bool ignoreCase, bool expectedHit)
45 |     {
46 |         var trie = new Trie(ignoreCase);
47 |         trie.Add(wordToAdd);
48 |
49 |         Assert.Equal(expectedHit, trie.StartsWith(wordToSearch));
50 |     }
51 |
52 |     // "def" occurs in the middle of stored words but no stored word begins with it.
53 |     [Fact]
54 |     public void GivenMultipleWords_ShouldNotFindStartsWithSubstring()
55 |     {
56 |         var trie = CreateSampleTrie();
57 |
58 |         Assert.False(trie.StartsWith("def"));
59 |     }
60 |
61 |     [Fact]
62 |     public void ShouldReturnAllWordsWithStartingPrefix()
63 |     {
64 |         var trie = new Trie();
65 |         trie.Add("Hello");
66 |         trie.Add("Helsinki");
67 |
68 |         var hitCount = trie.GetWordsWithPrefix("Hel").Count();
69 |
70 |         Assert.Equal(2, hitCount);
71 |     }
72 |
73 |     // Builds the trie shared by the two substring tests: "abcde", "abcdefg" and "efgh", added in that order.
74 |     private static Trie CreateSampleTrie()
75 |     {
76 |         var trie = new Trie();
77 |         foreach (var word in new[] { "abcde", "abcdefg", "efgh" })
78 |         {
79 |             trie.Add(word);
80 |         }
81 |
82 |         return trie;
83 |     }
84 | }
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/EditDistance/EditDistancesTests.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Linq;
3 | using LinkDotNet.StringOperations.EditDistance;
4 | using Xunit;
5 |
6 | namespace LinkDotNet.StringOperations.UnitTests.EditDistance;
7 |
8 | public class EditDistancesTests
9 | {
10 | [Theory]
11 | [InlineData("Hello", "Hallo", false, "Hllo")]
12 | [InlineData("HeLlO", "hallo", true, "HLlO")]
13 | [InlineData("Hello", "hel", true, "Hel")]
14 | [InlineData("abc", "cbe", false, "b")]
15 | [InlineData("", "", false, "")]
16 | [InlineData("Test", "", false, "")]
17 | public void CheckLongestCommonSubsequent(string one, string two, bool ignoreCase, string expected)
18 | {
19 | var actual = one.GetLongestCommonSubsequence(two, ignoreCase);
20 |
21 | Assert.Equal(expected, actual);
22 | }
23 |
24 | [Fact]
25 | public void ReturnNullOnNullWhen_WhenCallingGetLongestCommonSubsequent()
26 | {
27 | Assert.Null("string".GetLongestCommonSubsequence(null));
28 | }
29 |
30 | [Fact]
31 | public void ShouldReturnNull_WhenNullValueForLongestCommonSubsequence()
32 | {
33 | Assert.Null("test".GetLongestCommonSubsequence(null));
34 | Assert.Null(((string)null).GetLongestCommonSubsequence("null"));
35 | }
36 |
37 | [Theory]
38 | [InlineData("Hallo", "Hello", false, 1)]
39 | [InlineData("hALLO", "Hello", true, 1)]
40 | [InlineData("", "Hello", false, 5)]
41 | [InlineData("Hallo", "", false, 5)]
42 | [InlineData("olleH", "Hello", false, 4)]
43 | [InlineData("ABCDEF", "abcdef", false, 6)]
44 | public void CheckLevenshteinDistance(string one, string two, bool ignoreCase, int expectedDistance)
45 | {
46 | var actual = one.GetLevenshteinDistance(two, ignoreCase);
47 |
48 | Assert.Equal(expectedDistance, actual);
49 | }
50 |
51 | [Fact]
52 | public void ShouldReturn_WhenAbortCostHasReached()
53 | {
54 | const int abortCost = 3;
55 | var cost = "ABCDEFGHIKLMN".GetLevenshteinDistance("abcdefghijlkm", abortCost: abortCost);
56 |
57 | Assert.Equal(cost, abortCost);
58 | }
59 |
60 | [Fact]
61 | public void ShouldThrow_WhenNullValueForLevenshtein()
62 | {
63 | Assert.Throws(() => "test".GetLevenshteinDistance(null));
64 | Assert.Throws(() => ((string) null).GetLevenshteinDistance("Test"));
65 | }
66 |
67 | [Theory]
68 | [InlineData("ThatIsAWord", "Word", false, "Word")]
69 | [InlineData("WordLonger", "LongerWord", false, "Longer")]
70 | public void CheckLongestSubstring(string one, string two, bool ignoreCase, string expectedSubstring)
71 | {
72 | var longestCommonSubstring = one.GetLongestCommonSubstring(two, ignoreCase);
73 |
74 | Assert.Equal(expectedSubstring, longestCommonSubstring);
75 | }
76 |
77 | [Fact]
78 | public void ShouldReturnNull_WhenNullValueForLongestCommonSubstring()
79 | {
80 | Assert.Null("test".GetLongestCommonSubstring(null));
81 | Assert.Null(((string)null).GetLongestCommonSubstring("null"));
82 | }
83 |
84 | [Theory]
85 | [InlineData("Hallo", "Hello", false, 1)]
86 | [InlineData("a", "abc", false, 0)]
87 | [InlineData("abc", "a", false, 2)]
88 | [InlineData("ABC", "abc", true, 0)]
89 | [InlineData("ABC", "abc", false, 3)]
90 | public void ShouldCalculateHammingDistance(string one, string two, bool ignoreCase, int expectedCost)
91 | {
92 | var actualCost = one.GetHammingDistance(two, ignoreCase);
93 |
94 | Assert.Equal(expectedCost, actualCost);
95 | }
96 |
97 | [Fact]
98 | public void ShouldThrow_WhenNullValueForHammingDistance()
99 | {
100 | Assert.Throws(() => "test".GetHammingDistance(null));
101 | Assert.Throws(() => ((string) null).GetHammingDistance("Test"));
102 | }
103 |
104 | [Fact]
105 | public void ShouldGetClosestWords()
106 | {
107 | var actual = "Hallo".GetClosestWords(2, false, "Hallo", "Auto", "Something else", "Haribo");
108 |
109 | Assert.NotNull(actual);
110 | var collection = actual.ToArray();
111 | Assert.NotEmpty(collection);
112 | Assert.Equal(2, collection.Length);
113 | Assert.Equal("Hallo", collection[0]);
114 | Assert.Equal("Haribo", collection[1]);
115 | }
116 |
117 | [Fact]
118 | public void ShouldReturnEmptyArrayWhenNoInput()
119 | {
120 | var actual = ((string) null).GetClosestWords(1, false, "H");
121 |
122 | Assert.Empty(actual);
123 | }
124 |
125 | [Fact]
126 | public void ShouldReturnEmptyArrayWhenWordsEmpty()
127 | {
128 | var actual = "Test".GetClosestWords(1, false);
129 |
130 | Assert.Empty(actual);
131 | }
132 |
// Null entries in the candidate list are ignored.
[Fact]
public void ShouldCheckIfWordIsNull()
{
    var ranked = "Hallo".GetClosestWords(2, false, "Hallo", null).ToArray();

    var onlyMatch = Assert.Single(ranked);
    Assert.Equal("Hallo", onlyMatch);
}
141 |
// The single best candidate is returned.
[Fact]
public void ShouldGetClosestWord()
{
    var candidates = new[] { "Hello", "Helbo" };

    var closest = "Hallo".GetClosestWord(false, candidates);

    Assert.Equal("Hello", closest);
}
149 | }
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/LinkDotNet.StringOperations.UnitTests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net6.0
5 |
6 | false
7 |
8 |
9 |
10 |
11 |
12 |
13 | runtime; build; native; contentfiles; analyzers; buildtransitive
14 | all
15 |
16 |
17 | runtime; build; native; contentfiles; analyzers; buildtransitive
18 | all
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/Search/BoyerMooreTests.cs:
--------------------------------------------------------------------------------
1 | using System.Linq;
2 | using LinkDotNet.StringOperations.Search;
3 | using Xunit;
4 |
5 | namespace LinkDotNet.StringOperations.UnitTests.Search;
6 |
/// <summary>
/// Behavioural tests for <see cref="BoyerMoore"/>.
/// </summary>
public class BoyerMooreTests
{
    // Contains "text" three times, at offsets 11, 30 and 56.
    private const string SentenceWithThreeHits = "That is my text with the word text 3 times. That is why text again";

    [Fact]
    public void ShouldFindAllOccurrences()
    {
        var hits = BoyerMoore.FindAll(SentenceWithThreeHits, "Text", true).ToArray();

        Assert.Equal(3, hits.Length);
        Assert.Equal(new[] { 11, 30, 56 }, hits);
    }

    [Fact]
    public void DoNotGoOutOfBounds()
    {
        // The second "dog" ends exactly at the end of the text.
        var hits = BoyerMoore
            .FindAll("The quick brown fox jumps over the lazy dog maybe also a cat a sheep and another dog", "dog")
            .ToArray();

        Assert.Equal(2, hits.Length);
    }

    [Fact]
    public void ShouldAbortOnFirstOccurence()
    {
        var hits = BoyerMoore.FindAll(SentenceWithThreeHits, "Text", true, true).ToArray();

        var firstHit = Assert.Single(hits);
        Assert.Equal(11, firstHit);
    }

    [Theory]
    [InlineData(null, "null")]
    [InlineData("null", null)]
    [InlineData("", "null")]
    [InlineData("null", "")]
    public void ShouldReturnEmptyOccurrences_WhenGivenNullOrEmpty(string text, string pattern)
    {
        Assert.Empty(BoyerMoore.FindAll(text, pattern));
    }

    [Fact]
    public void GivenNoHit_ThenEmptyArray()
    {
        Assert.Empty(BoyerMoore.FindAll("Word", "Text"));
    }

    [Fact]
    public void GivenPatternLongerThanText_EmptyArray()
    {
        var hits = BoyerMoore.FindAll("t", "longer").ToArray();

        Assert.Empty(hits);
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/Search/KnuthMorrisPrattTests.cs:
--------------------------------------------------------------------------------
1 | using System.Linq;
2 | using LinkDotNet.StringOperations.Search;
3 | using Xunit;
4 |
5 | namespace LinkDotNet.StringOperations.UnitTests.Search;
6 |
/// <summary>
/// Behavioural tests for <see cref="KnuthMorrisPratt"/>.
/// </summary>
public class KnuthMorrisPrattTests
{
    // Contains "text" three times, at offsets 11, 30 and 56.
    private const string SentenceWithThreeHits = "That is my text with the word text 3 times. That is why text again";

    [Fact]
    public void ShouldFindAllOccurrences()
    {
        var hits = KnuthMorrisPratt.FindAll(SentenceWithThreeHits, "Text", true).ToArray();

        Assert.Equal(3, hits.Length);
        Assert.Equal(new[] { 11, 30, 56 }, hits);
    }

    [Fact]
    public void ShouldAbortOnFirstOccurence()
    {
        var hits = KnuthMorrisPratt.FindAll(SentenceWithThreeHits, "Text", true, true).ToArray();

        var firstHit = Assert.Single(hits);
        Assert.Equal(11, firstHit);
    }

    [Theory]
    [InlineData(null, "null")]
    [InlineData("null", null)]
    [InlineData("", "null")]
    [InlineData("null", "")]
    public void ShouldReturnEmptyOccurrences_WhenGivenNullOrEmpty(string text, string pattern)
    {
        Assert.Empty(KnuthMorrisPratt.FindAll(text, pattern));
    }

    [Fact]
    public void ShouldReturnIfOccurrenceInText()
    {
        Assert.True(KnuthMorrisPratt.HasPattern("KnuthMorrisPratt", "t"));
    }

    [Fact]
    public void GivenNoHit_ThenEmptyArray()
    {
        Assert.Empty(KnuthMorrisPratt.FindAll("Word", "Text"));
    }

    [Fact]
    public void GivenPatternLongerThanText_EmptyArray()
    {
        Assert.False(KnuthMorrisPratt.HasPattern("t", "longer"));
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.UnitTests/Search/ZAlgorithmTests.cs:
--------------------------------------------------------------------------------
1 | using System.Linq;
2 | using LinkDotNet.StringOperations.Search;
3 | using Xunit;
4 |
5 | namespace LinkDotNet.StringOperations.UnitTests.Search;
6 |
/// <summary>
/// Behavioural tests for <see cref="ZAlgorithm"/>.
/// </summary>
public class ZAlgorithmTests
{
    // Contains "text" three times, at offsets 11, 30 and 56.
    private const string SentenceWithThreeHits = "That is my text with the word text 3 times. That is why text again";

    [Fact]
    public void ShouldFindAllOccurrences()
    {
        var hits = ZAlgorithm.FindAll(SentenceWithThreeHits, "Text", true).ToArray();

        Assert.Equal(3, hits.Length);
        Assert.Equal(new[] { 11, 30, 56 }, hits);
    }

    [Fact]
    public void ShouldAbortOnFirstOccurence()
    {
        var hits = ZAlgorithm.FindAll(SentenceWithThreeHits, "Text", true, true).ToArray();

        var firstHit = Assert.Single(hits);
        Assert.Equal(11, firstHit);
    }

    [Theory]
    [InlineData(null, "null")]
    [InlineData("null", null)]
    [InlineData("", "null")]
    [InlineData("null", "")]
    public void ShouldReturnEmptyOccurrences_WhenGivenNullOrEmpty(string text, string pattern)
    {
        Assert.Empty(ZAlgorithm.FindAll(text, pattern));
    }

    [Fact]
    public void ShouldReturnIfOccurrenceInText()
    {
        Assert.True(ZAlgorithm.HasPattern("KnuthMorrisPratt", "t"));
    }

    [Fact]
    public void GivenNoHit_ThenEmptyArray()
    {
        Assert.Empty(ZAlgorithm.FindAll("Word", "Text"));
    }

    [Fact]
    public void GivenPatternLongerThanText_EmptyArray()
    {
        Assert.False(ZAlgorithm.HasPattern("t", "longer"));
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkDotNet.StringOperations", "LinkDotNet.StringOperations\LinkDotNet.StringOperations.csproj", "{B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}"
4 | EndProject
5 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkDotNet.StringOperations.UnitTests", "LinkDotNet.StringOperations.UnitTests\LinkDotNet.StringOperations.UnitTests.csproj", "{CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}"
6 | EndProject
7 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LinkDotNet.Benchmarks", "LinkDotNet.Benchmarks\LinkDotNet.Benchmarks.csproj", "{AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}"
8 | EndProject
9 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{9F4AB6BB-F2AA-4C44-B99D-86CB1FFEFDF2}"
10 | ProjectSection(SolutionItems) = preProject
11 | README.md = README.md
12 | EndProjectSection
13 | EndProject
14 | Global
15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
16 | Debug|Any CPU = Debug|Any CPU
17 | Release|Any CPU = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
20 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {B5F2AC91-9F3F-481A-81F3-0FB56B88FBF9}.Release|Any CPU.Build.0 = Release|Any CPU
24 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
25 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Debug|Any CPU.Build.0 = Debug|Any CPU
26 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Release|Any CPU.ActiveCfg = Release|Any CPU
27 | {CC119FDB-AE2C-4A0A-9224-6BC1B7C0E9A4}.Release|Any CPU.Build.0 = Release|Any CPU
28 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
29 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Debug|Any CPU.Build.0 = Debug|Any CPU
30 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Release|Any CPU.ActiveCfg = Release|Any CPU
31 | {AE6934D7-4CC0-4B77-B8A6-8C6195BA9500}.Release|Any CPU.Build.0 = Release|Any CPU
32 | EndGlobalSection
33 | EndGlobal
34 |
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/Compression/LempelZivWelch.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Collections.Immutable;
4 | using System.Text;
5 |
6 | namespace LinkDotNet.StringOperations.Compression;
7 |
/// <summary>
/// Lempel-Ziv-Welch compression. Both tables start with one entry per character code
/// 0..255 and grow by one entry for every new phrase that is seen.
/// </summary>
public static class LempelZivWelch
{
    private const int AlphabetSize = 256;

    /// <summary>
    /// Encodes <paramref name="text"/> into a sequence of LZW codes.
    /// </summary>
    /// <param name="text">The text to compress.</param>
    /// <returns>
    /// The code sequence; an initialised, empty array for empty input.
    /// </returns>
    /// <exception cref="KeyNotFoundException">
    /// A character with a code of 256 or above has to be emitted; such characters
    /// are not part of the initial table.
    /// </exception>
    public static ImmutableArray<int> Encode(ReadOnlySpan<char> text)
    {
        if (text.IsEmpty)
        {
            // ImmutableArray<int>.Empty is initialised; `new ImmutableArray<int>()` is the
            // default value whose Length/indexer throw.
            return ImmutableArray<int>.Empty;
        }

        var table = CreateEncodeTable();
        var nextCode = AlphabetSize;
        var output = ImmutableArray.CreateBuilder<int>();
        var phrase = text[0].ToString();

        for (var i = 1; i < text.Length; i++)
        {
            var extended = phrase + text[i];
            if (table.ContainsKey(extended))
            {
                // Known phrase: keep growing it.
                phrase = extended;
                continue;
            }

            // Unknown phrase: emit the longest known prefix and remember the extension.
            output.Add(table[phrase]);
            table[extended] = nextCode;
            nextCode++;
            phrase = text[i].ToString();
        }

        output.Add(table[phrase]);

        return output.ToImmutable();
    }

    /// <summary>
    /// Decodes a code sequence produced by <see cref="Encode"/> back into text.
    /// </summary>
    /// <param name="decodedText">The LZW codes to expand.</param>
    /// <returns>The decoded text; an empty string for a default or empty array.</returns>
    public static string Decode(ImmutableArray<int> decodedText)
    {
        if (decodedText.IsDefaultOrEmpty)
        {
            return string.Empty;
        }

        var table = CreateDecodeTable();
        var codes = decodedText.AsSpan();
        var current = codes[0];
        var entry = table[current];
        var output = new StringBuilder(entry);
        var firstCharacter = entry[0];
        var nextCode = AlphabetSize;

        for (var i = 1; i < codes.Length; i++)
        {
            var code = codes[i];

            if (!table.TryGetValue(code, out entry))
            {
                // The code refers to the entry that is about to be created:
                // previous phrase + first character of the previous phrase.
                entry = table[current] + firstCharacter;
            }

            output.Append(entry);
            firstCharacter = entry[0];
            table[nextCode] = table[current] + firstCharacter;
            nextCode++;
            current = code;
        }

        return output.ToString();
    }

    // Initial encoder table: single character -> its code (0..255).
    private static Dictionary<string, int> CreateEncodeTable()
    {
        var dictionary = new Dictionary<string, int>(AlphabetSize);

        for (var i = 0; i < AlphabetSize; i++)
        {
            dictionary[((char)i).ToString()] = i;
        }

        return dictionary;
    }

    // Initial decoder table: code (0..255) -> single character.
    private static Dictionary<int, string> CreateDecodeTable()
    {
        var dictionary = new Dictionary<int, string>(AlphabetSize);

        for (var i = 0; i < AlphabetSize; i++)
        {
            dictionary[i] = ((char)i).ToString();
        }

        return dictionary;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/DataStructure/Rope.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Text;
3 |
4 | namespace LinkDotNet.StringOperations.DataStructure;
5 |
/// <summary>
/// A rope: a binary tree whose leaves hold string fragments. A leaf's weight is the
/// length of its fragment; an inner node's weight is the number of characters in its
/// left subtree. Nodes built by <see cref="Concat"/> defer the weight calculation
/// until the first operation that needs it.
/// </summary>
public class Rope
{
    private string _fragment;
    private bool _hasToRecalculateWeights;
    private Rope _left;
    private Rope _right;
    private int _weight;

    private Rope() {}

    /// <summary>Gets the character at the zero-based <paramref name="index"/>.</summary>
    public char this[int index] => GetIndex(index);

    /// <summary>
    /// Splits the rope after <paramref name="index"/>: the first item holds the characters
    /// up to and including <paramref name="index"/>, the second item the rest
    /// (<c>null</c> when the split point is the last character of a leaf).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative.</exception>
    public Tuple<Rope, Rope> Split(int index)
    {
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(index), "Index can't be negative");
        }

        CheckRecalculation();

        return SplitRope(this, index);

        static Tuple<Rope, Rope> SplitRope(Rope node, int index)
        {
            if (node._left == null)
            {
                if (index == node._weight - 1)
                {
                    return new Tuple<Rope, Rope>(node, null);
                }

                // Slice the fragment directly instead of copying it into a char[] twice.
                var fragment = node._fragment.AsSpan();
                var item1 = Create(fragment[..(index + 1)]);
                var item2 = Create(fragment[(index + 1)..node._weight]);
                return new Tuple<Rope, Rope>(item1, item2);
            }

            if (index == node._weight - 1)
            {
                return new Tuple<Rope, Rope>(node._left, node._right);
            }

            if (index < node._weight)
            {
                var splitLeftSide = SplitRope(node._left, index);
                return new Tuple<Rope, Rope>(splitLeftSide.Item1, splitLeftSide.Item2 + node._right);
            }

            var splitRightSide = SplitRope(node._right, index - node._weight);
            return new Tuple<Rope, Rope>(node._left + splitRightSide.Item1, splitRightSide.Item2);
        }
    }

    /// <summary>
    /// Returns a new rope with <paramref name="other"/> inserted after the character at
    /// <paramref name="index"/>.
    /// </summary>
    public Rope Insert(Rope other, int index)
    {
        var pair = Split(index);
        var left = pair.Item1 + other;
        return pair.Item2 != null ? left + pair.Item2 : left;
    }

    /// <summary>
    /// Returns a new rope without the <paramref name="length"/> characters starting at
    /// <paramref name="startIndex"/>.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="startIndex"/> is negative or <paramref name="length"/> is not positive.
    /// </exception>
    public Rope Delete(int startIndex, int length)
    {
        if (startIndex < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(startIndex), "Starting index can't be negative");
        }

        if (length <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), "Length has to be bigger 0");
        }

        // Deleting from the very start keeps nothing on the left; Split(-1) would throw.
        var beforeStartIndex = startIndex == 0 ? null : Split(startIndex - 1).Item1;
        var afterStartPlusLength = Split(startIndex + length - 1).Item2;

        if (beforeStartIndex == null)
        {
            return afterStartPlusLength ?? Create(ReadOnlySpan<char>.Empty);
        }

        return afterStartPlusLength == null ? beforeStartIndex : beforeStartIndex + afterStartPlusLength;
    }

    /// <summary>Concatenates all fragments from left to right.</summary>
    public override string ToString()
    {
        var stringBuilder = new StringBuilder();
        AppendStrings(this, stringBuilder);

        return stringBuilder.ToString();
    }

    public static Rope operator +(Rope left, Rope right)
    {
        return Concat(left, right);
    }

    public static Rope operator +(Rope left, ReadOnlySpan<char> right)
    {
        var rightRope = Create(right);

        return left + rightRope;
    }

    public static Rope operator +(ReadOnlySpan<char> left, Rope right)
    {
        var leftRope = Create(left);

        return leftRope + right;
    }

    /// <summary>
    /// Creates a parent node over <paramref name="left"/> and <paramref name="right"/>.
    /// </summary>
    /// <param name="left">Left subtree.</param>
    /// <param name="right">Right subtree.</param>
    /// <param name="recalculateWeights">
    /// When <c>true</c> the weights are computed immediately; otherwise on first use.
    /// </param>
    public static Rope Concat(Rope left, Rope right, bool recalculateWeights = false)
    {
        var rope = new Rope { _left = left, _right = right, _hasToRecalculateWeights = true };

        if (recalculateWeights)
        {
            RefreshWeights(rope);
        }

        return rope;
    }

    /// <summary>
    /// Builds a balanced rope over <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to store.</param>
    /// <param name="leafLength">Range size above which a range is split into two children.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="leafLength"/> is smaller than 1 (the split would never terminate).
    /// </exception>
    public static Rope Create(ReadOnlySpan<char> text, int leafLength = 8)
    {
        if (leafLength < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(leafLength), "Leaf length has to be at least 1");
        }

        return CreateInternal(text, leafLength, 0, text.Length - 1);
    }

    // Builds the subtree for text[leftIndex..rightIndex] (both bounds inclusive).
    private static Rope CreateInternal(ReadOnlySpan<char> text, int leafLength, int leftIndex, int rightIndex)
    {
        var node = new Rope();

        if (rightIndex - leftIndex > leafLength)
        {
            var center = (rightIndex + leftIndex + 1) / 2;
            node._left = CreateInternal(text, leafLength, leftIndex, center);
            node._right = CreateInternal(text, leafLength, center + 1, rightIndex);
            // Children were just built, so their cached weights are valid.
            node._weight = GetCachedLength(node._left);
        }
        else
        {
            var rightIndexInclusiveUpperBound = rightIndex + 1;
            node._fragment = text[leftIndex..rightIndexInclusiveUpperBound].ToString();
            node._weight = node._fragment.Length;
        }

        return node;
    }

    private static void AppendStrings(Rope node, StringBuilder builder)
    {
        if (node == null)
        {
            return;
        }

        if (node._left == null && node._right == null)
        {
            builder.Append(node._fragment);
        }

        AppendStrings(node._left, builder);
        AppendStrings(node._right, builder);
    }

    private char GetIndex(int index)
    {
        CheckRecalculation();

        return GetIndexInternal(this, index);

        static char GetIndexInternal(Rope node, int index)
        {
            if (node._weight <= index && node._right != null)
            {
                return GetIndexInternal(node._right, index - node._weight);
            }

            if (node._left != null)
            {
                return GetIndexInternal(node._left, index);
            }

            return node._fragment[index];
        }
    }

    // Brings the weights of this node and every stale descendant up to date.
    private void CheckRecalculation()
    {
        if (_hasToRecalculateWeights)
        {
            RefreshWeights(this);
        }
    }

    // Recomputes the weight of every stale node below (and including) `node` and returns
    // the total number of characters stored under it. A node whose flag is cleared is
    // only ever produced by CreateInternal or by this method, so its whole subtree is
    // already up to date and is not visited again.
    private static int RefreshWeights(Rope node)
    {
        if (node == null)
        {
            return 0;
        }

        if (!node._hasToRecalculateWeights)
        {
            return GetCachedLength(node);
        }

        var leftLength = node._left == null
            ? node._fragment?.Length ?? 0
            : RefreshWeights(node._left);
        var rightLength = RefreshWeights(node._right);

        node._weight = leftLength;
        node._hasToRecalculateWeights = false;

        return leftLength + rightLength;
    }

    // Total character count of an up-to-date subtree: each node's weight covers its left
    // part, so summing the weights along the right spine covers everything.
    private static int GetCachedLength(Rope node)
    {
        var length = 0;
        for (var current = node; current != null; current = current._right)
        {
            length += current._weight;
        }

        return length;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/DataStructure/Trie.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 |
5 | namespace LinkDotNet.StringOperations.DataStructure;
6 |
/// <summary>
/// A prefix tree over characters. Every node maps a character to a child node; a node
/// is flagged as a leaf when a stored word ends on it.
/// </summary>
public class Trie
{
    private IDictionary<char, Trie> Children { get; set; } = new Dictionary<char, Trie>();
    private bool _isLeaf;
    private readonly bool _ignoreCase;

    public Trie() : this(false)
    {
    }

    /// <param name="ignoreCase">
    /// When <c>true</c>, characters are upper-cased (invariant) before they are stored or looked up.
    /// </param>
    public Trie(bool ignoreCase)
    {
        _ignoreCase = ignoreCase;
    }

    /// <summary>Stores <paramref name="word"/>. An empty word is ignored.</summary>
    public void Add(ReadOnlySpan<char> word)
    {
        var current = Children;
        for (var i = 0; i < word.Length; i++)
        {
            var currentCharacter = _ignoreCase ? char.ToUpperInvariant(word[i]) : word[i];

            var node = CreateOrGetNode(currentCharacter, current);
            current = node.Children;

            if (i == word.Length - 1)
            {
                node._isLeaf = true;
            }
        }
    }

    /// <summary>Returns whether exactly <paramref name="word"/> has been added.</summary>
    public bool Find(ReadOnlySpan<char> word)
    {
        if (word.IsEmpty)
        {
            return false;
        }

        var node = FindNode(word);

        return node != null && node._isLeaf;
    }

    /// <summary>Returns whether any stored word starts with <paramref name="word"/>.</summary>
    public bool StartsWith(ReadOnlySpan<char> word)
    {
        if (word.IsEmpty)
        {
            return false;
        }

        return FindNode(word) != null;
    }

    /// <summary>
    /// Enumerates every stored word that starts with <paramref name="prefix"/>, including
    /// words that are themselves a prefix of longer stored words. The characters of
    /// <paramref name="prefix"/> are echoed as given; the remaining characters come from
    /// the stored keys. A null or empty prefix yields nothing.
    /// </summary>
    public IEnumerable<string> GetWordsWithPrefix(string prefix)
    {
        var node = FindNode(prefix);
        if (node == null)
        {
            yield break;
        }

        foreach (var word in Collect(node, prefix.ToList()))
        {
            yield return word;
        }

        static IEnumerable<string> Collect(Trie node, List<char> prefix)
        {
            // A word ends here whenever the node is flagged, not only when it has no children.
            if (node._isLeaf)
            {
                yield return new string(prefix.ToArray());
            }

            foreach (var child in node.Children)
            {
                prefix.Add(child.Key);
                foreach (var t in Collect(child.Value, prefix))
                {
                    yield return t;
                }
                prefix.RemoveAt(prefix.Count - 1);
            }
        }
    }

    // Returns the child for currentCharacter, creating it when it does not exist yet.
    private static Trie CreateOrGetNode(char currentCharacter, IDictionary<char, Trie> children)
    {
        if (!children.TryGetValue(currentCharacter, out var trie))
        {
            trie = new Trie();
            children.Add(currentCharacter, trie);
        }

        return trie;
    }

    // Walks the tree along word; returns the node of the last character, or null when the
    // path does not exist (or word is empty).
    private Trie FindNode(ReadOnlySpan<char> word)
    {
        var children = Children;
        Trie currentTrie = null;

        foreach (var character in word)
        {
            var currentCharacter = _ignoreCase ? char.ToUpperInvariant(character) : character;
            if (!children.TryGetValue(currentCharacter, out currentTrie))
            {
                return null;
            }

            children = currentTrie.Children;
        }

        return currentTrie;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/HammingDistance.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace LinkDotNet.StringOperations.EditDistance;
4 |
public static partial class EditDistances
{
    /// <summary>
    /// Counts the positions of <paramref name="one"/> that differ from the same position in
    /// <paramref name="two"/>. Positions of <paramref name="one"/> beyond the end of
    /// <paramref name="two"/> count as differences; surplus characters of
    /// <paramref name="two"/> are not counted.
    /// </summary>
    /// <param name="one">The string whose positions are checked.</param>
    /// <param name="two">The string to compare against.</param>
    /// <param name="ignoreCase">Compare invariant upper-cased characters when <c>true</c>.</param>
    /// <returns>The number of differing positions.</returns>
    /// <exception cref="ArgumentNullException">Either string is null or empty.</exception>
    public static int GetHammingDistance(this string one, string two, bool ignoreCase = false)
    {
        if (string.IsNullOrEmpty(one))
        {
            throw new ArgumentNullException(nameof(one));
        }

        if (string.IsNullOrEmpty(two))
        {
            throw new ArgumentNullException(nameof(two));
        }

        var overlap = Math.Min(one.Length, two.Length);

        // Everything of "one" that sticks out past "two" is a mismatch by definition.
        var mismatches = one.Length - overlap;

        for (var position = 0; position < overlap; position++)
        {
            var left = one[position];
            var right = two[position];

            if (ignoreCase)
            {
                left = char.ToUpperInvariant(left);
                right = char.ToUpperInvariant(right);
            }

            if (left != right)
            {
                mismatches++;
            }
        }

        return mismatches;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/Levenshtein.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace LinkDotNet.StringOperations.EditDistance;
4 |
public static partial class EditDistances
{
    /// <summary>
    /// Computes the Levenshtein distance between <paramref name="one"/> and <paramref name="two"/>
    /// (insertions and deletions cost 1, substitutions cost <paramref name="substitutionCost"/>).
    /// </summary>
    /// <param name="one">First string.</param>
    /// <param name="two">Second string.</param>
    /// <param name="ignoreCase">Compare invariant upper-cased characters when <c>true</c>.</param>
    /// <param name="substitutionCost">Cost of replacing one character; expected to be non-negative.</param>
    /// <param name="abortCost">
    /// Upper bound of interest: as soon as the distance is known to be at least this value,
    /// the calculation stops and <paramref name="abortCost"/> is returned.
    /// </param>
    /// <returns>
    /// The distance, capped at <paramref name="abortCost"/>. When either string is empty the
    /// length of the other one is returned without applying the cap.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public static int GetLevenshteinDistance(this string one, string two, bool ignoreCase = false, int substitutionCost = 1, int abortCost = int.MaxValue)
    {
        AssertValuesNotNull(one, two);
        if (one == string.Empty)
        {
            return two.Length;
        }

        if (two == string.Empty)
        {
            return one.Length;
        }

        var matrix = CreateLevenshteinMatrix(one, two);

        for (var i = 1; i <= one.Length; i++)
        {
            var rowMinimum = matrix[i, 0];

            for (var j = 1; j <= two.Length; j++)
            {
                var characterEqual = CheckCharacterEqual(one, two, ignoreCase, i, j);

                var substituteCost = characterEqual ? 0 : substitutionCost;
                var deleteCost = matrix[i - 1, j] + 1;
                var insertCost = matrix[i, j - 1] + 1;
                var completeSubstitutionCost = matrix[i - 1, j - 1] + substituteCost;
                matrix[i, j] = Math.Min(Math.Min(deleteCost, insertCost), completeSubstitutionCost);

                rowMinimum = Math.Min(rowMinimum, matrix[i, j]);
            }

            // Every edit path passes through this row and costs never decrease along a
            // path, so the final distance is at least the row minimum. A single large
            // cell (far off the diagonal) says nothing about the final distance.
            if (rowMinimum >= abortCost)
            {
                return abortCost;
            }
        }

        var distance = matrix[one.Length, two.Length];
        return distance >= abortCost ? abortCost : distance;
    }

    private static void AssertValuesNotNull(string one, string two)
    {
        if (one == null)
        {
            throw new ArgumentNullException(nameof(one));
        }

        if (two == null)
        {
            throw new ArgumentNullException(nameof(two));
        }
    }

    // Allocates the (one.Length + 1) x (two.Length + 1) matrix and fills the first column
    // and row with the cost of deleting / inserting every prefix.
    private static int[,] CreateLevenshteinMatrix(string one, string two)
    {
        var matrix = new int[one.Length + 1, two.Length + 1];

        for (var i = 0; i <= one.Length; i++)
        {
            matrix[i, 0] = i;
        }

        for (var j = 0; j <= two.Length; j++)
        {
            matrix[0, j] = j;
        }

        return matrix;
    }

    // Compares the characters belonging to matrix cell (i, j), i.e. one[i - 1] and two[j - 1].
    private static bool CheckCharacterEqual(string one, string two, bool ignoreCase, int i, int j)
    {
        var characterEqual = ignoreCase
            ? char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
            : one[i - 1] == two[j - 1];
        return characterEqual;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/LongestCommonSubsequence.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 |
5 | namespace LinkDotNet.StringOperations.EditDistance;
6 |
public static partial class EditDistances
{
    /// <summary>
    /// Returns the word from <paramref name="words"/> that shares the longest common
    /// subsequence with <paramref name="input"/>, or <c>null</c> when there is none.
    /// </summary>
    public static string GetClosestWord(this string input, bool ignoreCase, params string[] words) =>
        input.GetClosestWords(1, ignoreCase, words).FirstOrDefault();

    /// <summary>
    /// Ranks <paramref name="words"/> by the length of their longest common subsequence with
    /// <paramref name="input"/> and returns the best <paramref name="count"/> of them.
    /// Null entries and duplicates are ignored; words with the same score keep the order
    /// in which they were passed.
    /// </summary>
    /// <returns>The ranked words; empty when <paramref name="input"/> or <paramref name="words"/> is null or empty.</returns>
    public static IEnumerable<string> GetClosestWords(this string input, int count, bool ignoreCase,
        params string[] words)
    {
        if (input == null || words == null || words.Length == 0)
        {
            return Array.Empty<string>();
        }

        return words
            .Where(word => word != null)
            .Distinct()
            // The bottom-right matrix cell is the subsequence length; no need to build the string.
            .Select(word => (Word: word,
                Similarity: CreateLongestCommonSubsequenceMatrix(word, input, ignoreCase)[word.Length, input.Length]))
            // OrderByDescending is a stable sort, so ties stay in input order.
            .OrderByDescending(candidate => candidate.Similarity)
            .Select(candidate => candidate.Word)
            .Take(count)
            .ToArray();
    }

    /// <summary>
    /// Returns a longest common subsequence of <paramref name="one"/> and <paramref name="two"/>.
    /// The returned characters are taken from <paramref name="one"/>.
    /// </summary>
    /// <returns>The subsequence, or <c>null</c> when either argument is null.</returns>
    public static string GetLongestCommonSubsequence(this string one, string two, bool ignoreCase = false)
    {
        if (one == null || two == null)
        {
            return null;
        }

        var lcsMatrix = CreateLongestCommonSubsequenceMatrix(one, two, ignoreCase);
        return GetLongestCommonSubsequenceBackTrack(lcsMatrix, one, two, one.Length, two.Length, ignoreCase);
    }

    // Cell [i, j] holds the length of the longest common subsequence of the first i
    // characters of "one" and the first j characters of "two".
    private static int[,] CreateLongestCommonSubsequenceMatrix(string one, string two, bool ignoreCase)
    {
        var lcsMatrix = new int[one.Length + 1, two.Length + 1];

        for (var i = 1; i <= one.Length; i++)
        {
            for (var j = 1; j <= two.Length; j++)
            {
                var characterEqual = ignoreCase
                    ? char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
                    : one[i - 1] == two[j - 1];
                if (characterEqual)
                {
                    lcsMatrix[i, j] = lcsMatrix[i - 1, j - 1] + 1;
                }
                else
                {
                    lcsMatrix[i, j] = Math.Max(lcsMatrix[i - 1, j], lcsMatrix[i, j - 1]);
                }
            }
        }

        return lcsMatrix;
    }

    // Walks the matrix from cell [oneLength, twoLength] back to the origin and collects the
    // matched characters. Iterative, so long inputs neither exhaust the stack nor build
    // the result through repeated string concatenation.
    private static string GetLongestCommonSubsequenceBackTrack(int[,] lcsMatrix, string one, string two,
        int oneLength, int twoLength, bool ignoreCase)
    {
        var buffer = new char[lcsMatrix[oneLength, twoLength]];
        var writePosition = buffer.Length;
        var i = oneLength;
        var j = twoLength;

        while (i > 0 && j > 0)
        {
            var characterEqual = ignoreCase
                ? char.ToUpperInvariant(one[i - 1]) == char.ToUpperInvariant(two[j - 1])
                : one[i - 1] == two[j - 1];

            if (characterEqual)
            {
                // Characters are found back to front, so fill the buffer from its end.
                writePosition--;
                buffer[writePosition] = one[i - 1];
                i--;
                j--;
            }
            else if (lcsMatrix[i, j - 1] > lcsMatrix[i - 1, j])
            {
                j--;
            }
            else
            {
                i--;
            }
        }

        return new string(buffer, writePosition, buffer.Length - writePosition);
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/EditDistance/LongestCommonSubstring.cs:
--------------------------------------------------------------------------------
1 | namespace LinkDotNet.StringOperations.EditDistance;
2 |
public static partial class EditDistances
{
    /// <summary>
    /// Returns the longest run of consecutive characters that occurs in both
    /// <paramref name="one"/> and <paramref name="two"/>. The result is cut out of
    /// <paramref name="one"/>.
    /// </summary>
    /// <returns>
    /// The common substring, an empty string when nothing matches, or <c>null</c> when
    /// either argument is null.
    /// </returns>
    public static string GetLongestCommonSubstring(this string one, string two, bool ignoreCase = false)
    {
        if (one == null || two == null)
        {
            return null;
        }

        var table = CreateLongestCommonSubstringMatrix(one, two, ignoreCase);

        // Row 0 and column 0 are all zeros, so the scan can start at 1. The strict
        // comparison keeps the first maximum found in row-major order.
        var bestLength = 0;
        var bestEnd = 0;
        for (var row = 1; row <= one.Length; row++)
        {
            for (var column = 1; column <= two.Length; column++)
            {
                var runLength = table[row, column];
                if (runLength > bestLength)
                {
                    bestLength = runLength;
                    bestEnd = row;
                }
            }
        }

        if (bestLength == 0)
        {
            return string.Empty;
        }

        return one.Substring(bestEnd - bestLength, bestLength);
    }

    // Cell [i, j] holds the length of the common run that ends at one[i - 1] and two[j - 1];
    // cells where the characters differ keep the array's default value 0.
    private static int[,] CreateLongestCommonSubstringMatrix(string one, string two, bool ignoreCase)
    {
        var runLengths = new int[one.Length + 1, two.Length + 1];

        for (var i = 1; i <= one.Length; i++)
        {
            var left = ignoreCase ? char.ToUpperInvariant(one[i - 1]) : one[i - 1];

            for (var j = 1; j <= two.Length; j++)
            {
                var right = ignoreCase ? char.ToUpperInvariant(two[j - 1]) : two[j - 1];

                if (left == right)
                {
                    runLengths[i, j] = runLengths[i - 1, j - 1] + 1;
                }
            }
        }

        return runLengths;
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/LinkDotNet.StringOperations.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net6.0
5 |
6 |
7 |
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/Search/BoyerMoore.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 |
5 | namespace LinkDotNet.StringOperations.Search;
6 |
/// <summary>
/// Boyer-Moore string search using the bad-character rule.
/// </summary>
public static class BoyerMoore
{
    /// <summary>Returns whether <paramref name="word"/> occurs in <paramref name="text"/>.</summary>
    public static bool HasPattern(string text, string word, bool ignoreCase = false) =>
        FindAll(text, word, ignoreCase, true).Any();

    /// <summary>
    /// Lazily yields the start index of every occurrence of <paramref name="word"/> in
    /// <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to search in.</param>
    /// <param name="word">The pattern to search for.</param>
    /// <param name="ignoreCase">Compare invariant upper-cased characters when <c>true</c>.</param>
    /// <param name="abortOnFirstOccurrence">Stop after the first hit when <c>true</c>.</param>
    /// <returns>
    /// The hit positions in ascending order; nothing when either argument is null or empty
    /// or the pattern is longer than the text.
    /// </returns>
    public static IEnumerable<int> FindAll(string text, string word, bool ignoreCase = false, bool abortOnFirstOccurrence = false)
    {
        if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(word))
        {
            yield break;
        }

        if (text.Length < word.Length)
        {
            yield break;
        }

        var wordLength = word.Length;
        var textLength = text.Length;

        var badCharacterTable = GetBadCharacterTable(word, ignoreCase);

        var shift = 0;
        while (shift <= textLength - wordLength)
        {
            var index = ReduceIndexWhileMatchAtShift(text, word, ignoreCase, wordLength - 1, shift);

            if (index < 0)
            {
                yield return shift;
                if (abortOnFirstOccurrence)
                {
                    yield break;
                }

                shift = ShiftPatternToNextCharacterWithLastOccurrenceOfPattern(text, shift, wordLength, textLength, badCharacterTable, ignoreCase);
            }
            else
            {
                shift = ShiftPatternAfterBadCharacter(text, shift, index, badCharacterTable, ignoreCase);
            }
        }
    }

    // Maps every character of the pattern to the index of its last occurrence. A
    // dictionary instead of a fixed 256-slot array keeps characters above U+00FF from
    // indexing out of range.
    private static Dictionary<char, int> GetBadCharacterTable(string text, bool ignoreCase)
    {
        var table = new Dictionary<char, int>();

        for (var i = 0; i < text.Length; i++)
        {
            table[Normalize(text[i], ignoreCase)] = i;
        }

        return table;
    }

    // Index of the last occurrence of the character in the pattern, or -1 when absent.
    private static int GetLastOccurrence(Dictionary<char, int> badCharacterTable, char character, bool ignoreCase) =>
        badCharacterTable.TryGetValue(Normalize(character, ignoreCase), out var position) ? position : -1;

    private static char Normalize(char character, bool ignoreCase) =>
        ignoreCase ? char.ToUpperInvariant(character) : character;

    // Compares the pattern right to left at the given shift; returns the pattern index of
    // the first mismatch, or -1 when the whole pattern matches.
    private static int ReduceIndexWhileMatchAtShift(string text, string word, bool ignoreCase, int index, int shift)
    {
        while (index >= 0 && Normalize(text[shift + index], ignoreCase) == Normalize(word[index], ignoreCase))
        {
            index--;
        }

        return index;
    }

    // After a hit: align the text character right behind the current window with its last
    // occurrence in the pattern, or move by one when the window already ends the text.
    private static int ShiftPatternToNextCharacterWithLastOccurrenceOfPattern(string text, int shift,
        int wordLength, int textLength, Dictionary<char, int> badCharacterTable, bool ignoreCase)
    {
        if (shift + wordLength >= textLength)
        {
            return shift + 1;
        }

        return shift + wordLength - GetLastOccurrence(badCharacterTable, text[shift + wordLength], ignoreCase);
    }

    // After a mismatch: align the mismatching text character with its last occurrence in
    // the pattern, always moving forward by at least one.
    private static int ShiftPatternAfterBadCharacter(string text, int shift, int index, Dictionary<char, int> badCharacterTable, bool ignoreCase)
    {
        var lastOccurrence = GetLastOccurrence(badCharacterTable, text[shift + index], ignoreCase);
        return shift + Math.Max(1, index - lastOccurrence);
    }
}
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/Search/KnuthMorrisPratt.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.Linq;
3 |
4 | namespace LinkDotNet.StringOperations.Search;
5 |
6 | /// <summary>
7 | /// Substring search using the Knuth-Morris-Pratt algorithm.
8 | /// </summary>
9 | public static class KnuthMorrisPratt
10 | {
11 |     /// <summary>
12 |     /// Checks whether <paramref name="word"/> occurs at least once in <paramref name="text"/>.
13 |     /// </summary>
14 |     /// <param name="text">The text to search in.</param>
15 |     /// <param name="word">The pattern to search for.</param>
16 |     /// <param name="ignoreCase">When <c>true</c>, characters are compared after <see cref="char.ToUpperInvariant(char)"/>.</param>
17 |     /// <returns><c>true</c> when an occurrence exists; <c>false</c> otherwise, including for null or empty arguments.</returns>
18 |     public static bool HasPattern(string text, string word, bool ignoreCase = false) =>
19 |         FindAll(text, word, ignoreCase, true).Any();
20 |
21 |     /// <summary>
22 |     /// Lazily yields the zero-based start index of every occurrence of <paramref name="pattern"/> in <paramref name="text"/>.
23 |     /// </summary>
24 |     /// <param name="text">The text to search in.</param>
25 |     /// <param name="pattern">The pattern to search for.</param>
26 |     /// <param name="ignoreCase">When <c>true</c>, characters are compared after <see cref="char.ToUpperInvariant(char)"/>.</param>
27 |     /// <param name="abortOnFirstOccurence">When <c>true</c>, the sequence ends after the first reported index.</param>
28 |     /// <returns>
29 |     /// The start indices in ascending order. The sequence is empty when either argument is null or empty
30 |     /// or when <paramref name="pattern"/> is longer than <paramref name="text"/>.
31 |     /// </returns>
32 |     public static IEnumerable<int> FindAll(string text, string pattern,
33 |         bool ignoreCase = false, bool abortOnFirstOccurence = false)
34 |     {
35 |         if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern))
36 |         {
37 |             yield break;
38 |         }
39 |
40 |         if (text.Length < pattern.Length)
41 |         {
42 |             yield break;
43 |         }
44 |
45 |         var positionInText = 0;
46 |         var positionInPattern = 0;
47 |
48 |         // The failure table describes the pattern, not the text. Building it from the text
49 |         // produced wrong fallback positions and therefore missed matches.
50 |         var knuthMorrisPrattTable = CreateTable(pattern, ignoreCase);
51 |
52 |         while (positionInText < text.Length)
53 |         {
54 |             if (CharacterEqual(text, pattern, ignoreCase, positionInText, positionInPattern))
55 |             {
56 |                 positionInText++;
57 |                 positionInPattern++;
58 |
59 |                 if (positionInPattern == pattern.Length)
60 |                 {
61 |                     yield return positionInText - positionInPattern;
62 |
63 |                     if (abortOnFirstOccurence)
64 |                     {
65 |                         yield break;
66 |                     }
67 |
68 |                     // The extra table entry at index pattern.Length says how much of the pattern
69 |                     // is already matched for the next, possibly overlapping, occurrence.
70 |                     positionInPattern = knuthMorrisPrattTable[positionInPattern];
71 |                 }
72 |             }
73 |             else
74 |             {
75 |                 positionInPattern = knuthMorrisPrattTable[positionInPattern];
76 |                 if (positionInPattern < 0)
77 |                 {
78 |                     // No prefix of the pattern can end here: move on in the text and restart the pattern.
79 |                     positionInText++;
80 |                     positionInPattern++;
81 |                 }
82 |             }
83 |         }
84 |     }
85 |
86 |     // Builds the failure table for the pattern. It has pattern.Length + 1 entries: entry i is the
87 |     // pattern position to continue with after a mismatch at position i (-1 means "advance the text"),
88 |     // and the final entry is the position to continue with after a complete match.
89 |     private static int[] CreateTable(string pattern, bool ignoreCase)
90 |     {
91 |         var table = new int[pattern.Length + 1];
92 |         table[0] = -1;
93 |         var position = 1;
94 |         var candidate = 0;
95 |
96 |         while (position < pattern.Length)
97 |         {
98 |             if (CharacterEqual(pattern, pattern, ignoreCase, position, candidate))
99 |             {
100 |                 table[position] = table[candidate];
101 |             }
102 |             else
103 |             {
104 |                 table[position] = candidate;
105 |                 while (candidate >= 0 && !CharacterEqual(pattern, pattern, ignoreCase, position, candidate))
106 |                 {
107 |                     candidate = table[candidate];
108 |                 }
109 |             }
110 |
111 |             position++;
112 |             candidate++;
113 |         }
114 |
115 |         // position == pattern.Length here; this slot is only read after a full match.
116 |         table[position] = candidate;
117 |         return table;
118 |     }
119 |
120 |     // Compares one text character with one pattern character, optionally ignoring case.
121 |     private static bool CharacterEqual(string text, string pattern, bool ignoreCase, int positionInText,
122 |         int positionInPattern)
123 |     {
124 |         return ignoreCase
125 |             ? char.ToUpperInvariant(text[positionInText]) == char.ToUpperInvariant(pattern[positionInPattern])
126 |             : text[positionInText] == pattern[positionInPattern];
127 |     }
128 | }
--------------------------------------------------------------------------------
/LinkDotNet.StringOperations/Search/ZAlgorithm.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 |
5 | namespace LinkDotNet.StringOperations.Search;
6 |
7 | /// <summary>
8 | /// Substring search based on the Z-array of the pattern concatenated with the text.
9 | /// </summary>
10 | public static class ZAlgorithm
11 | {
12 |     /// <summary>
13 |     /// Checks whether <paramref name="word"/> occurs at least once in <paramref name="text"/>.
14 |     /// </summary>
15 |     /// <param name="text">The text to search in.</param>
16 |     /// <param name="word">The pattern to search for.</param>
17 |     /// <param name="ignoreCase">When <c>true</c>, both inputs are upper-cased with the invariant culture before comparing.</param>
18 |     /// <returns><c>true</c> when an occurrence exists; <c>false</c> otherwise, including for null or empty arguments.</returns>
19 |     public static bool HasPattern(string text, string word, bool ignoreCase = false) =>
20 |         FindAll(text, word, ignoreCase, true).Any();
21 |
22 |     /// <summary>
23 |     /// Lazily yields the zero-based start index of every occurrence of <paramref name="pattern"/> in <paramref name="text"/>.
24 |     /// </summary>
25 |     /// <param name="text">The text to search in.</param>
26 |     /// <param name="pattern">The pattern to search for.</param>
27 |     /// <param name="ignoreCase">When <c>true</c>, both inputs are upper-cased with the invariant culture before comparing.</param>
28 |     /// <param name="abortOnFirstOccurence">When <c>true</c>, the sequence ends after the first reported index.</param>
29 |     /// <returns>
30 |     /// The start indices in ascending order. The sequence is empty when either argument is null or empty
31 |     /// or when <paramref name="pattern"/> is longer than <paramref name="text"/>.
32 |     /// </returns>
33 |     public static IEnumerable<int> FindAll(string text, string pattern,
34 |         bool ignoreCase = false, bool abortOnFirstOccurence = false)
35 |     {
36 |         if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(pattern))
37 |         {
38 |             yield break;
39 |         }
40 |
41 |         if (text.Length < pattern.Length)
42 |         {
43 |             yield break;
44 |         }
45 |
46 |         // ToUpperInvariant keeps the comparison culture-independent, like the other search classes.
47 |         var concat = ignoreCase
48 |             ? (pattern + "$" + text).ToUpperInvariant()
49 |             : pattern + "$" + text;
50 |
51 |         var zArray = CreateZArray(concat.AsSpan());
52 |
53 |         // Only positions behind "pattern$" belong to the text. Testing ">=" instead of "=="
54 |         // keeps the result correct even when the pattern or the text contains '$'.
55 |         var textStart = pattern.Length + 1;
56 |         for (var i = textStart; i < concat.Length; i++)
57 |         {
58 |             if (zArray[i] >= pattern.Length)
59 |             {
60 |                 yield return i - textStart;
61 |
62 |                 if (abortOnFirstOccurence)
63 |                 {
64 |                     yield break;
65 |                 }
66 |             }
67 |         }
68 |     }
69 |
70 |     // Builds the Z-array: entry i is the length of the longest common prefix of concat and concat[i..].
71 |     // Entry 0 is left at 0. [left, right] is the most recently found segment that matches a prefix.
72 |     private static int[] CreateZArray(ReadOnlySpan<char> concat)
73 |     {
74 |         var zArray = new int[concat.Length];
75 |         var left = 0;
76 |         var right = 0;
77 |
78 |         for (var current = 1; current < concat.Length; current++)
79 |         {
80 |             if (current > right)
81 |             {
82 |                 // Outside the known segment: compare against the prefix from scratch.
83 |                 left = right = current;
84 |
85 |                 while (right < concat.Length && concat[right - left] == concat[right])
86 |                 {
87 |                     right++;
88 |                 }
89 |
90 |                 zArray[current] = right - left;
91 |                 right--;
92 |             }
93 |             else
94 |             {
95 |                 var k = current - left;
96 |
97 |                 if (zArray[k] < right - current + 1)
98 |                 {
99 |                     // The mirrored value ends inside the known segment, so it can be copied as is.
100 |                     zArray[current] = zArray[k];
101 |                 }
102 |                 else
103 |                 {
104 |                     // The match reaches the end of the known segment: keep extending it to the
105 |                     // end of the input. The bound must be concat.Length; the former bound
106 |                     // "right < current" was never true here and cut every such match short.
107 |                     left = current;
108 |                     while (right < concat.Length && concat[right - left] == concat[right])
109 |                     {
110 |                         right++;
111 |                     }
112 |
113 |                     zArray[current] = right - left;
114 |                     right--;
115 |                 }
116 |             }
117 |         }
118 |
119 |         return zArray;
120 |     }
121 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # String Operations for C#
2 | [![.NET](https://github.com/linkdotnet/StringOperations/actions/workflows/dotnet.yml/badge.svg)](https://github.com/linkdotnet/StringOperations/actions/workflows/dotnet.yml)
3 |
4 | This library implements some basic string algorithms. The implementations are written for educational purposes rather than optimized for performance.
5 |
6 | ## Currently implemented algorithms
7 | ### Edit-Distances
8 | * Longest Common Subsequence
9 | * Longest Common Substring
10 | * Levenshtein Distance
11 | * Hamming Distance
12 |
13 | ### Search
14 | * Knuth-Morris-Pratt
15 | * Boyer-Moore
16 | * Z-Algorithm
17 |
18 | ### Data Structure
19 | * Trie
20 | * Rope
21 |
22 | ### Compression
23 | * Lempel-Ziv-Welch
--------------------------------------------------------------------------------