├── .editorconfig ├── .github └── workflows │ └── crystal.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── shard.yml ├── spec ├── Chain_spec.cr ├── README_spec.cr ├── TransitionMatrix_spec.cr ├── TransitionTable_spec.cr ├── markov_spec.cr └── spec_helper.cr └── src ├── markov.cr └── markov ├── chain.cr ├── exceptions.cr ├── transition_matrix.cr ├── transition_table.cr └── version.cr /.editorconfig: -------------------------------------------------------------------------------- 1 | [*.cr] 2 | charset = utf-8 3 | end_of_line = lf 4 | insert_final_newline = true 5 | indent_style = space 6 | indent_size = 2 7 | trim_trailing_whitespace = true 8 | -------------------------------------------------------------------------------- /.github/workflows/crystal.yml: -------------------------------------------------------------------------------- 1 | name: Crystal CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | container: 15 | image: crystallang/crystal 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Install dependencies 20 | run: shards install 21 | - name: Run tests 22 | run: crystal spec 23 | - name: Build Docs 24 | run: crystal docs 25 | 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /doc/ 2 | /docs/ 3 | /lib/ 4 | /bin/ 5 | /.shards/ 6 | .DS_Store 7 | .vscode 8 | # Libraries don't need dependency lock 9 | # Dependencies will be locked in application that uses them 10 | /shard.lock 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: crystal 2 | crystal: 3 | - latest 4 | - nightly 5 | script: 6 | - crystal spec 7 | - crystal docs 8 | deploy: 9 | provider: 
pages 10 | skip_cleanup: true 11 | github_token: $GITHUB_TOKEN 12 | project_name: markov 13 | on: 14 | branch: master 15 | local_dir: docs 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 McCall Alexander 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ⛓ Markov 2 | 3 | A Crystal library for building Markov Chains and running Markov Processes. 
4 | 5 | [![Build Status](https://travis-ci.org/mccallofthewild/markov.svg?branch=master)](https://travis-ci.org/mccallofthewild/markov) [![Docs](https://img.shields.io/badge/docs-available-brightgreen.svg)](https://mccallofthewild.github.io/markov/) [![GitHub release](https://img.shields.io/github/release/mccallofthewild/markov.svg)](https://github.com/mccallofthewild/markov/releases) 6 | 7 | ### _What is a Markov Chain?_ 8 | 9 | A Markov Chain is essentially a mechanism for guessing probable future events based on a sample of past events. 10 | For a great explanation, watch [this Khan Academy video](https://www.khanacademy.org/computing/computer-science/informationtheory/moderninfotheory/v/markov_chains). 11 | 12 | ### Visit the [API Documentation](https://mccallofthewild.github.io/markov/) for a more in-depth look at the library's functionality. 13 | 14 | ## Installation 15 | 16 | Add this to your application's `shard.yml`: 17 | 18 | ```yaml 19 | dependencies: 20 | markov: 21 | github: mccallofthewild/markov 22 | ``` 23 | In your terminal, install Crystal dependencies with: 24 | ```bash 25 | $ shards install 26 | ``` 27 | or 28 | ```bash 29 | $ crystal deps 30 | ``` 31 | 32 | ## Usage 33 | Begin by requiring the `Markov` module: 34 | ```crystal 35 | require "markov" 36 | ``` 37 | ### Basic -- Hello Markov 38 | A classic Markov text generator. This example will work well for small (array-sized) data sets. 39 | 40 | NOTE: `Markov::Chain` is a generic type which contains, receives and generates elements of `LinkType`. 41 | 42 | We'll start with the sample text: 43 | ```crystal 44 | example_string = "how much wood would a woodchuck chuck if a woodchuck could chuck wood" 45 | ``` 46 | There are several `Markov::Chain` constructors to choose from. The simplest one takes in a `LinkType` array of elements as `sample` and a `seed` of `LinkType`. `seed` is the element in `sample` you want to start the chain with. If not provided, a random element will be chosen. 
47 | ```crystal 48 | example_arr = example_string.split(" ") #=> ["how","much","wood","would","a","woodchuck","chuck","if","a","woodchuck","could","chuck","wood"] 49 | seed = example_arr[0] #=> "how" 50 | 51 | example_chain = Markov::Chain(String).new sample: example_arr, seed: seed 52 | ``` 53 | Finally, we'll generate a probable sequence of elements with the `Markov::Chain#generate` method: 54 | ```crystal 55 | puts example_chain.generate(10) 56 | ``` 57 | Output: 58 | ```bash 59 | ["much", "wood", "would", "a", "woodchuck", "could", "chuck", "if", "a", "woodchuck"] 60 | ``` 61 | That's it! 62 | 63 | If we wanted to get the elements one at a time, we could use the `Markov::Chain#next` method instead: 64 | ```crystal 65 | puts example_chain.next #=> "much" 66 | puts example_chain.next #=> "wood" 67 | puts example_chain.next #=> "would" 68 | ``` 69 | 70 | ### Advanced 71 | This implementation was built for larger data sets, with asynchronous input in mind. 72 | 73 | In this example, we will create a `Markov::Chain` which can generate realistic movie titles. 74 | 75 | To begin, we instantiate a `Markov::TransitionTable`. A `TransitionTable` is a mechanism for training and implementing Markov processes. 76 | 77 | ```crystal 78 | example_table = Markov::TransitionTable(String).new 79 | ``` 80 | 81 | #### `Markov::TransitionTable#add` 82 | Now we'll add a movie title using the `Markov::TransitionTable#add` method: 83 | 84 | ```crystal 85 | movie_one = %w(the great gatsby) # shortcut syntax for ["the","great","gatsby"] 86 | 87 | movie_one.each do |word| 88 | example_table.add(word) 89 | end 90 | ``` 91 | `Markov::TransitionTable#add` adds elements one at a time. At a deeper level, it's adding each new word to the previous word's [Transition Matrix](https://en.wikipedia.org/wiki/Stochastic_matrix) (`Markov::TransitionMatrix`). 
92 | 93 | #### `Markov::TransitionTable#fill` 94 | For syntactic sugar, if we have an array of elements, we can avoid looping through and `#add`-ing them by using the `Markov::TransitionTable#fill` method instead: 95 | 96 | ```crystal 97 | movie_one = %w(the great gatsby) # shortcut syntax for ["the","great","gatsby"] 98 | 99 | example_table.fill table_with: movie_one 100 | ``` 101 | 102 | #### `Markov::TransitionTable#reset` 103 | A problem arises at this point: 104 | ```crystal 105 | movie_two = %w(great expectations) 106 | example_table.fill table_with: movie_two 107 | ``` 108 | The above code sequentially adds each word to the `TransitionTable`. But _The Great Gatsby_ and _Great Expectations_ are two separate movie titles; the "Great" at the beginning of _Great Expectations_ is not a probable transition from the "Gatsby" at the end of _The Great Gatsby_. 109 | 110 | To solve this, use `Markov::TransitionTable#reset`. `#reset` clears the `TransitionTable`'s last added key, allowing us to separate titles like so: 111 | 112 | ```crystal 113 | movie_one = %w(the great gatsby) 114 | example_table.fill table_with: movie_one 115 | 116 | example_table.reset 117 | movie_two = %w(great expectations) 118 | example_table.fill table_with: movie_two 119 | 120 | example_table.reset 121 | movie_three = %w(the great escape) 122 | example_table.fill table_with: movie_three 123 | ``` 124 | 125 | #### Implementing the `TransitionTable` with a `Markov::Chain` 126 | Finally, we can put the `TransitionTable` to use by passing it to a `Markov::Chain` constructor as `transition_table`: 127 | 128 | ```crystal 129 | example_chain = Markov::Chain(String).new transition_table: example_table, seed: "great" 130 | ``` 131 | 132 | #### Handling Dead Ends 133 | With small and/or unique data sets, Markov chains are prone to reaching dead ends. That is, they can often reach a point where there is nothing to transition to.
134 | 135 | When this happens in the `Markov` module, `Markov::Exceptions::EmptyTransitionMatrixException` is raised. 136 | 137 | For example: 138 | 139 | ```crystal 140 | dead_end_array = %w(some say the world will end in fire) 141 | dead_end_chain = Markov::Chain(String).new sample: dead_end_array, seed: "fire" 142 | # nothing comes after "fire", so the chain is at a dead end. 143 | dead_end_chain.next # raises `EmptyTransitionMatrixException` 144 | ``` 145 | 146 | To prevent this, use the `Markov::Chain#on_dead_end` exception handler. 147 | 148 | This method takes in a callback block with arguments of: the `Markov::Chain`'s `@transition_table`, the `Markov::Chain` instance, and the `EmptyTransitionMatrixException` raised. 149 | 150 | The block's return value of `LinkType` fills in as the next item in the chain. 151 | 152 | ```crystal 153 | dead_end_array = %w(some say the world will end in fire) 154 | dead_end_chain = Markov::Chain(String).new sample: dead_end_array, seed: "fire" 155 | 156 | dead_end_chain.on_dead_end do |transition_table, chain, exception| 157 | "some" 158 | end 159 | 160 | dead_end_chain.next #=> "some" 161 | dead_end_chain.next #=> "say" 162 | dead_end_chain.next #=> "the" 163 | ``` 164 | 165 | ## Contributing 166 | 167 | 1. Fork it ( https://github.com/mccallofthewild/markov/fork ) 168 | 2. Create your feature branch (git checkout -b my-new-feature) 169 | 3. Commit your changes (git commit -am 'Add some feature') 170 | 4. Push to the branch (git push origin my-new-feature) 171 | 5. Create a new Pull Request 172 | 173 | ## Contributors 174 | 175 | - [McCall Alexander](https://github.com/mccallofthewild) mccallofthewild - creator, maintainer 176 | -------------------------------------------------------------------------------- /shard.yml: -------------------------------------------------------------------------------- 1 | 2 | name: markov 3 | 4 | description: | 5 | ⛓ Build Markov Chains and run Markov Processes with Crystal! 
# Specs for `Markov::Chain`: construction, seed validation, dead-end handling
# and sequential generation.
describe Markov::Chain do
  describe "new" do
    it "initializes with `sample` and `@seed`" do
      c = Markov::Chain(String).new sample: ["Koala", "Kangaroo"] of String, seed: "Koala"
      c.should be_a(Markov::Chain(String))
      c.seed.should eq "Koala"
    end

    it "initializes with `sample` and no `@seed`" do
      sample = ["Hippo", "Giraffe"]
      c = Markov::Chain(String).new sample: sample
      c.should be_a(Markov::Chain(String))
      # Without an explicit seed, one of the sample elements is picked.
      sample.includes?(c.seed).should be_true
    end

    it "raises `InvalidSeedException` with `sample` and invalid `seed`" do
      # `expect_raises` fails the example when nothing is raised, unlike a
      # bare `begin/rescue` around the constructor.
      expect_raises(Markov::Exceptions::InvalidSeedException) do
        Markov::Chain(String).new sample: ["Hippo", "Giraffe"], seed: "Monkey"
      end
    end

    it "initializes with `@transition_table` and `seed`" do
      tt = Markov::TransitionTable(String).new
      tt.fill table_with: ["Hippo", "Giraffe"]
      c = Markov::Chain(String).new transition_table: tt, seed: "Hippo"
      c.seed.should eq "Hippo"
    end

    it "initializes with `@transition_table` and no `seed`" do
      tt = Markov::TransitionTable(String).new
      tt.fill table_with: ["Hippo", "Giraffe"]
      c = Markov::Chain(String).new transition_table: tt
      # The randomly chosen seed must be one of the table's keys.
      tt.has_key?(c.seed).should be_true
    end

    it "raises `InvalidSeedException` with `@transition_table` and invalid `seed`" do
      tt = Markov::TransitionTable(String).new
      tt.fill table_with: ["Hippo", "Giraffe"]
      expect_raises(Markov::Exceptions::InvalidSeedException) do
        Markov::Chain(String).new transition_table: tt, seed: "Monkey"
      end
    end
  end

  describe "getters" do
    describe "#generated" do
      it "raises `EmptyTransitionMatrixException` when seed has nothing to transition to" do
        c = Markov::Chain(String).new sample: ["Hippo", "Giraffe"], seed: "Giraffe"
        # Construction succeeds; the dead end is only hit when advancing.
        expect_raises(Markov::Exceptions::EmptyTransitionMatrixException) do
          c.next
        end
      end
    end

    it "#transition_table" do
      c = Markov::Chain(String).new sample: ["Hippo", "Giraffe"]
      c.transition_table.should be_a(Markov::TransitionTable(String))
      c.transition_table.has_key?("Hippo").should be_true
    end
  end

  it "#next" do
    c = Markov::Chain(String).new sample: ["Koala", "Kangaroo"] of String, seed: "Koala"
    c.next.should eq "Kangaroo"
  end

  it "#on_dead_end" do
    c = Markov::Chain(String).new sample: ["Koala", "Kangaroo"] of String, seed: "Kangaroo"
    c.on_dead_end do |transition_table, chain, exception|
      "Koala"
    end
    c.next.should eq("Koala")
    c.next.should eq("Kangaroo")
    c.next.should eq("Koala")
  end
end
would a woodchuck chuck if a woodchuck could chuck wood" 17 | example_arr = example_string.split(" ") # => ["how","much","wood","would","a","woodchuck","chuck","if","a","woodchuck","could","chuck","wood"] 18 | seed = example_arr[0] # => "how" 19 | example_chain = Markov::Chain(String).new sample: example_arr, seed: seed 20 | puts example_chain.next 21 | puts example_chain.next 22 | puts example_chain.next 23 | end 24 | end 25 | 26 | describe "Advanced" do 27 | it "#adds" do 28 | example_table = Markov::TransitionTable(String).new 29 | movie_one = %w(the great gatsby) # shortcut syntax for ["the","great","gatsby"] 30 | 31 | movie_one.each do |word| 32 | example_table.add(word) 33 | end 34 | end 35 | 36 | it "#fills" do 37 | example_table = Markov::TransitionTable(String).new 38 | movie_one = %w(the great gatsby) 39 | example_table.fill table_with: movie_one 40 | end 41 | 42 | it "#resets" do 43 | example_table = Markov::TransitionTable(String).new 44 | 45 | movie_one = %w(the great gatsby) 46 | example_table.fill table_with: movie_one 47 | 48 | example_table.reset 49 | movie_two = %w(great expectations) 50 | example_table.fill table_with: movie_two 51 | 52 | example_table.reset 53 | movie_three = %w(the great escape) 54 | example_table.fill table_with: movie_three 55 | 56 | example_table["gatsby"]["great"]?.should eq nil 57 | end 58 | 59 | it "Handling Dead Ends" do 60 | dead_end_array = %w(some say the world will end in fire) 61 | dead_end_chain = Markov::Chain(String).new sample: dead_end_array, seed: "fire" 62 | 63 | dead_end_chain.on_dead_end do |transition_table, chain, exception| 64 | "some" 65 | end 66 | 67 | dead_end_chain.next.should eq "some" # => "some" 68 | dead_end_chain.next.should eq "say" # => "say" 69 | dead_end_chain.next.should eq "the" # => "the" 70 | end 71 | end 72 | 73 | puts "\n\n __________________________________________________________ \n\n :::END README_SPEC CONSOLE OUTPUTS::: \n\n" 74 | end 75 | 
# Specs for `Markov::TransitionMatrix`: counting, probabilities, JSON
# round-tripping and weighted random selection.
describe Markov::TransitionMatrix do
  it "#initialize" do
    t = Markov::TransitionMatrix(Range(Int32, Int32)).new
    t.empty?.should be_true
  end

  it "#to_json, #from_json with String" do
    t = Markov::TransitionMatrix(String).new
    %w(I just met you).each { |word| t.add word }
    restored = Markov::TransitionMatrix(String).from_json t.to_json
    restored["I"].should eq t["I"]
    restored.size.should eq t.size
  end

  it "#to_json, #from_json with Int32" do
    t = Markov::TransitionMatrix(Int32).new
    [1, 2, 3, 4].each { |number| t.add number }
    restored = Markov::TransitionMatrix(Int32).from_json t.to_json
    restored[2].should eq t[2]
    restored.size.should eq t.size
  end

  it "#add" do
    t = Markov::TransitionMatrix(String).new
    t.add "hey"
    t["hey"].should eq 1
  end

  it "#probabilities" do
    t = Markov::TransitionMatrix(String).new
    t.add "hello"
    t.add "welcome"
    t.add "hello"
    two_thirds = 2.to_f32 / 3.to_f32
    t.probabilities["hello"].should eq two_thirds
    t.probabilities["not included word"].should eq 0.to_f32
  end

  it "#sum" do
    t = Markov::TransitionMatrix(String).new
    t.add "hello"
    t.add "welcome"
    t.add "hello"

    t.sum.should eq(3)
  end

  describe "#probable_transition" do
    it "returns TransitionMatrix when not empty" do
      t = Markov::TransitionMatrix(String).new
      t.add "hello"
      t.add "hello"
      t.add "welcome"

      hello_occurrences = 0
      welcome_occurrences = 0

      100.times do
        case t.probable_transition
        when "hello"
          hello_occurrences += 1
        when "welcome"
          welcome_occurrences += 1
        end
      end

      # Every draw must be one of the two stored links.
      (hello_occurrences + welcome_occurrences).should eq(100)

      # yes, it's POSSIBLE that every transition is one or the other, but
      # probability would suggest that `hello_occurrences` are twice as
      # prevalent as `welcome_occurrences`, though random selection makes
      # this unpredictable, so we just test for `hello_occurrences` to be greater
      (hello_occurrences > welcome_occurrences).should be_true
    end

    it "throws `Markov::Exceptions::EmptyTransitionMatrixException` when empty" do
      t = Markov::TransitionMatrix(String).new
      # `expect_raises` fails the example if no exception is raised.
      expect_raises(Markov::Exceptions::EmptyTransitionMatrixException) do
        t.probable_transition
      end
    end
  end
end
s 23 | has_all_elements_as_keys = false 24 | break 25 | end 26 | end 27 | has_all_elements_as_keys.should eq(true) 28 | end 29 | 30 | it "#probable" do 31 | string_array = %w(some say the world will end in fire) 32 | tt = Markov::TransitionTable(String).new 33 | tt.fill table_with: string_array 34 | 35 | typeof(tt.probable after: "some").should eq(String) 36 | end 37 | 38 | it "#probable?" do 39 | string_array = %w(some say the world will end in fire) 40 | tt = Markov::TransitionTable(String).new 41 | tt.fill table_with: string_array 42 | 43 | (tt.probable? after: "fire").should eq(nil) 44 | end 45 | 46 | it "#random_key" do 47 | string_array = %w(some say the world will end in fire) 48 | tt = Markov::TransitionTable(String).new 49 | tt.fill table_with: string_array 50 | rnd = tt.random_key 51 | is_string = typeof(rnd) == String 52 | is_string.should eq(true) 53 | end 54 | 55 | it "#random_matrix" do 56 | string_array = %w(some say the world will end in fire) 57 | tt = Markov::TransitionTable(String).new 58 | tt.fill table_with: string_array 59 | rnd = tt.random_matrix 60 | is_transition_matrix = typeof(rnd) == Markov::TransitionMatrix(String) 61 | is_transition_matrix.should eq(true) 62 | end 63 | 64 | it "#to_json, #from_json with strings" do 65 | string_array = %w(some say the world will end in fire) 66 | normal_init_table = Markov::TransitionTable(String).new 67 | normal_init_table.fill string_array 68 | 69 | normal_init_table_json = normal_init_table.to_json 70 | from_json_init_table = Markov::TransitionTable(String).from_json normal_init_table_json 71 | from_json_init_table["some"].should eq normal_init_table["some"] 72 | end 73 | 74 | it "#to_json, #from_json with integers" do 75 | int_array = [0, 1, 2, 3, 4, 5] 76 | normal_init_table = Markov::TransitionTable(Int32).new 77 | normal_init_table.fill int_array 78 | 79 | normal_init_table_json = normal_init_table.to_json 80 | from_json_init_table = Markov::TransitionTable(Int32).from_json normal_init_table_json 
# Module `Markov` contains the means for creating Markov Chains and executing Markov Processes.
module Markov
  # A `Chain` is a vehicle for generating probable sequences of type `LinkType`.
  #
  # A chain walks a `TransitionTable`: starting from `seed`, every call to
  # `#next` asks the table for a probable successor of the current element,
  # makes that successor the new current element and records it in `#generated`.
  class Chain(LinkType)
    include JSON::Serializable

    # Returns an ordered `Array(LinkType)` of all `LinkType` elements generated
    getter generated : Array(LinkType)

    # Returns the trained instance of `TransitionTable`
    getter transition_table : TransitionTable(LinkType)

    # Returns `seed` element.
    getter seed : LinkType

    @generated : Array(LinkType) = Array(LinkType).new

    # `true` once `#on_dead_end` has installed a user-provided handler.
    @custom_dead_end_handler = false

    @dead_end_handler : Proc(TransitionTable(LinkType), Chain(LinkType), Exception, LinkType)

    @seed : LinkType

    # For larger processes, you'll want to externally train a `TransitionTable` then
    # pass it in as an argument.
    # If `seed` is not provided, it will default to a random item chosen with `TransitionTable#random_key`
    #
    # Raises `Exceptions::EmptyTransitionTableException` when `transition_table` is empty and
    # `Exceptions::InvalidSeedException` when `seed` is not a key of `transition_table`.
    def initialize(
      @transition_table : TransitionTable(LinkType),
      seed : LinkType | Nil = nil
    )
      if @transition_table.empty?
        raise Markov::Exceptions::EmptyTransitionTableException.new(
          method: "#new",
          message: "Add elements to your `TransitionTable` or try another constructor"
        )
      end
      # Explicit nil check (not truthiness) so a falsy but valid seed is kept.
      @seed = seed.nil? ? @transition_table.random_key : seed
      validate_seed seed: @seed, rule: "`seed` must be an existing key in provided `transition_table`!"
      @dead_end_handler = default_dead_end_handler
    end

    # Makes it possible to use `#to_json` and `#from_json` (see Crystal docs)
    #
    # NOTE(review): this initializer looks non-functional as written — confirm
    # the intended wire format before relying on it. `@transition_table` is
    # freshly created (empty) when `first_key` is called on it, and `Chain`
    # defines no `#[]=` for the `hash[key] = ...` assignment below.
    def initialize(pull : JSON::PullParser)
      @transition_table = TransitionTable(LinkType).new
      @seed = @transition_table.first_key
      @dead_end_handler = default_dead_end_handler

      hash = self
      pull.read_object do |key|
        if pull.kind == :null
          pull.read_next
        else
          hash[key] = TransitionMatrix(typeof(key)).new(pull) # V is the value type, as in `Hash(K, V)`
        end
      end
      hash
    end

    #
    # If you have a small (`Array`-sized) data set, you can pass it as `sample`
    # and a `TransitionTable` will be constructed for you with the sample data.
    #
    # `seed` should be the element in `sample` which you would like to begin the sequence.
    # If no `seed` is provided, a random element will be selected from `sample`.
    #
    # Raises `Exceptions::InvalidSeedException` when `seed` is not an element of `sample`.
    def initialize(
      sample : Array(LinkType),
      @seed : LinkType = sample.sample(1).first
    )
      @transition_table = TransitionTable(LinkType).new
      @transition_table.fill sample
      validate_seed seed: @seed, rule: "`seed` must be an existing item in `sample`!"
      @dead_end_handler = default_dead_end_handler
    end

    # Validates provided `seed` for initializers and for `#next`.
    #
    # Returns `true` when `seed` is a key of `@transition_table`; otherwise
    # raises `Exceptions::InvalidSeedException` carrying `rule` as its message.
    private def validate_seed(seed : LinkType | Nil, rule : String) : Bool
      # Key membership is checked with `has_key?` rather than truthiness.
      unless !seed.nil? && @transition_table.has_key?(seed)
        raise Exceptions::InvalidSeedException.new message: rule
      end
      true
    end

    # Creates a default `Proc` for dead end `Exception` handlers.
    # The default handler answers with the first key of `@transition_table`.
    private def default_dead_end_handler
      Proc(TransitionTable(LinkType), Chain(LinkType), Exception, LinkType).new do |_table, _chain, _exception|
        @transition_table.first_key
      end
    end

    # Generates a probable, sequential `Array` of `LinkType` elements of `count` length
    #
    # Each element is produced by `#next`, which already appends it to
    # `#generated`; the returned array holds only the elements of this call.
    # A `count` of zero or less yields an empty array.
    def generate(count : Int32)
      batch = [] of LinkType
      # `#next` records every element in `@generated`, so it is not appended
      # again here (doing so would store each element twice).
      count.times { batch.push(self.next) }
      batch
    end

    # Sets an exception handler for `EmptyTransitionMatrixException` when `Chain` instance reaches a dead end
    # while using `Chain#generate` or `Chain#next`. Returned value is inserted as the next probable element.
    #
    # Usage:
    #
    # ```
    # c = Markov::Chain(String).new sample: ["Koala", "Kangaroo"] of String, seed: "Kangaroo"
    # c.on_dead_end do |transition_table, chain, exception|
    #   "Koala"
    # end
    # c.next # => "Koala"
    # c.next # => "Kangaroo"
    # c.next # => "Koala"
    # ```
    def on_dead_end(&block : Proc(TransitionTable(LinkType), Chain(LinkType), Exception, LinkType)) : Proc(TransitionTable(LinkType), Chain(LinkType), Exception, LinkType)
      @dead_end_handler = block
      @custom_dead_end_handler = true
      block
    end

    # Generates the next probable `LinkType` element
    #
    # On a dead end (`EmptyTransitionMatrixException`) the handler installed
    # with `#on_dead_end` supplies the element; without a custom handler the
    # exception is re-raised. The chosen element must be a key of
    # `@transition_table`, otherwise `InvalidSeedException` is raised.
    def next : LinkType
      upcoming = begin
        @transition_table.probable after: @seed
      rescue ex : Markov::Exceptions::EmptyTransitionMatrixException
        raise ex unless @custom_dead_end_handler
        @dead_end_handler.call(@transition_table, self, ex)
      end
      validate_seed seed: upcoming, rule: "`@seed` must be an existing key in `@transition_table`"
      @seed = upcoming
      @generated.push(upcoming)
      upcoming
    end
  end
end
module Markov
  # A `TransitionMatrix` is an object for storing and selecting transitions in a `Markov::Chain`.
  #
  # It maps each possible next element to the number of times it has been
  # observed: `{ ELEMENT => OCCURRENCE_COUNT }`.
  #
  # See [https://en.wikipedia.org/wiki/Stochastic_matrix](https://en.wikipedia.org/wiki/Stochastic_matrix)
  class TransitionMatrix(LinkType) < Hash(LinkType, Int32)
    # Creates a new empty `TransitionMatrix`.
    def initialize
      super
    end

    # Makes it possible to use `#to_json` and `#from_json` (see Crystal docs)
    #
    # Reads a JSON object whose keys are links and whose values are
    # occurrence counts. Entries with a `null` value are skipped.
    def initialize(pull : JSON::PullParser)
      super()
      pull.read_object do |key|
        pull.read_null_or do
          # Object keys arrive as unescaped strings. A `String` link has to be
          # re-encoded as a JSON string literal (escaping quotes and
          # backslashes) before `from_json` can parse it; other link types are
          # parsed from the raw key text.
          json_key = LinkType == String ? key.to_json : key
          self[LinkType.from_json(json_key)] = Int32.new(pull)
        end
      end
    end

    # Adds item to `TransitionMatrix`
    #
    # Increments the occurrence count of `link` (starting at 1 for a new
    # link) and returns the updated count.
    def add(link : LinkType)
      self[link] = fetch(link, 0) + 1
    end

    # Returns decimal probability of each transition in the matrix
    #
    # The returned hash answers `0.0` for links that are not in the matrix.
    def probabilities : Hash(LinkType, Float32)
      probs = Hash(LinkType, Float32).new(default_value: 0.to_f32)
      # Computed once; it does not change while iterating.
      total = sum.to_f32
      each do |key, count|
        probs[key] = count.to_f32 / total
      end
      probs
    end

    # Returns sum of all values (occurrences) in the matrix
    def sum : Int32
      total = 0
      each_value { |count| total += count }
      total
    end

    # Chooses a random, probable transition from the transitions in the matrix.
    # If matrix is empty, will throw `Markov::Exceptions::EmptyTransitionMatrixException`
    #
    # Each link is selected with a probability proportional to its
    # occurrence count.
    def probable_transition : LinkType
      total = sum
      if empty? || total <= 0
        raise Markov::Exceptions::EmptyTransitionMatrixException.new(
          method: "probable_transition",
          message: "No transitions availiable!"
        )
      end

      # Draw a number in `0...total` and walk the links, consuming each
      # link's count until the draw falls inside the current link's share.
      remaining = Random.rand(total)
      each do |key, count|
        return key if remaining < count
        remaining -= count
      end

      raise Markov::Exceptions::EmptyTransitionMatrixException.new(
        method: "probable_transition",
        message: "Transition not found!"
      )
    end
  end
end
44 | # ``` 45 | # string_array = %w(some say the world will end in fire) 46 | # tt = Markov::TransitionTable(String).new 47 | # tt.fill table_with: string_array 48 | # ``` 49 | def fill(table_with sample : Array(LinkType)) 50 | sample.each do |key| 51 | add(key) 52 | end 53 | end 54 | 55 | # Returns probable transition from the `TransitionMatrix` associated with key provided. 56 | # Will raise `EmptyTransitionMatrixException` if no probable transition is available. 57 | # ``` 58 | # string_array = %w(some say the world will end in fire) 59 | # tt = Markov::TransitionTable(String).new 60 | # tt.fill table_with: string_array 61 | # 62 | # tt.probable? after: "world" # => "will" 63 | # tt.probable? after: "fire" # raises `EmptyTransitionMatrixException` 64 | # ``` 65 | def probable(after key : LinkType) : LinkType 66 | self[key].probable_transition 67 | end 68 | 69 | # Returns probable transition from the `TransitionMatrix` associated with key provided. 70 | # Returns `nil` if no probable transition is available. 71 | # ``` 72 | # string_array = %w(some say the world will end in fire) 73 | # tt = Markov::TransitionTable(String).new 74 | # tt.fill table_with: string_array 75 | # 76 | # tt.probable? after: "world" # => "will" 77 | # tt.probable? after: "fire" # => nil 78 | # ``` 79 | def probable?(after key : LinkType) : LinkType | Nil 80 | begin 81 | return probable key 82 | rescue Markov::Exceptions::EmptyTransitionMatrixException 83 | return nil 84 | end 85 | end 86 | 87 | # Returns random key. 88 | # Will raise `EmptyTransitionTableException` if `TransitionTable` is empty. 89 | def random_key : LinkType 90 | begin 91 | self.keys.sample(1).first 92 | rescue IndexError 93 | raise Exceptions::EmptyTransitionTableException.new( 94 | method: "random_key", 95 | message: "Use TransitionTable#add or TransitionTable#fill to populate the TransitionTable instance and try again." 96 | ) 97 | end 98 | end 99 | 100 | # Returns random `TransitionMatrix` from table. 
101 | def random_matrix : TransitionMatrix(LinkType) 102 | self[random_key] 103 | end 104 | 105 | # Resets the `TransitionTable`'s last added key between non-sequential sets of training data. 106 | # ``` 107 | # movie_one = %w(the great gatsby) 108 | # movie_two = %w(great expectations) 109 | # tt = Markov::TransitionTable(String).new 110 | # tt.fill table_with: movie_one 111 | # tt.reset() 112 | # tt.fill table_with: movie_two 113 | 114 | # tt.probable? after: "gatsby" #=> nil 115 | # tt.probable? after: "great" #=> "expectations" or "gatsby" 116 | # ``` 117 | def reset 118 | @last_added_key = nil 119 | end 120 | end 121 | end 122 | -------------------------------------------------------------------------------- /src/markov/version.cr: -------------------------------------------------------------------------------- 1 | # :nodoc: 2 | module Markov::Crystal 3 | VERSION = "0.1.1" 4 | end 5 | --------------------------------------------------------------------------------