├── .gitignore ├── .github └── workflows │ └── test.yml ├── META6.json ├── t └── basic.rakutest ├── lib └── Text │ └── Markov.rakumod ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .precomp 2 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [ push, workflow_dispatch ] 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | container: 7 | image: rakudo-star:alpine 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v3 11 | - name: Install dependencies 12 | run: zef install --force --/test App::Prove6 13 | - name: Run tests 14 | run: prove6 15 | -------------------------------------------------------------------------------- /META6.json: -------------------------------------------------------------------------------- 1 | { 2 | "name" : "Text::Markov", 3 | "version" : "2.0.0", 4 | "description" : "Generate superficially real-looking text.", 5 | "authors" : [ "Paweł bbkr Pabian" ], 6 | "auth": "zef:bbkr", 7 | "provides": { 8 | "Text::Markov" : "lib/Text/Markov.rakumod" 9 | }, 10 | "license" : "Artistic-2.0", 11 | "source-url": "git://github.com/bbkr/Text-Markov.git" 12 | } 13 | -------------------------------------------------------------------------------- /t/basic.rakutest: -------------------------------------------------------------------------------- 1 | use lib 'lib'; 2 | 3 | use Test; 4 | use Text::Markov; 5 | 6 | plan 18; 7 | 8 | # WARNING: Some tests are not deterministic! 9 | # They check if expected chain _eventually_ appear 10 | # so there is no guarantee that they will take finite time. 
11 | 12 | my ( $mc, %stats ); 13 | 14 | { 15 | lives-ok { $mc = Text::Markov.new }, 'constructor with default order'; 16 | 17 | # lack of objects should generate 0-length chain 18 | ok $mc.feed( ), 'empty feed'; 19 | is-deeply $mc.read( ), ( ), 'empty read'; 20 | 21 | # single object should be always picked as first chain element 22 | ok $mc.feed( 'foo' ), '"foo" feed'; 23 | is-deeply $mc.read( ), ( 'foo', ), '"foo" read'; 24 | 25 | # increase weights of the same object 26 | # it should still be picked as first chain element 27 | ok $mc.feed( 'foo' ), '"foo" feed again'; 28 | is-deeply $mc.read( ), ( 'foo', ), '"foo" read again'; 29 | 30 | # feed another element 31 | # which may start chain in 1/3 of cases 32 | ok $mc.feed( 'bar' ), '"bar" feed'; 33 | loop { 34 | FIRST %stats = ( ); 35 | %stats{ $mc.read( )[ 0 ] }++; 36 | last if %stats{ 'foo' } and %stats{ 'bar' }; 37 | } 38 | pass '"foo" and "bar" eventually read'; 39 | } 40 | 41 | { 42 | lives-ok { $mc = Text::Markov.new }, 'constructor with default order'; 43 | 44 | # ability to generate endless chain 45 | ok $mc.feed( 'foo', 'foo' ), '"foo" "foo" feed'; 46 | is-deeply $mc.read( 8 ), ( 'foo', 'foo', 'foo', 'foo', 'foo', 'foo', 'foo', 'foo' ), '"foo" endless chain'; 47 | } 48 | 49 | { 50 | lives-ok { $mc = Text::Markov.new( order => 3 ) }, 'constructor for order of 3'; 51 | 52 | ok $mc.feed( qw{easy things should be easy and hard things should be possible} ), 'Larry quote feed'; 53 | loop { 54 | FIRST %stats = ( ); 55 | %stats{ $mc.read( ).join( ' ' ) }++; 56 | last if %stats{ 'easy things should be possible' } 57 | and %stats{ 'easy things should be easy and hard things should be possible' } 58 | and %stats{ 'easy things should be easy and hard things should be easy and hard things should be possible' }; 59 | } 60 | pass 'Larry quote eventually read in three ways' 61 | } 62 | 63 | { 64 | lives-ok { $mc = Text::Markov.new( order => 8 ) }, 'constructor for order of 8'; 65 | 66 | # feed shorter than order 67 | ok 
$mc.feed( 'foo', 'bar', 'baz' ), '"foo" "bar" "baz" feed'; 68 | is-deeply $mc.read( ), ( 'foo', 'bar', 'baz' ), '"foo" "bar" "baz" read'; 69 | } 70 | -------------------------------------------------------------------------------- /lib/Text/Markov.rakumod: -------------------------------------------------------------------------------- 1 | unit class Text::Markov; 2 | 3 | has Int:D $.order = 1; 4 | has %!graph; 5 | 6 | method feeder ( Seq:D $states ) returns Bool { 7 | 8 | my @predecessors; 9 | 10 | for $states.List -> $successor { 11 | 12 | # get successors location, 13 | # this will also pad Array if shorter than chain order 14 | my $successors := self!successors( @predecessors ); 15 | 16 | # successors BagHash may not be created yet 17 | $successors //= BagHash.new; 18 | 19 | # increase occurrence weight for current successor 20 | $successors{ $successor.Str }++; 21 | 22 | # newest successor pushes out oldest predecessor 23 | @predecessors.shift; 24 | @predecessors.push( $successor ); 25 | 26 | } 27 | 28 | return True; 29 | } 30 | 31 | multi method feed ( *@states ) returns Bool { 32 | 33 | return self.feeder( @states.Seq ); 34 | } 35 | 36 | method reader ( *@predecessors is copy where { .elems <= $.order } ) returns Seq { 37 | 38 | return lazy gather loop { 39 | 40 | # take provided predecessors to include them in sequence 41 | FIRST .take for @predecessors; 42 | 43 | # get successors location, 44 | # this will also pad Array if shorter than chain order 45 | my $successors := self!successors( @predecessors ); 46 | 47 | # no successors are available 48 | last unless $successors ~~ BagHash; 49 | 50 | # choose successor based on occurrence weights 51 | my $successor = $successors.roll( ); 52 | 53 | # add successor to sequence 54 | take $successor; 55 | 56 | # newest successor pushes out oldest predecessor 57 | @predecessors.shift; 58 | @predecessors.push( $successor ); 59 | 60 | } 61 | 62 | } 63 | 64 | method read ( Int:D $length where { $length >= 1 } = 1024 ) 
returns List { 65 | 66 | return self.reader[ ^$length ]:v; 67 | } 68 | 69 | method !successors ( @predecessors ) { 70 | 71 | # left pad predecessors Array with empty strings 72 | # if provided amount is lesser than chain order 73 | @predecessors.unshift( '' ) while @predecessors.elems < $.order; 74 | 75 | # pointer starts at the beginning of predecessors Hash 76 | # and will eventually reach successors expected BagHash location 77 | my $p := %!graph; 78 | 79 | for ^$.order -> $i { 80 | 81 | # move pointer to next Hash level 82 | $p := $p{ @predecessors[ $i ].Str }; 83 | } 84 | 85 | # return successors location, may be not initialized yet 86 | return-rw $p; 87 | } 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Markov-chain based text generator for [Raku](https://www.raku.org) language 2 | 3 | [![.github/workflows/test.yml](https://github.com/bbkr/Text-Markov/actions/workflows/test.yml/badge.svg)](https://github.com/bbkr/Text-Markov/actions/workflows/test.yml) 4 | 5 | ## SYNOPSIS 6 | 7 | ```raku 8 | use Text::Markov; 9 | 10 | my $mc = Text::Markov.new; 11 | 12 | $mc.feed( qw{Easy things should be easy and hard things should be possible.} ); 13 | $mc.feed( qw{People who live in glass houses should not throw stones.} ); 14 | $mc.feed( qw{Live and let live.} ); 15 | 16 | say $mc.read( ); 17 | # People who live in glass houses should be easy and let live. 18 | ``` 19 | 20 | ## METHODS 21 | 22 | Markov chain is a mathematical system. 23 | 24 | To understand terminology used below read [OPERATING PRINCIPLE](#operating-principle) paragraph first. 25 | 26 | ### new( order => 2 ) 27 | 28 | Order (optional, default ```1```) controls how many past states determine possible future states. 29 | 30 | ### feed( 'I', 'like', 'pancakes.' ) 31 | 32 | Add transitions of states. 
33 | 34 | State can be represented by any object that can be identified by **nonempty** String. 35 | 36 | ### feeder( $sequence ) 37 | 38 | Stream version of `feed( )`, allows loading transitions of states from a Sequence. 39 | Useful for feeding large inputs on the fly, for example a whole book word by word. 40 | 41 | ### read( ) / read( 128 ) 42 | 43 | Generate chain of states up to requested length (optional, default ```1024```). 44 | 45 | ### reader( ) / reader( 'I', 'like' ) 46 | 47 | Stream version of `read( )`. Returns lazy Sequence that will provide states. 48 | Useful for generating large (potentially infinite) outputs on the fly. 49 | 50 | Accepts initial states, provided list must be no longer than chain `order`. 51 | 52 | ## OPERATING PRINCIPLE 53 | 54 | Let's put abstract hat on and imagine that ___each word represents state___. 55 | 56 | Therefore sentence made of words can be represented as ___transitions between states___. 57 | 58 | 59 | For example sentence ```I like what I see``` is expressed by the following graph: 60 | 61 | 62 | ``` 63 | 4 +------+ 64 | +------------| what |<----+ 65 | | +------+ | 66 | | | 67 | v | 3 68 | +-------+ 1 +---+ 2 +------+ | 69 | | START |--------->| I |--------->| like |-----+ 70 | +-------+ +---+ +------+ 71 | | 72 | | 73 | | 5 +-----+ 6 +-----+ 74 | +----------->| see |--------->| END | 75 | +-----+ +-----+ 76 | ``` 77 | 78 | It may be surprising but transition number is not important for [feed](#feed-foo-bar-baz-) and can be discarded. 
79 | 80 | Instead, transition counters are stored (in this example each transition occurred only once): 81 | 82 | 83 | ``` 84 | 1x +------+ 85 | +------------| what |<----+ 86 | | +------+ | 87 | | | 88 | v | 1x 89 | +-------+ 1x +---+ 1x +------+ | 90 | | START |--------->| I |--------->| like |-----+ 91 | +-------+ +---+ +------+ 92 | | 93 | | 94 | | 1x +-----+ 1x +-----+ 95 | +----------->| see |--------->| END | 96 | +-----+ +-----+ 97 | ``` 98 | 99 | Next sentence ```Now I see you like cookies``` when passed to [feed](#feed-foo-bar-baz-) 100 | will simply add new transitions or increase counters of already existing ones in the same graph: 101 | 102 | 103 | ``` 104 | 1x +------+ 105 | +------------| what |<----+ 106 | | +------+ | 107 | | | 108 | v | 1x 109 | +-------+ 1x +---+ 1x +------+ | 110 | | START |--------->| I |--------->| like |-----+ 111 | +-------+ +---+ +------+ 112 | | ^ | ^ | 113 | | | | 1x | | 114 | 1x | 1x | | | | 1x 115 | | +-----+ | | +-----+ | +---------+ 116 | +-->| Now |-----+ | | you | +------->| cookies | 117 | +-----+ | +-----+ +---------+ 118 | | ^ | 119 | | | 1x | 1x 120 | | | v 121 | | 2x +-----+ 1x +-----+ 122 | +---------->| see |--------->| END | 123 | +-----+ +-----+ 124 | 125 | ``` 126 | 127 | [Markov chain](http://en.wikipedia.org/wiki/Markov_chain) is generated 128 | by making transitions from the current state to one of the next possible future states 129 | while respecting the probability assigned to each transition. 130 | The higher the counter the more probable transition is. 131 | 132 | 133 | Let's generate: 134 | 135 | * From ```START``` transition can be made to ```I``` [50% chance] or ```Now``` [50% chance] - ```I``` is rolled. 136 | * From ```I``` transition can be made to ```like``` [33.(3)% chance] or ```see``` [66.(6)% chance] - ```like``` is rolled. 137 | * From ```like``` transition can be made to ```what``` [50% chance] or ```cookies``` [50% chance] - ```cookies``` is rolled. 
138 | * From ```cookies``` transition can be made only to ```END``` [100% chance]. 139 | 140 | New sentence ```I like cookies``` is generated! 141 | 142 | 143 | Note that it is not a subpart of any sentence that was used by [feed](#feed-foo-bar-baz-) to create graph, 144 | yet it has correct grammar and makes sense. 145 | 146 | ### Improving output quality 147 | 148 | Default setup will produce a lot of nonsense. From sentences... 149 | 150 | * ```I was tired.``` 151 | * ```It was snowing.``` 152 | * ```Today I was going to do something useful.``` 153 | 154 | ...new sentence ```I was snowing.``` may be generated. 155 | 156 | 157 | It happens because single ```was``` word does not give enough context to make rational transitions only. 158 | Param ```order => 2``` in constructor restricts possible transitions to those which appear after two past states. 159 | So from ```I was``` only two transitions are possible and more reasonable ```Today I was tired.``` sentence may be generated. 160 | 161 | This is called [Markov chain of order m](http://en.wikipedia.org/wiki/Markov_chain#Variations). 162 | 163 | 164 | The higher the order the more sensible output but more feed is also required. You have to experiment :) 165 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The Artistic License 2.0 2 | 3 | Copyright (c) 2000-2006, The Perl Foundation. 4 | 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | This license establishes the terms under which a given free software 11 | Package may be copied, modified, distributed, and/or redistributed. 12 | The intent is that the Copyright Holder maintains some artistic 13 | control over the development of that Package while still keeping the 14 | Package available as open source and free software. 
15 | 16 | You are always permitted to make arrangements wholly outside of this 17 | license directly with the Copyright Holder of a given Package. If the 18 | terms of this license do not permit the full use that you propose to 19 | make of the Package, you should contact the Copyright Holder and seek 20 | a different licensing arrangement. 21 | 22 | Definitions 23 | 24 | "Copyright Holder" means the individual(s) or organization(s) 25 | named in the copyright notice for the entire Package. 26 | 27 | "Contributor" means any party that has contributed code or other 28 | material to the Package, in accordance with the Copyright Holder's 29 | procedures. 30 | 31 | "You" and "your" means any person who would like to copy, 32 | distribute, or modify the Package. 33 | 34 | "Package" means the collection of files distributed by the 35 | Copyright Holder, and derivatives of that collection and/or of 36 | those files. A given Package may consist of either the Standard 37 | Version, or a Modified Version. 38 | 39 | "Distribute" means providing a copy of the Package or making it 40 | accessible to anyone else, or in the case of a company or 41 | organization, to others outside of your company or organization. 42 | 43 | "Distributor Fee" means any fee that you charge for Distributing 44 | this Package or providing support for this Package to another 45 | party. It does not mean licensing fees. 46 | 47 | "Standard Version" refers to the Package if it has not been 48 | modified, or has been modified only in ways explicitly requested 49 | by the Copyright Holder. 50 | 51 | "Modified Version" means the Package, if it has been changed, and 52 | such changes were not explicitly requested by the Copyright 53 | Holder. 54 | 55 | "Original License" means this Artistic License as Distributed with 56 | the Standard Version of the Package, in its current version or as 57 | it may be modified by The Perl Foundation in the future. 
58 | 59 | "Source" form means the source code, documentation source, and 60 | configuration files for the Package. 61 | 62 | "Compiled" form means the compiled bytecode, object code, binary, 63 | or any other form resulting from mechanical transformation or 64 | translation of the Source form. 65 | 66 | 67 | Permission for Use and Modification Without Distribution 68 | 69 | (1) You are permitted to use the Standard Version and create and use 70 | Modified Versions for any purpose without restriction, provided that 71 | you do not Distribute the Modified Version. 72 | 73 | 74 | Permissions for Redistribution of the Standard Version 75 | 76 | (2) You may Distribute verbatim copies of the Source form of the 77 | Standard Version of this Package in any medium without restriction, 78 | either gratis or for a Distributor Fee, provided that you duplicate 79 | all of the original copyright notices and associated disclaimers. At 80 | your discretion, such verbatim copies may or may not include a 81 | Compiled form of the Package. 82 | 83 | (3) You may apply any bug fixes, portability changes, and other 84 | modifications made available from the Copyright Holder. The resulting 85 | Package will still be considered the Standard Version, and as such 86 | will be subject to the Original License. 
87 | 88 | 89 | Distribution of Modified Versions of the Package as Source 90 | 91 | (4) You may Distribute your Modified Version as Source (either gratis 92 | or for a Distributor Fee, and with or without a Compiled form of the 93 | Modified Version) provided that you clearly document how it differs 94 | from the Standard Version, including, but not limited to, documenting 95 | any non-standard features, executables, or modules, and provided that 96 | you do at least ONE of the following: 97 | 98 | (a) make the Modified Version available to the Copyright Holder 99 | of the Standard Version, under the Original License, so that the 100 | Copyright Holder may include your modifications in the Standard 101 | Version. 102 | 103 | (b) ensure that installation of your Modified Version does not 104 | prevent the user installing or running the Standard Version. In 105 | addition, the Modified Version must bear a name that is different 106 | from the name of the Standard Version. 107 | 108 | (c) allow anyone who receives a copy of the Modified Version to 109 | make the Source form of the Modified Version available to others 110 | under 111 | 112 | (i) the Original License or 113 | 114 | (ii) a license that permits the licensee to freely copy, 115 | modify and redistribute the Modified Version using the same 116 | licensing terms that apply to the copy that the licensee 117 | received, and requires that the Source form of the Modified 118 | Version, and of any works derived from it, be made freely 119 | available in that license fees are prohibited but Distributor 120 | Fees are allowed. 121 | 122 | 123 | Distribution of Compiled Forms of the Standard Version 124 | or Modified Versions without the Source 125 | 126 | (5) You may Distribute Compiled forms of the Standard Version without 127 | the Source, provided that you include complete instructions on how to 128 | get the Source of the Standard Version. Such instructions must be 129 | valid at the time of your distribution. 
If these instructions, at any 130 | time while you are carrying out such distribution, become invalid, you 131 | must provide new instructions on demand or cease further distribution. 132 | If you provide valid instructions or cease distribution within thirty 133 | days after you become aware that the instructions are invalid, then 134 | you do not forfeit any of your rights under this license. 135 | 136 | (6) You may Distribute a Modified Version in Compiled form without 137 | the Source, provided that you comply with Section 4 with respect to 138 | the Source of the Modified Version. 139 | 140 | 141 | Aggregating or Linking the Package 142 | 143 | (7) You may aggregate the Package (either the Standard Version or 144 | Modified Version) with other packages and Distribute the resulting 145 | aggregation provided that you do not charge a licensing fee for the 146 | Package. Distributor Fees are permitted, and licensing fees for other 147 | components in the aggregation are permitted. The terms of this license 148 | apply to the use and Distribution of the Standard or Modified Versions 149 | as included in the aggregation. 150 | 151 | (8) You are permitted to link Modified and Standard Versions with 152 | other works, to embed the Package in a larger work of your own, or to 153 | build stand-alone binary or bytecode versions of applications that 154 | include the Package, and Distribute the result without restriction, 155 | provided the result does not expose a direct interface to the Package. 156 | 157 | 158 | Items That are Not Considered Part of a Modified Version 159 | 160 | (9) Works (including, but not limited to, modules and scripts) that 161 | merely extend or make use of the Package, do not, by themselves, cause 162 | the Package to be a Modified Version. In addition, such works are not 163 | considered parts of the Package itself, and are not subject to the 164 | terms of this license. 
165 | 166 | 167 | General Provisions 168 | 169 | (10) Any use, modification, and distribution of the Standard or 170 | Modified Versions is governed by this Artistic License. By using, 171 | modifying or distributing the Package, you accept this license. Do not 172 | use, modify, or distribute the Package, if you do not accept this 173 | license. 174 | 175 | (11) If your Modified Version has been derived from a Modified 176 | Version made by someone other than you, you are nevertheless required 177 | to ensure that your Modified Version complies with the requirements of 178 | this license. 179 | 180 | (12) This license does not grant you the right to use any trademark, 181 | service mark, tradename, or logo of the Copyright Holder. 182 | 183 | (13) This license includes the non-exclusive, worldwide, 184 | free-of-charge patent license to make, have made, use, offer to sell, 185 | sell, import and otherwise transfer the Package with respect to any 186 | patent claims licensable by the Copyright Holder that are necessarily 187 | infringed by the Package. If you institute patent litigation 188 | (including a cross-claim or counterclaim) against any party alleging 189 | that the Package constitutes direct or contributory patent 190 | infringement, then this Artistic License to you shall terminate on the 191 | date that such litigation is filed. 192 | 193 | (14) Disclaimer of Warranty: 194 | THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS 195 | IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED 196 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR 197 | NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL 198 | LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL 199 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 200 | DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF 201 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
202 | --------------------------------------------------------------------------------