├── .travis.yml ├── LICENSE ├── margopher_test.go ├── README.md └── margopher.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.6 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Ahmad Saleh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /margopher_test.go: -------------------------------------------------------------------------------- 1 | package margopher 2 | 3 | import "testing" 4 | 5 | func TestGetRandomWord(t *testing.T) { 6 | if getRandomWord([]string{"1", "2", "3"}) == "" { 7 | t.Error("getRandomWord: it should return a string element from slice.") 8 | } 9 | } 10 | 11 | func TestIsTerminalWord(t *testing.T) { 12 | if isTerminalWord("Hey.") == false { 13 | t.Error("isTerminalWord: it should return true for words ending in `.`") 14 | } 15 | 16 | if isTerminalWord("Hey,") == false { 17 | t.Error("isTerminalWord: it should return true for words ending in `,`") 18 | } 19 | 20 | if isTerminalWord("Hey:") == false { 21 | t.Error("isTerminalWord: it should return true for words ending in `:`") 22 | } 23 | 24 | if isTerminalWord("Hey;") == false { 25 | t.Error("isTerminalWord: it should return true for words ending in `;`") 26 | } 27 | 28 | if isTerminalWord("Hey?") == false { 29 | t.Error("isTerminalWord: it should return true for words ending in `?`") 30 | } 31 | 32 | if isTerminalWord("Hey!") == false { 33 | t.Error("isTerminalWord: it should return true for words ending in `!`") 34 | } 35 | } 36 | 37 | func TestParse(t *testing.T) { 38 | m := New() 39 | m.parse("Cats are nice. 
Cats love pizza, and Cats hates dogs.") 40 | 41 | if m.states == nil { 42 | t.Error("ParseText: it should initialize states.") 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | margopher 2 | --- 3 | 4 | [![Build Status](https://travis-ci.org/aonemd/margopher.svg?branch=master)](https://travis-ci.org/aonemd/margopher) 5 | 6 | [Markov chain](http://www.wikiwand.com/en/Markov_chain) random text generator 7 | 8 | ## How it Works 9 | 10 | **TL;DR** You give it some text, it gives you a random (and sane) chunk out of this text. 11 | 12 | Basically margopher is based on the Markov chain statistical model, except that it 13 | starts by choosing a random prefix from the states dictionary to ensure more 14 | randomness. 15 | 16 | It starts by parsing the input text and putting it into the states dictionary, then 17 | starts generating the output sentence. 18 | 19 | The generator will keep generating words till it encounters a terminal word (a 20 | word that ends in any of `.,:;?!`). 21 | 22 | **States** is a map that contains prefixes as keys and suffixes as values. 23 | 24 | **Prefix** is an array of two consecutive words from the original text. 25 | 26 | **Suffix** is a slice of all the words that occur after a given prefix. 27 | 28 | 29 | ## Installation 30 | 31 | ```sh 32 | go get github.com/aonemd/margopher 33 | ``` 34 | 35 | ## Usage 36 | 37 | 1. Import the package 38 | 39 | ```go 40 | import "github.com/aonemd/margopher" 41 | ``` 42 | 43 | 2. Create a new margopher object 44 | 45 | ```go 46 | m := margopher.New() 47 | ``` 48 | 49 | 3. Read input text using one of three parsing methods: 50 | 51 | - ReadText(text string) 52 | 53 | ```go 54 | text := "Cats are nice. Cats love pizza, and Cats hates dogs." 
55 | fmt.Println(m.ReadText(text)) 56 | ``` 57 | 58 | - ReadFile(filePath string) 59 | 60 | ```go 61 | filePath := "../file.txt" 62 | fmt.Println(m.ReadFile(filePath)) 63 | ``` 64 | 65 | - ReadURL(url string) 66 | 67 | ```go 68 | url := "https://github.com/aonemd/margopher" 69 | fmt.Println(m.ReadURL(url)) 70 | ``` 71 | 72 | 4. You can see the input parsed into a dictionary of States using `margopher.StateDictionary()`: 73 | 74 | - It returns a dictionary of this signature `map[[2]string][]string`. 75 | - The words are unordered because Go maps do not keep order. 76 | 77 | ```go 78 | fmt.Println(m.StateDictionary()) 79 | ``` 80 | 81 | ## Feedback 82 | 83 | I wrote this simple project particularly to learn Go so any feedback is more 84 | than welcome. If you have any, please open an issue or send a pull request. 85 | 86 | ## License 87 | 88 | See [LICENSE](https://github.com/aonemd/margopher/blob/master/LICENSE). 89 | -------------------------------------------------------------------------------- /margopher.go: -------------------------------------------------------------------------------- 1 | package margopher 2 | 3 | import ( 4 | "bytes" 5 | "github.com/PuerkitoBio/goquery" 6 | "io/ioutil" 7 | "log" 8 | "math/rand" 9 | "os" 10 | "regexp" 11 | "strings" 12 | ) 13 | 14 | type margopher struct { 15 | states map[[2]string][]string 16 | } 17 | 18 | func New() *margopher { 19 | return &margopher{} 20 | } 21 | 22 | func (m *margopher) ReadText(text string) string { 23 | m.parse(text) 24 | 25 | return m.generate() 26 | } 27 | 28 | func (m *margopher) ReadFile(filePath string) string { 29 | // Open the file 30 | file, err := os.Open(filePath) 31 | if err != nil { 32 | log.Fatal(err) 33 | } 34 | defer file.Close() // release the descriptor once the contents have been read 35 | // Read data from the file 36 | text, err := ioutil.ReadAll(file) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | 41 | m.parse(string(text)) 42 | 43 | return m.generate() 44 | } 45 | 46 | func (m *margopher) ReadURL(URL string) string { 47 | // Open web page 48 | doc, 
err := goquery.NewDocument(URL) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | 53 | // Search for

<p> under <article>
tags 54 | var parts []string 55 | doc.Find("article").Each(func(i int, s *goquery.Selection) { 56 | parts = append(parts, s.Find("p").Text()) 57 | }) 58 | m.parse(strings.Join(parts, " ")) // parse once: every parse call rebuilds m.states, so per-article calls kept only the last article 59 | return m.generate() 60 | } 61 | // StateDictionary returns the states map built by the most recent parse (the map itself, not a copy). 62 | func (m *margopher) StateDictionary() map[[2]string][]string { 63 | return m.states 64 | } 65 | 66 | // parse rebuilds the states map from the whitespace-separated words of text 67 | func (m *margopher) parse(text string) { 68 | // Start from an empty map: previously parsed states are discarded 69 | m.states = make(map[[2]string][]string) 70 | 71 | words := strings.Fields(text) // split on any run of whitespace so newlines/tabs/double spaces never yield empty or glued words 72 | 73 | for i := 0; i < len(words)-2; i++ { 74 | // Initialize prefix with two consecutive words as the key 75 | prefix := [2]string{words[i], words[i+1]} 76 | 77 | // Assign the third word as value to the prefix 78 | if _, ok := m.states[prefix]; ok { 79 | m.states[prefix] = append(m.states[prefix], words[i+2]) 80 | } else { 81 | m.states[prefix] = []string{words[i+2]} 82 | } 83 | } 84 | } 85 | 86 | // Generate a margopher sentence starting from a random prefix 87 | func (m *margopher) generate() string { 88 | var sentence bytes.Buffer 89 | 90 | // Initialize prefix with a random key 91 | prefix := m.getRandomPrefix([2]string{"", ""}) 92 | sentence.WriteString(strings.Join(prefix[:], " ") + " ") 93 | 94 | for { 95 | suffix := getRandomWord(m.states[prefix]) 96 | sentence.WriteString(suffix + " ") 97 | 98 | // Break the loop if suffix ends in "." 
and senetenceLength is enough 99 | if isTerminalWord(suffix) { 100 | break 101 | } 102 | 103 | prefix = [2]string{prefix[1], suffix} 104 | } 105 | 106 | return sentence.String() 107 | } 108 | 109 | // Return a random prefix other than the one in the arguments 110 | func (m *margopher) getRandomPrefix(prefix [2]string) [2]string { 111 | // By default, Go orders keys randomly for maps 112 | for key := range m.states { 113 | if key != prefix { 114 | prefix = key 115 | break 116 | } 117 | } 118 | 119 | return prefix 120 | } 121 | 122 | // Return a random element from a given string slice 123 | func getRandomWord(slice []string) string { 124 | if cap(slice) != 0 { 125 | return slice[rand.Intn(len(slice))] 126 | } else { 127 | return "" 128 | } 129 | } 130 | 131 | func isTerminalWord(word string) bool { 132 | match, _ := regexp.MatchString("(\\.|,|:|;|\\?|!)$", word) 133 | return match 134 | } 135 | --------------------------------------------------------------------------------