├── .gitattributes
├── .gitignore
├── README.md
├── charmodel
│   └── charmodel.go
├── earley
│   └── earley.go
├── environment
│   └── environment.go
├── grammar
│   └── grammar.go
├── interpreter
│   └── interpreter.go
├── lexer
│   └── lexer.go
├── lexfuncs.go
├── logopoeist.go
├── parser
│   ├── parsefuncs.go
│   └── parser.go
├── test.lgp
├── types
│   └── types.go
└── wordmodel
    └── wordmodel.go

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# Windows shortcuts
*.lnk

# =========================
# Operating System Files
# =========================

# OSX
# =========================

.DS_Store
.AppleDouble
.LSOverride

# Thumbnails
._*

# Files that might appear on external disk
.Spotlight-V100
.Trashes

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Logopoeist
==========

Logopoeist is a random word-generation system for conlangers (creators of constructed languages) which conforms to user-specified phonotactic rules as well as user-specified conditional probability distributions to control phoneme frequencies.

Pre-compiled binaries for 64-bit Windows, Mac OS, and Linux can be downloaded on the release page: https://github.com/conlang-software-dev/Logopoeist/releases/tag/v0.1

Logopoeist is a command-line program, and takes the following arguments:

* `-file {string}`: the name of an input configuration file. If absent, Logopoeist will try to read configuration commands from standard input.
* `-n {int}`: the number of random words to output. Defaults to 10.
* `-lmin {uint}`: the minimum length of words to output. Defaults to 0.
* `-lmax {uint}`: the maximum length of words to output. Defaults to unbounded.

A sample configuration for a strict-CV language with vowel harmony is provided in `test.lgp`.
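For example, assuming the compiled binary is named `logopoeist` (the name will match however you built or downloaded it), the following invocation generates 20 words of no more than 8 characters each from the sample configuration:

    logopoeist -file test.lgp -n 20 -lmax 8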

Word Generation
---------------

Logopoeist generates words by randomly selecting phoneme/grapheme tokens from a distribution that is calculated for each position by intersecting information from an n-gram character model and a syllable structure model. An incremental Earley chart parser is used to keep track of all of the possible partial parses that satisfy the syllable structure rules given whatever phonemes have been generated so far; the parser state is examined to produce a combined distribution over all possible phonemes that could be added and still produce a valid parse, weighted by the probability of each partial parse in the Earley chart at that position. The n-gram model is then used to determine what distribution of phonemes would be allowed in the same position given the previous context. These two distributions are then intersected, and a random phoneme is selected from the resulting joint distribution to fill in that slot, which in turn further constrains the possible parses and n-gram environments for the next position. Word boundaries are produced (thus terminating the production of one word) by considering the relative total probability of all complete parses vs. all incomplete parses at a given position; that ratio is then used to make a weighted random choice between producing a completed word and continuing.
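A minimal sketch of this per-position selection step, in terms of the repository's own `earley` and `charmodel` packages (backtracking and the word-boundary choice are omitted, and `weightedChoice` stands in as a hypothetical helper for weighted random selection):

    // One position: intersect the grammar's allowed phonemes with the
    // n-gram model's conditional distribution, pick one, and advance.
    // weightedChoice is a hypothetical helper that makes a weighted
    // random pick from a types.CharSet (a map from phoneme to weight).
    func step(p *earley.EarleyParser, m *charmodel.CharModel, context []string) (string, *earley.EarleyParser, bool) {
        grammarDist := p.AllowedTokens()                  // weighted by partial-parse probabilities
        joint := m.CalcDistribution(grammarDist, context) // drop excluded phonemes, multiply weights
        if len(joint) == 0 {
            return "", nil, false // dead end: the real generator backtracks here
        }
        char := weightedChoice(joint)
        np, ok := p.Next(char) // advance the Earley chart past the chosen phoneme
        return char, np, ok
    }

The real generator additionally consults the parser's `TerminationProbability` at each position to decide whether to emit a word boundary instead of another phoneme.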

Several possible situations can arise that result in a failed production: a state where there are no possible phonemes that can be added, but the word also isn't complete according to the syllable structure rules. These include disjoint distributions produced by the n-gram and syllable structure models, production of a word that is too short or too long compared to the limits set by the user, and production of a duplicate word that has already been seen. In any of these cases, Logopoeist uses recursive backtracking: the parser state is rewound, the last produced phoneme is discarded and removed from the distribution so that it cannot be selected for the same environment again, and Logopoeist tries again with a different randomly selected phoneme (or, in the case of a too-short word, the word boundary is discarded and phoneme selection simply continues). This guarantees that the word generator will make progress and produce new output in finite time, without retracing failed paths that it has already explored, while still producing whatever number of output words was requested; i.e., it does not have to randomly generate possibly-colliding words for an unbounded amount of time, hoping to accumulate as many as you asked for, nor does it run a fixed number of cycles, showing you perhaps as many unique words as you requested, but possibly fewer, after filtering duplicates. Additionally, the recursive backtracking strategy allows Logopoeist to detect when it has completely exhausted the finite number of options permitted in a certain range, and to inform you of that fact, rather than freezing up while continuing to look for options that don't exist. It cannot, however, detect infinite grammars: syllable structure rules that do not permit any finite words. If you're not careful and feed it an infinite grammar with no maximum word length specified, it will loop forever (if a maximum word length is specified, it will helpfully inform you that no valid words exist in the given range).

The system does start to slow down eventually, due to the increased need for backtracking, after generating large numbers of unique words. For practical purposes, however, it is quite fast. For example, it can generate all 6156 possible 8-letter words allowed by the sample configuration file in about 15 seconds, and then helpfully inform you that there are no more valid words of that length.

Configuration
-------------

Logopoeist uses a simple domain-specific programming language to describe the allowed shapes of words in its config files. The LGP language has three kinds of statements:

1. Variable declarations
2. Word Syntax rules
3. Conditional probability rules

### Variable Declarations

Variable declarations let you give names to sets (or classes) of characters, so that you can use them multiple times in the phonotactic rules. Variable declarations have the form

    {C-var} = {C-class}

where `{C-var}` is a character class variable name, and `{C-class}` is either another variable name or a literal character class. Character class variables always begin with a hash symbol (`#`). Literal character classes have the form

    <{char} *{Frequency} ...>

where `{char}` is some string of characters representing a phoneme (not limiting it to a single typable character allows you to treat digraphs, trigraphs, and other sequences as single characters from the point of view of phonotactics), and `{Frequency}` is a number specifying the relative frequency of that phoneme compared to others in the same set. The `*{Frequency}` setting after each phoneme is optional, and will be automatically set to 1 if not specified.
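For example, the following hypothetical declarations define a vowel class and then alias it under a second name:

    #V = <a *2 i u>   ; 'a' occurs twice as often as 'i' or 'u'
    #Nucleus = #V

Since no frequency is given for `i` or `u`, each defaults to 1.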

### Word Syntax Rules

Word Syntax rules describe the high-level phonotactic structure of a language in terms of a probabilistic context-free grammar.

Syntax rules have the general form

    {S-var} -> [Replacement List] *{Frequency}

where `{S-var}` is a syntax variable and `[Replacement List]` is a space-separated list of syntax variables, character class variables, or literal character classes. As with character classes, the frequency specification is optional, and defaults to 1 if not specified; in this case, it specifies how frequently a particular substitution rule will be applied when more than one is available for the same syntax variable. Syntax variables always begin with a dollar symbol (`$`).

The left-hand symbol for the _first_ syntax rule in a configuration file will be used as the starting symbol for the probabilistic grammar.
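For example, the first two rules of the included `test.lgp` read:

    $W -> $W1 *2
    $W -> $W2 *1

Because `$W` is the left-hand symbol of the first rule, it is the starting symbol, and it expands to `$W1` twice as often as to `$W2`.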
"github.com/conlang-software-dev/Logopoeist/types" 5 | 6 | type ngrams map[string]*CharSet 7 | 8 | type CharModel struct { 9 | conds ngrams 10 | excls ngrams 11 | } 12 | 13 | func NewModel() *CharModel { 14 | return &CharModel{ 15 | conds: make(ngrams), 16 | excls: make(ngrams), 17 | } 18 | } 19 | 20 | func (m *CharModel) AddCondition(ngram string, dist *CharSet) { 21 | if ndist, ok := m.conds[ngram]; ok { 22 | // copy the old map in case it was shared, 23 | union := make(CharSet, len(*ndist)) 24 | m.conds[ngram] = &union 25 | for k, v := range *ndist { 26 | union[k] = v 27 | } 28 | 29 | // then union with the current distribution 30 | for k, v := range *dist { 31 | if _, ok := union[k]; ok { 32 | union[k] += v 33 | } else { 34 | union[k] = v 35 | } 36 | } 37 | } else { 38 | m.conds[ngram] = dist 39 | } 40 | } 41 | 42 | func (m *CharModel) AddExclusion(ngram string, dist *CharSet) { 43 | if edist, ok := m.excls[ngram]; ok { 44 | 45 | // create a new map in case the original was shared 46 | union := make(CharSet, len(*edist)) 47 | m.excls[ngram] = &union 48 | 49 | for k := range *edist { 50 | union[k] = 0 51 | } 52 | for k := range *dist { 53 | union[k] = 0 54 | } 55 | } else { 56 | // reference a single common object as much as possible 57 | m.excls[ngram] = dist 58 | } 59 | } 60 | 61 | func (m *CharModel) CalcDistribution(base *CharSet, context []string) CharSet { 62 | ndist := make(CharSet, len(*base)) 63 | for k, v := range *base { 64 | ndist[k] = v 65 | } 66 | 67 | // iterate over conditioning ngrams 68 | order := len(context) 69 | for j := order; j > 0; j-- { 70 | ngram := strings.Join(context[order-j:order], "") 71 | 72 | // remove any exclusions 73 | if edist, ok := m.excls[ngram]; ok { 74 | for char, _ := range *edist { 75 | delete(ndist, char) 76 | } 77 | } 78 | 79 | // intersect with conditional distributions 80 | if cdist, ok := m.conds[ngram]; ok { 81 | for char, nweight := range ndist { 82 | if cweight, ok := (*cdist)[char]; ok { 83 | ndist[char] = nweight * cweight 84 | } else { 85 | delete(ndist, char) 86 | } 87 | } 88 | } 89 | } 90 | return ndist 91 | } 92 | -------------------------------------------------------------------------------- /earley/earley.go: -------------------------------------------------------------------------------- 1 | package earley 2 | 3 | import . "github.com/conlang-software-dev/Logopoeist/parser" 4 | import . "github.com/conlang-software-dev/Logopoeist/grammar" 5 | import . "github.com/conlang-software-dev/Logopoeist/types" 6 | import . 
"github.com/conlang-software-dev/Logopoeist/environment" 7 | 8 | type state struct { 9 | lhs string 10 | rhs []*Node 11 | dot uint 12 | start uint 13 | terminal bool 14 | weight float64 15 | } 16 | 17 | func (s *state) iscomplete() bool { 18 | return s.dot >= uint(len(s.rhs)) 19 | } 20 | 21 | func (s *state) needNonTerminal() bool { 22 | return s.rhs[s.dot].Type == SVar 23 | } 24 | 25 | func (s *state) equals(other *state) bool { 26 | if s.terminal != other.terminal || 27 | s.lhs != other.lhs || 28 | s.dot != other.dot || 29 | s.start != other.start { 30 | return false 31 | } 32 | if len(s.rhs) != len(other.rhs) { 33 | return false 34 | } 35 | for i, n := range s.rhs { 36 | o := other.rhs[i] 37 | if n.Type != o.Type || n.Value != o.Value { 38 | return false 39 | } 40 | } 41 | return true 42 | } 43 | 44 | type EarleyParser struct { 45 | parent *EarleyParser 46 | level uint 47 | synmodel Grammar 48 | env Environment 49 | root string 50 | column []*state 51 | finished bool 52 | } 53 | 54 | func NewParser(env Environment, g Grammar, root string) *EarleyParser { 55 | np := &EarleyParser{ 56 | parent: nil, 57 | level: 0, 58 | env: env, 59 | synmodel: g, 60 | root: root, 61 | column: []*state{}, 62 | finished: false, 63 | } 64 | 65 | np.init() 66 | return np 67 | } 68 | 69 | func newLevel(p *EarleyParser) *EarleyParser { 70 | return &EarleyParser{ 71 | parent: p, 72 | level: p.level + 1, 73 | env: p.env, 74 | synmodel: p.synmodel, 75 | root: p.root, 76 | column: []*state{}, 77 | finished: false, 78 | } 79 | } 80 | 81 | func (p *EarleyParser) init() { 82 | if rset, ok := p.synmodel.Rules(p.root); ok { 83 | for i, rhs := range rset.Rules { 84 | p.addToChart(&state{ 85 | lhs: p.root, 86 | rhs: rhs, 87 | dot: 0, 88 | start: 0, 89 | terminal: false, 90 | weight: rset.Weights[i], 91 | }) 92 | } 93 | } 94 | p.process() 95 | } 96 | 97 | func (p *EarleyParser) IsFinished() bool { 98 | return p.finished 99 | } 100 | 101 | func (p *EarleyParser) IsEmpty() bool { 102 | return len(p.column) == 0 103 | } 104 | 105 | func (p *EarleyParser) addToChart(s *state) { 106 | for _, old := range p.column { 107 | if s.equals(old) { 108 | old.weight += s.weight 109 | return 110 | } 111 | } 112 | p.column = append(p.column, s) 113 | } 114 | 115 | func (p *EarleyParser) getColumn(index uint) []*state { 116 | for p.level > index { 117 | p = p.parent 118 | } 119 | return p.column 120 | } 121 | 122 | func (chart *EarleyParser) scan(s *state, token string) { 123 | if s.iscomplete() { 124 | return 125 | } 126 | 127 | term := s.rhs[s.dot] 128 | if term.Type != CVar { 129 | return 130 | } 131 | 132 | chars, ok := chart.env.Lookup(term.Value) 133 | if !ok { 134 | return 135 | } 136 | 137 | if chars.Contains(token) { 138 | chart.addToChart(&state{ 139 | lhs: term.Value, 140 | rhs: []*Node{}, // could store the token here, but it's not necessary for our purposes 141 | dot: 1, // 0 would work as well, since rhs is empty; the point is to make this state "finished" 142 | start: chart.level - 1, 143 | terminal: true, 144 | weight: s.weight, 145 | }) 146 | } 147 | } 148 | 149 | func (chart *EarleyParser) predict(s *state) { 150 | g := chart.synmodel 151 | term := s.rhs[s.dot] 152 | if term.Type != SVar { 153 | return 154 | } 155 | if rset, ok := g.Rules(term.Value); ok { 156 | for i, rhs := range rset.Rules { 157 | chart.addToChart(&state{ 158 | lhs: term.Value, 159 | rhs: rhs, 160 | dot: 0, 161 | start: chart.level, 162 | terminal: false, 163 | weight: s.weight * rset.Weights[i], 164 | }) 165 | } 166 | } 167 | } 168 | 169 | func (chart 

TODO
----

There are plenty of ways that Logopoeist could be improved, so feel free to make suggestions and/or pull requests!
--------------------------------------------------------------------------------
/charmodel/charmodel.go:
--------------------------------------------------------------------------------
package charmodel

import "strings"
import . "github.com/conlang-software-dev/Logopoeist/types"

type ngrams map[string]*CharSet

type CharModel struct {
    conds ngrams
    excls ngrams
}

func NewModel() *CharModel {
    return &CharModel{
        conds: make(ngrams),
        excls: make(ngrams),
    }
}

func (m *CharModel) AddCondition(ngram string, dist *CharSet) {
    if ndist, ok := m.conds[ngram]; ok {
        // copy the old map in case it was shared,
        union := make(CharSet, len(*ndist))
        m.conds[ngram] = &union
        for k, v := range *ndist {
            union[k] = v
        }

        // then union with the current distribution
        for k, v := range *dist {
            if _, ok := union[k]; ok {
                union[k] += v
            } else {
                union[k] = v
            }
        }
    } else {
        m.conds[ngram] = dist
    }
}

func (m *CharModel) AddExclusion(ngram string, dist *CharSet) {
    if edist, ok := m.excls[ngram]; ok {

        // create a new map in case the original was shared
        union := make(CharSet, len(*edist))
        m.excls[ngram] = &union

        for k := range *edist {
            union[k] = 0
        }
        for k := range *dist {
            union[k] = 0
        }
    } else {
        // reference a single common object as much as possible
        m.excls[ngram] = dist
    }
}

func (m *CharModel) CalcDistribution(base *CharSet, context []string) CharSet {
    ndist := make(CharSet, len(*base))
    for k, v := range *base {
        ndist[k] = v
    }

    // iterate over conditioning ngrams
    order := len(context)
    for j := order; j > 0; j-- {
        ngram := strings.Join(context[order-j:order], "")

        // remove any exclusions
        if edist, ok := m.excls[ngram]; ok {
            for char := range *edist {
                delete(ndist, char)
            }
        }

        // intersect with conditional distributions
        if cdist, ok := m.conds[ngram]; ok {
            for char, nweight := range ndist {
                if cweight, ok := (*cdist)[char]; ok {
                    ndist[char] = nweight * cweight
                } else {
                    delete(ndist, char)
                }
            }
        }
    }
    return ndist
}
"github.com/conlang-software-dev/Logopoeist/interpreter" 8 | 9 | type Environment map[string]*CharClass 10 | 11 | func (e Environment) Assign(varname string, n *Node) { 12 | e[varname] = e.GetClass(n) 13 | } 14 | 15 | var nextvar = 0 16 | 17 | func (e Environment) AssignNew(n *Node) string { 18 | nextvar += 1 19 | varname := strconv.Itoa(nextvar) 20 | e[varname] = e.GetClass(n) 21 | return varname 22 | } 23 | 24 | func (e Environment) Lookup(varname string) (*CharClass, bool) { 25 | if cclass, ok := e[varname]; ok { 26 | return cclass, true 27 | } 28 | return nil, false 29 | } 30 | 31 | func (e Environment) GetClass(n *Node) *CharClass { 32 | switch n.Type { 33 | case CVar: 34 | if cclass, ok := e.Lookup(n.Value); ok { 35 | return cclass 36 | } 37 | panic(fmt.Sprintf("Variable #%s referenced before definition", n.Value)) 38 | case Class: 39 | return InterpretClass(n) 40 | default: 41 | panic(fmt.Sprintf("Invalid Node Type for Character Class: %s", n.ToString())) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /grammar/grammar.go: -------------------------------------------------------------------------------- 1 | package grammar 2 | 3 | import . "github.com/conlang-software-dev/Logopoeist/parser" 4 | 5 | type RuleSet struct { 6 | total float64 7 | Weights []float64 8 | Rules [][]*Node 9 | } 10 | 11 | type Grammar map[string]*RuleSet 12 | 13 | func (g Grammar) AddRule(v string, rule []*Node, weight float64) { 14 | if rset, ok := g[v]; ok { 15 | rset.total += weight 16 | rset.Rules = append(rset.Rules, rule) 17 | rset.Weights = append(rset.Weights, weight) 18 | } else { 19 | g[v] = &RuleSet{ 20 | total: weight, 21 | Weights: []float64{weight}, 22 | Rules: [][]*Node{rule}, 23 | } 24 | } 25 | } 26 | 27 | func (g Grammar) Rules(v string) (*RuleSet, bool) { 28 | if ruleset, ok := g[v]; ok { 29 | return ruleset, true 30 | } 31 | return &RuleSet{}, false 32 | } 33 | -------------------------------------------------------------------------------- /interpreter/interpreter.go: -------------------------------------------------------------------------------- 1 | package interpreter 2 | 3 | import "fmt" 4 | import "strconv" 5 | import . "github.com/conlang-software-dev/Logopoeist/parser" 6 | import . 
"github.com/conlang-software-dev/Logopoeist/types" 7 | 8 | func InterpretNumber(n *Node) float64 { 9 | freq, err := strconv.ParseFloat(n.Value, 64) 10 | if err != nil { 11 | panic(fmt.Sprintf("Invalid numeric literal: %s", n.Value)) 12 | } 13 | return freq 14 | } 15 | 16 | func InterpretClass(n *Node) *CharClass { 17 | list := make([]string, 0, 10) 18 | weights := make(CharSet, 10) 19 | for sn := n.Left; sn != nil; sn = sn.Right { 20 | fnode := sn.Left 21 | phoneme := fnode.Left.Value 22 | freq := InterpretNumber(fnode.Right) 23 | 24 | if _, ok := weights[phoneme]; ok { 25 | weights[phoneme] += freq 26 | } else { 27 | weights[phoneme] = freq 28 | list = append(list, phoneme) 29 | } 30 | } 31 | return &CharClass{ 32 | List: list, 33 | Weights: weights, 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /lexer/lexer.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import "strings" 4 | import "io" 5 | 6 | type Item struct { 7 | Type string 8 | Token string 9 | } 10 | 11 | type StateFn func(*RuneBuffer, chan *Item) StateFn 12 | 13 | type RuneBuffer struct { 14 | in io.RuneReader 15 | r rune 16 | more bool 17 | } 18 | 19 | // Peek looks at the next rune but doesn't advance the input. 20 | func (rb *RuneBuffer) Peek() (rune, bool) { 21 | return rb.r, rb.more 22 | } 23 | 24 | // Next returns the next rune in the input. 25 | func (rb *RuneBuffer) Next() (rune, bool) { 26 | if !rb.more { 27 | return 0, false 28 | } 29 | r, more := rb.r, rb.more 30 | nr, _, err := rb.in.ReadRune() 31 | rb.r, rb.more = nr, (err == nil) 32 | return r, more 33 | } 34 | 35 | // accept consumes the next rune if it's from the valid set. 36 | func (rb *RuneBuffer) Accept(valid string) (rune, bool, bool) { 37 | r, ok := rb.Peek() 38 | if !ok { 39 | return r, false, false 40 | } 41 | if strings.IndexRune(valid, r) >= 0 { 42 | rb.Next() 43 | return r, true, true 44 | } 45 | return r, false, true 46 | } 47 | 48 | // accept consumes the next rune if it's not from the invalid set. 49 | func (rb *RuneBuffer) AcceptNot(invalid string) (rune, bool, bool) { 50 | r, ok := rb.Peek() 51 | if !ok { 52 | return r, false, false 53 | } 54 | if strings.IndexRune(invalid, r) < 0 { 55 | rb.Next() 56 | return r, true, true 57 | } 58 | return r, false, true 59 | } 60 | 61 | type Lexer struct { 62 | tokens chan *Item 63 | next *Item 64 | more bool 65 | } 66 | 67 | // Peek looks at the next token but doesn't advance the input. 68 | func (l *Lexer) Peek() (*Item, bool) { 69 | return l.next, l.more 70 | } 71 | 72 | // Next returns the next token from the input. 73 | func (l *Lexer) Next() (*Item, bool) { 74 | item, ok := l.next, l.more 75 | l.next, l.more = <-l.tokens 76 | return item, ok 77 | } 78 | 79 | func Lex(input io.RuneReader, start StateFn) *Lexer { 80 | tokens := make(chan *Item) 81 | 82 | go func() { 83 | r, _, err := input.ReadRune() 84 | buf := &RuneBuffer{ 85 | in: input, 86 | r: r, 87 | more: (err == nil), 88 | } 89 | 90 | for state := start; state != nil; { 91 | state = state(buf, tokens) 92 | } 93 | 94 | close(tokens) 95 | }() 96 | 97 | first, ok := <-tokens 98 | return &Lexer{ 99 | tokens: tokens, 100 | next: first, 101 | more: ok, 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /lexfuncs.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "bytes" 4 | import "strings" 5 | import . 
"github.com/conlang-software-dev/Logopoeist/lexer" 6 | 7 | func commentState(in *RuneBuffer, out chan *Item) StateFn { 8 | for { 9 | r, ok := in.Next() 10 | if !ok || r == '\n' { 11 | break 12 | } 13 | } 14 | return switchState 15 | } 16 | 17 | func numberState(in *RuneBuffer, out chan *Item) StateFn { 18 | buf := new(bytes.Buffer) 19 | for { 20 | r, ok, more := in.Accept("0123456789.") 21 | if !(more && ok) { 22 | break 23 | } 24 | buf.WriteRune(r) 25 | } 26 | out <- &Item{Type: "number", Token: buf.String()} 27 | return switchState 28 | } 29 | 30 | func symbolState(in *RuneBuffer, out chan *Item) StateFn { 31 | buf := new(bytes.Buffer) 32 | for { 33 | r, ok, _ := in.AcceptNot(" \t\r\n;*<>-!=") 34 | if !ok { 35 | break 36 | } 37 | buf.WriteRune(r) 38 | } 39 | 40 | out <- &Item{Type: "symbol", Token: buf.String()} 41 | return switchState 42 | } 43 | 44 | func arrowState(in *RuneBuffer, out chan *Item) StateFn { 45 | first, _ := in.Next() 46 | second, _ := in.Next() 47 | 48 | out <- &Item{Type: "arrow", Token: string([]rune{first, second})} 49 | return switchState 50 | } 51 | 52 | func phonemeState(in *RuneBuffer, out chan *Item) StateFn { 53 | buf := new(bytes.Buffer) 54 | for { 55 | r, ok, _ := in.AcceptNot(" \t\r\n*>") 56 | if !ok { 57 | break 58 | } 59 | if r == '\\' { // escape character 60 | in.Next() 61 | r, ok = in.Next() 62 | if !ok { 63 | break 64 | } 65 | } 66 | buf.WriteRune(r) 67 | } 68 | 69 | out <- &Item{Type: "phoneme", Token: buf.String()} 70 | return setState 71 | } 72 | 73 | func setState(in *RuneBuffer, out chan *Item) StateFn { 74 | if r, ok := in.Peek(); ok { 75 | switch { 76 | case strings.IndexRune(" \t\r\n", r) >= 0: 77 | for ok { //skip whitespace 78 | _, ok, _ = in.Accept(" \t\r\n") 79 | } 80 | return setState 81 | case strings.IndexRune("*/", r) >= 0: 82 | in.Next() 83 | out <- &Item{Type: string(r), Token: string(r)} 84 | return setState 85 | case strings.IndexRune("0123456789", r) >= 0: 86 | numberState(in, out) 87 | return setState 88 | case r == ';': 89 | in.Next() 90 | out <- &Item{Type: "EOL", Token: "EOL"} 91 | commentState(in, out) 92 | return setState 93 | case r == '>': 94 | in.Next() 95 | out <- &Item{Type: ">", Token: ">"} 96 | return switchState 97 | default: 98 | return phonemeState 99 | } 100 | } else { 101 | return nil 102 | } 103 | } 104 | 105 | func switchState(in *RuneBuffer, out chan *Item) StateFn { 106 | if r, ok := in.Peek(); ok { 107 | for ok { // skip spaces 108 | _, ok, _ = in.Accept(" \t\r") 109 | } 110 | switch { 111 | case strings.IndexRune(" \t\r", r) >= 0: 112 | for ok { // skip whitespace 113 | _, ok, _ = in.Accept(" \t\r") 114 | } 115 | return switchState 116 | case r == '\n': 117 | in.Next() 118 | out <- &Item{Type: "EOL", Token: "EOL"} 119 | return switchState 120 | case r == ';': 121 | in.Next() 122 | out <- &Item{Type: "EOL", Token: "EOL"} 123 | return commentState 124 | case strings.IndexRune("#$_*/=", r) >= 0: 125 | in.Next() 126 | out <- &Item{Type: string(r), Token: string(r)} 127 | return switchState 128 | case r == '<': 129 | in.Next() 130 | out <- &Item{Type: "<", Token: "<"} 131 | return setState 132 | case strings.IndexRune("-!", r) >= 0: 133 | return arrowState 134 | case strings.IndexRune("0123456789", r) >= 0: 135 | return numberState 136 | default: 137 | return symbolState 138 | } 139 | } 140 | out <- &Item{Type: "EOF", Token: "EOF"} 141 | return nil 142 | } 143 | -------------------------------------------------------------------------------- /logopoeist.go: 
--------------------------------------------------------------------------------
/lexer/lexer.go:
--------------------------------------------------------------------------------
package lexer

import "strings"
import "io"

type Item struct {
    Type  string
    Token string
}

type StateFn func(*RuneBuffer, chan *Item) StateFn

type RuneBuffer struct {
    in   io.RuneReader
    r    rune
    more bool
}

// Peek looks at the next rune but doesn't advance the input.
func (rb *RuneBuffer) Peek() (rune, bool) {
    return rb.r, rb.more
}

// Next returns the next rune in the input.
func (rb *RuneBuffer) Next() (rune, bool) {
    if !rb.more {
        return 0, false
    }
    r, more := rb.r, rb.more
    nr, _, err := rb.in.ReadRune()
    rb.r, rb.more = nr, (err == nil)
    return r, more
}

// Accept consumes the next rune if it's from the valid set.
func (rb *RuneBuffer) Accept(valid string) (rune, bool, bool) {
    r, ok := rb.Peek()
    if !ok {
        return r, false, false
    }
    if strings.IndexRune(valid, r) >= 0 {
        rb.Next()
        return r, true, true
    }
    return r, false, true
}

// AcceptNot consumes the next rune if it's not from the invalid set.
func (rb *RuneBuffer) AcceptNot(invalid string) (rune, bool, bool) {
    r, ok := rb.Peek()
    if !ok {
        return r, false, false
    }
    if strings.IndexRune(invalid, r) < 0 {
        rb.Next()
        return r, true, true
    }
    return r, false, true
}

type Lexer struct {
    tokens chan *Item
    next   *Item
    more   bool
}

// Peek looks at the next token but doesn't advance the input.
func (l *Lexer) Peek() (*Item, bool) {
    return l.next, l.more
}

// Next returns the next token from the input.
func (l *Lexer) Next() (*Item, bool) {
    item, ok := l.next, l.more
    l.next, l.more = <-l.tokens
    return item, ok
}

func Lex(input io.RuneReader, start StateFn) *Lexer {
    tokens := make(chan *Item)

    go func() {
        r, _, err := input.ReadRune()
        buf := &RuneBuffer{
            in:   input,
            r:    r,
            more: (err == nil),
        }

        for state := start; state != nil; {
            state = state(buf, tokens)
        }

        close(tokens)
    }()

    first, ok := <-tokens
    return &Lexer{
        tokens: tokens,
        next:   first,
        more:   ok,
    }
}
"github.com/conlang-software-dev/Logopoeist/lexer" 5 | 6 | func parseSVar(lex *Lexer) *Node { 7 | lex.Next() // skip $ sigil 8 | symbol, ok := lex.Next() 9 | if !ok || symbol.Type != "symbol" { 10 | panic("Parse error: Missing Syntax Variable") 11 | } 12 | return &Node{ 13 | Type: SVar, 14 | Value: symbol.Token, 15 | } 16 | } 17 | 18 | func parseCVar(lex *Lexer) *Node { 19 | lex.Next() // skip # sigil 20 | symbol, ok := lex.Next() 21 | if !ok || symbol.Type != "symbol" { 22 | panic("Parse error: Missing Class Variable") 23 | } 24 | return &Node{ 25 | Type: CVar, 26 | Value: symbol.Token, 27 | } 28 | } 29 | 30 | func parsePhoneme(lex *Lexer) *Node { 31 | loop: 32 | symbol, ok := lex.Next() 33 | if !ok || symbol.Type == ">" { 34 | return nil 35 | } 36 | if symbol.Type == "EOL" { 37 | goto loop 38 | } 39 | 40 | frequency := parseFrequency(lex) 41 | rest := parsePhoneme(lex) 42 | phoneme := &Node{ 43 | Type: Phoneme, 44 | Value: symbol.Token, 45 | } 46 | 47 | return &Node{ 48 | Type: Seq, 49 | Value: "", 50 | Right: rest, 51 | Left: &Node{ 52 | Type: Freq, 53 | Left: phoneme, 54 | Right: frequency, 55 | }, 56 | } 57 | } 58 | 59 | func parseClass(lex *Lexer) *Node { 60 | lex.Next() // skip < token 61 | phonemes := parsePhoneme(lex) 62 | return &Node{ 63 | Type: Class, 64 | Left: phonemes, 65 | } 66 | } 67 | 68 | func parseClassOrCVar(lex *Lexer) *Node { 69 | item, ok := lex.Peek() 70 | if !ok { 71 | panic("Parse error: Expected Character Class or Variable") 72 | } 73 | switch item.Type { 74 | case "#": 75 | return parseCVar(lex) 76 | case "<": 77 | return parseClass(lex) 78 | default: 79 | panic(fmt.Sprintf("Parse error: Expected Character Class or Variable; saw %s", item.Token)) 80 | } 81 | } 82 | 83 | func parseSubstitutions(lex *Lexer) *Node { 84 | item, ok := lex.Peek() 85 | if !ok { 86 | return nil 87 | } 88 | 89 | var left *Node 90 | switch item.Type { 91 | case "*", "EOL": 92 | return nil 93 | case "$": 94 | left = parseSVar(lex) 95 | case "#": 96 | left = parseCVar(lex) 97 | case "<": 98 | left = parseClass(lex) 99 | default: 100 | panic(fmt.Sprintf("Parse error: Unexpected Token %s in Syntax Rule", item.Token)) 101 | } 102 | 103 | right := parseSubstitutions(lex) 104 | return &Node{ 105 | Type: Seq, 106 | Left: left, 107 | Right: right, 108 | } 109 | } 110 | 111 | func parseFrequency(lex *Lexer) *Node { 112 | item, ok := lex.Peek() 113 | if !ok || item.Type != "*" { 114 | return &Node{ 115 | Type: Num, 116 | Value: "1", 117 | } 118 | } 119 | 120 | lex.Next() // skip * token 121 | item, ok = lex.Next() 122 | if !ok || item.Type != "number" { 123 | panic("Parse error: Missing Number") 124 | } 125 | 126 | return &Node{ 127 | Type: Num, 128 | Value: item.Token, 129 | } 130 | } 131 | 132 | func parseSyntax(lex *Lexer) *Node { 133 | left := parseSVar(lex) 134 | 135 | arrow, ok := lex.Next() 136 | if !ok || arrow.Token != "->" { 137 | panic("Parse error: Expected -> in syntax definition") 138 | } 139 | 140 | substitutions := parseSubstitutions(lex) 141 | frequency := parseFrequency(lex) 142 | 143 | return &Node{ 144 | Type: Production, 145 | Value: "", 146 | Left: left, 147 | Right: &Node{ 148 | Type: Freq, 149 | Left: substitutions, 150 | Right: frequency, 151 | }, 152 | } 153 | } 154 | 155 | func parseCondList(lex *Lexer) *Node { 156 | item, ok := lex.Peek() 157 | if !ok { 158 | return nil 159 | } 160 | 161 | var left *Node 162 | switch item.Type { 163 | case "EOL", "EOF", "arrow": 164 | return nil 165 | case "#": 166 | left = parseCVar(lex) 167 | case "<": 168 | left = parseClass(lex) 169 
--------------------------------------------------------------------------------
/logopoeist.go:
--------------------------------------------------------------------------------
package main

import "fmt"
import "bufio"
import "os"
import "flag"
import "strings"

import "github.com/conlang-software-dev/Logopoeist/lexer"
import "github.com/conlang-software-dev/Logopoeist/parser"
import . "github.com/conlang-software-dev/Logopoeist/wordmodel"

func main() {
    var file *os.File
    var fname string
    var wcount int
    var min int
    var max int

    flag.StringVar(&fname, "file", "", "The name of the configuration file; defaults to standard input.")
    flag.IntVar(&wcount, "n", 10, "The number of words to generate; defaults to 10.")
    flag.IntVar(&min, "lmin", 0, "The minimum length of words; defaults to 0.")
    flag.IntVar(&max, "lmax", 0, "The maximum length of words; defaults to unbounded.")

    flag.Parse()

    if max > 0 && min > max {
        fmt.Printf("lmin must not be greater than lmax\n")
        return
    }

    if fname != "" {
        var err error
        file, err = os.Open(fname)
        if err != nil {
            fmt.Printf("Error opening source file.\n")
            return
        }
        defer file.Close()
    } else {
        file = os.Stdin
    }

    defer func() {
        if err := recover(); err != nil {
            fmt.Printf("Error: %s\n", err)
        }
    }()

    lex := lexer.Lex(bufio.NewReader(file), switchState)
    model := WordModel()
    for command := range parser.Parse(lex) {
        model.Execute(command)
    }

    for i := 0; i < wcount; i++ {
        if clist, ok := model.Generate(min, max); ok {
            word := strings.Join(clist, "")
            fmt.Printf("%s\n", word)
            continue
        }

        if min == 0 && max == 0 {
            if i == 0 {
                fmt.Printf("No Valid Words Found. Model May Be Inconsistent.\n")
            } else {
                fmt.Printf("Exhausted Unique Words.\n")
            }
        } else {
            if i == 0 {
                fmt.Printf("No Valid Words Found in the Given Range.\n")
            } else {
                fmt.Printf("Exhausted Unique Words in the Given Range.\n")
            }
        }
        return
    }
}
"github.com/conlang-software-dev/Logopoeist/lexer" 5 | 6 | const ( // Node Types 7 | Production = iota 8 | Definition 9 | Condition 10 | Exclusion 11 | SVar 12 | CVar 13 | Class 14 | Phoneme 15 | Seq 16 | Freq 17 | Num 18 | Boundary 19 | ) 20 | 21 | type Node struct { 22 | Type int 23 | Value string 24 | Left *Node 25 | Right *Node 26 | } 27 | 28 | func (n *Node) ToString() string { 29 | if n == nil { 30 | return "" 31 | } 32 | switch n.Type { 33 | case Production: 34 | return fmt.Sprintf("%s -> %s\n", n.Left.ToString(), n.Right.ToString()) 35 | case Definition: 36 | return fmt.Sprintf("%s = %s\n", n.Left.ToString(), n.Right.ToString()) 37 | case Condition: 38 | return fmt.Sprintf("%s -> %s\n", n.Left.ToString(), n.Right.ToString()) 39 | case Exclusion: 40 | return fmt.Sprintf("%s !> %s\n", n.Left.ToString(), n.Right.ToString()) 41 | case SVar: 42 | return fmt.Sprintf("$%s", n.Value) 43 | case CVar: 44 | return fmt.Sprintf("#%s", n.Value) 45 | case Class: 46 | return fmt.Sprintf("<%s>", n.Left.ToString()) 47 | case Seq: 48 | if n.Right == nil { 49 | return n.Left.ToString() 50 | } 51 | return fmt.Sprintf("%s %s", n.Left.ToString(), n.Right.ToString()) 52 | case Freq: 53 | if n.Right.Value == "1" { 54 | return n.Left.ToString() 55 | } 56 | return fmt.Sprintf("%s *%s", n.Left.ToString(), n.Right.ToString()) 57 | case Num: 58 | return n.Value 59 | case Phoneme: 60 | return n.Value 61 | case Boundary: 62 | return "_" 63 | default: 64 | return "{unknown}" 65 | } 66 | } 67 | 68 | func Parse(lex *Lexer) chan *Node { 69 | nodes := make(chan *Node) 70 | 71 | go func() { 72 | 73 | defer func() { 74 | if err := recover(); err != nil { 75 | close(nodes) 76 | fmt.Printf("Error: %s\n", err) 77 | } 78 | }() 79 | 80 | for { 81 | item, ok := lex.Peek() 82 | if !ok || item.Type == "EOF" { 83 | close(nodes) 84 | return 85 | } 86 | nodes <- parseCommand(lex) 87 | } 88 | }() 89 | 90 | return nodes 91 | } 92 | -------------------------------------------------------------------------------- /test.lgp: -------------------------------------------------------------------------------- 1 | $W -> $W1 *2 ; Start symbol is $W 2 | $W -> $W2 *1 ; A word can be a $W1, or a $W2, with type 1 occuring twice as often 3 | $W1 -> $W1 $S1 ; A type-1 word is a list of type-1 syllables ($S1) 4 | $W1 -> $S1 ; After every syllable, it's equally likely to add one more or stop there 5 | $W2 -> $W2 $S2 ; Type-2 words are built the same way 6 | $W2 -> $S2 ; Thus, 1/2 of all words are 1 syllable, 1/4 are 2 syllables, etc. 7 | $S1 -> #C #V1 ; A type-1 syllable consist of a consonant and a type-1 vowel 8 | $S2 -> #C #V2 ; And similarly for type-2 syllables 9 | 10 | ; This means that all syllables are strictly CV, 11 | ; and all syllables in one word have the same kind 12 | ; of vowel, which is how we enforce vowel harmony 13 | 14 | #C =
--------------------------------------------------------------------------------
/parser/parser.go:
--------------------------------------------------------------------------------
package parser

import "fmt"
import . "github.com/conlang-software-dev/Logopoeist/lexer"

const ( // Node Types
    Production = iota
    Definition
    Condition
    Exclusion
    SVar
    CVar
    Class
    Phoneme
    Seq
    Freq
    Num
    Boundary
)

type Node struct {
    Type  int
    Value string
    Left  *Node
    Right *Node
}

func (n *Node) ToString() string {
    if n == nil {
        return ""
    }
    switch n.Type {
    case Production:
        return fmt.Sprintf("%s -> %s\n", n.Left.ToString(), n.Right.ToString())
    case Definition:
        return fmt.Sprintf("%s = %s\n", n.Left.ToString(), n.Right.ToString())
    case Condition:
        return fmt.Sprintf("%s -> %s\n", n.Left.ToString(), n.Right.ToString())
    case Exclusion:
        return fmt.Sprintf("%s !> %s\n", n.Left.ToString(), n.Right.ToString())
    case SVar:
        return fmt.Sprintf("$%s", n.Value)
    case CVar:
        return fmt.Sprintf("#%s", n.Value)
    case Class:
        return fmt.Sprintf("<%s>", n.Left.ToString())
    case Seq:
        if n.Right == nil {
            return n.Left.ToString()
        }
        return fmt.Sprintf("%s %s", n.Left.ToString(), n.Right.ToString())
    case Freq:
        if n.Right.Value == "1" {
            return n.Left.ToString()
        }
        return fmt.Sprintf("%s *%s", n.Left.ToString(), n.Right.ToString())
    case Num:
        return n.Value
    case Phoneme:
        return n.Value
    case Boundary:
        return "_"
    default:
        return "{unknown}"
    }
}

func Parse(lex *Lexer) chan *Node {
    nodes := make(chan *Node)

    go func() {

        defer func() {
            if err := recover(); err != nil {
                close(nodes)
                fmt.Printf("Error: %s\n", err)
            }
        }()

        for {
            item, ok := lex.Peek()
            if !ok || item.Type == "EOF" {
                close(nodes)
                return
            }
            nodes <- parseCommand(lex)
        }
    }()

    return nodes
}
--------------------------------------------------------------------------------
/test.lgp:
--------------------------------------------------------------------------------
$W -> $W1 *2   ; Start symbol is $W
$W -> $W2 *1   ; A word can be a $W1, or a $W2, with type 1 occurring twice as often
$W1 -> $W1 $S1 ; A type-1 word is a list of type-1 syllables ($S1)
$W1 -> $S1     ; After every syllable, it's equally likely to add one more or stop there
$W2 -> $W2 $S2 ; Type-2 words are built the same way
$W2 -> $S2     ; Thus, 1/2 of all words are 1 syllable, 1/4 are 2 syllables, etc.
$S1 -> #C #V1  ; A type-1 syllable consists of a consonant and a type-1 vowel
$S2 -> #C #V2  ; And similarly for type-2 syllables

; This means that all syllables are strictly CV,
; and all syllables in one word have the same kind
; of vowel, which is how we enforce vowel harmony

#C = <...> ; Consonants are <...>, or <...> ; After a type-1 syllable starting with <...> or <...>
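Purely as an illustration of the declaration syntax the comments above refer to (a made-up phoneme inventory, not the actual values from `test.lgp`), class definitions of this kind might look like:

    #C = <p t k s m n>   ; hypothetical consonant inventory
    #V1 = <a o u>        ; hypothetical type-1 vowels
    #V2 = <e i>          ; hypothetical type-2 vowels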