├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── doc.go ├── go.mod ├── token.go ├── ut.go └── ut_test.go /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push] 3 | 4 | jobs: 5 | build: 6 | strategy: 7 | matrix: 8 | go-version: ['1.22.x'] 9 | os: [ubuntu-latest, macos-latest, windows-latest] 10 | runs-on: ${{ matrix.os }} 11 | steps: 12 | - name: Set up Go 13 | uses: actions/setup-go@v1 14 | with: 15 | go-version: ${{ matrix.go-version }} 16 | id: go 17 | 18 | - name: Check out code into the Go module directory 19 | uses: actions/checkout@v2 20 | 21 | - name: Get dependencies 22 | run: go get -v -t -d ./... 23 | 24 | - name: Build 25 | run: | 26 | go vet 27 | go build -v . 28 | 29 | - name: Test 30 | run: | 31 | go test -v -race -covermode=atomic 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | go.sum 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Kuba Podgórski 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GoDoc](https://godoc.org/github.com/kuba--/ut?status.svg)](http://godoc.org/github.com/kuba--/ut) 2 | [![Go Report Card](https://goreportcard.com/badge/github.com/kuba--/ut)](https://goreportcard.com/report/github.com/kuba--/ut) 3 | [![Build Status](https://github.com/kuba--/ut/workflows/build/badge.svg)](https://github.com/kuba--/ut/actions?query=workflow%3Abuild) 4 | [![Version](https://badge.fury.io/gh/kuba--%2Fut.svg)](https://github.com/kuba--/ut/releases) 5 | 6 | # ut 7 | Package ut implements "Yet Another Efficient Unification Algorithm" by Alin Suciu (https://arxiv.org/abs/cs/0603080v1). 8 | 9 | The unification algorithm is at the core of the logic programming paradigm, the first unification algorithm being developed by Robinson. More efficient algorithms were developed later by Martelli and, Montanari. 10 | 11 | Unification unifies terms. A term is one of variable, constant, functor, or literal value. A variable starts with an upper case letter and their names are the keys in the map returned by Unify. Constants start with a lowercase letter. Values are integers, floating point values, or strings. 
As a simple example: 12 | ```go 13 | x,y := `f(X,Y)`, `f(1,"hello")` 14 | ``` 15 | This would unify the functor f such that the variable `X` is `1` and the variable `Y` is `"hello"` in the returned map. 16 | 17 | 18 | Here yet another efficient unification algorithm centered on a specific data structure, called the Unification Table. 19 | 20 | ```Go 21 | x, y := "p(Z,h(Z,W),f(W))", "p(f(X),h(Y,f(a)),Y)" 22 | mgu := ut.Unify(x, y) 23 | fmt.Println("W = " + mgu["W"]) 24 | fmt.Println("X = " + mgu["X"]) 25 | fmt.Println("Y = " + mgu["Y"]) 26 | fmt.Println("Z = " + mgu["Z"]) 27 | 28 | // Output: 29 | // W = f(a) 30 | // X = f(a) 31 | // Y = f(f(a)) 32 | // Z = f(f(a)) 33 | 34 | x, y = "f(X1,g(X2,X3),X2,b)", "f(g(h(a,X5),X2),X1,h(a,X4),X4)" 35 | mgu = ut.Unify(x, y) 36 | fmt.Println("X1 = " + mgu["X1"]) 37 | fmt.Println("X2 = " + mgu["X2"]) 38 | fmt.Println("X3 = " + mgu["X3"]) 39 | fmt.Println("X4 = " + mgu["X4"]) 40 | fmt.Println("X5 = " + mgu["X5"]) 41 | 42 | // Output: 43 | // X1 = g(h(a,b),h(a,b)) 44 | // X2 = h(a,b) 45 | // X3 = h(a,b) 46 | // X4 = b 47 | // X5 = b 48 | ``` 49 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package ut implements "Yet Another Efficient Unification Algorithm" by Alin Suciu (https://arxiv.org/abs/cs/0603080v1). 2 | // The unification algorithm is at the core of the logic programming paradigm, 3 | // the first unification algorithm being developed by Robinson. More efficient algorithms were developed later by Martelli and, Montanari. 4 | // Here yet another efficient unification algorithm centered on a specific data structure, called the Unification Table. 
5 | package ut 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kuba--/ut 2 | 3 | go 1.22 4 | -------------------------------------------------------------------------------- /token.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Copyright 2013 Michael Hendricks. All rights reserved. 3 | // Copyright 2018 Kuba Podgórski. All rights reserved. 4 | // Use of this source code is governed by a BSD-style 5 | // license that can be found in the LICENSE file. 6 | 7 | // Tokenize UTF-8-encoded Prolog text. 8 | // It takes an io.Reader providing the source, which then can be tokenized 9 | // with the Scan function. For compatibility with 10 | // existing tools, the NUL character is not allowed. If the first character 11 | // in the source is a UTF-8 encoded byte order mark (BOM), it is discarded. 12 | 13 | package ut 14 | 15 | import ( 16 | "bytes" 17 | "fmt" 18 | "io" 19 | "os" 20 | "strings" 21 | "unicode" 22 | "unicode/utf8" 23 | ) 24 | 25 | // The result of Scan is one of these tokens or a Unicode character. 26 | const ( 27 | EOF = -(iota + 1) // reached end of source 28 | Atom // a Prolog atom, possibly quoted 29 | Comment // a comment 30 | Float // a floating point number 31 | Functor // an atom used as a predicate functor 32 | FullStop // "." 
ending a term 33 | Int // an integer 34 | String // a double-quoted string 35 | Variable // a Prolog variable 36 | Void // the special "_" variable 37 | ) 38 | 39 | const bufLen = 1024 // at least utf8.UTFMax 40 | 41 | var tokenString = map[rune]string{ 42 | EOF: "EOF", 43 | Atom: "Atom", 44 | Comment: "Comment", 45 | Float: "Float", 46 | Functor: "Functor", 47 | FullStop: "FullStop", 48 | Int: "Int", 49 | String: "String", 50 | Variable: "Variable", 51 | Void: "Void", 52 | } 53 | 54 | // Token encapsulating its type, content and related components. 55 | type Token struct { 56 | Type rune 57 | Term string 58 | Functor string 59 | Components []string 60 | } 61 | 62 | // Tokenize scans and classifies prolog terms. 63 | func Tokenize(terms ...string) []*Token { 64 | var tokens []*Token 65 | 66 | s := new(scanner) 67 | for _, t := range terms { 68 | s.Init(strings.NewReader(t)) 69 | tokens = append(tokens, tokenize("", s)...) 70 | } 71 | return tokens 72 | } 73 | 74 | func tokenize(name string, s *scanner) (tokens []*Token) { 75 | term := name 76 | if name != "" { 77 | tokens = append(tokens, &Token{Type: Functor, Functor: name}) 78 | } 79 | 80 | for t := s.Scan(); t != EOF; t = s.Scan() { 81 | txt := s.TokenText() 82 | 83 | switch t { 84 | case Atom, Float, Int, String, Void, Variable: 85 | term += txt 86 | tokens = append(tokens, &Token{Type: t, Term: txt, Functor: txt}) 87 | if name != "" { 88 | tokens[0].Components = append(tokens[0].Components, txt) 89 | } 90 | 91 | case Functor: 92 | tt := tokenize(txt, s) 93 | term += tt[0].Term 94 | tokens = append(tokens, tt...) 95 | if name != "" { 96 | tokens[0].Components = append(tokens[0].Components, tt[0].Term) 97 | } 98 | 99 | case ')': 100 | term += txt 101 | tokens[0].Term = term 102 | 103 | return tokens 104 | 105 | default: 106 | term += txt 107 | } 108 | 109 | } 110 | 111 | return tokens 112 | } 113 | 114 | // Position is represented by a Position value. 115 | // A position is valid if Line > 0. 
// position records a location in the scanned source.
type position struct {
	Filename string // filename, if any
	Offset   int    // byte offset, starting at 0
	Line     int    // line number, starting at 1
	Column   int    // column number, starting at 1 (character count per line)
}

// IsValid returns true if the position is valid.
func (pos *position) IsValid() bool { return pos.Line > 0 }

// String renders the position as "file:line:column" (parts omitted when
// unknown); "???" when no position information is available at all.
func (pos position) String() string {
	s := pos.Filename
	if pos.IsValid() {
		if s != "" {
			s += ":"
		}
		s += fmt.Sprintf("%d:%d", pos.Line, pos.Column)
	}
	if s == "" {
		s = "???"
	}
	return s
}

// A Scanner implements reading of Unicode characters and tokens from an io.Reader.
type scanner struct {
	// Input
	src io.Reader

	// Source buffer
	srcBuf [bufLen + 1]byte // +1 for sentinel for common case of s.next()
	srcPos int              // reading position (srcBuf index)
	srcEnd int              // source end (srcBuf index)

	// Source position
	srcBufOffset int // byte offset of srcBuf[0] in source
	line         int // line count
	column       int // character count
	lastLineLen  int // length of last line in characters (for correct column reporting)
	lastCharLen  int // length of last character in bytes

	// Token text buffer
	// Typically, token text is stored completely in srcBuf, but in general
	// the token text's head may be buffered in tokBuf while the token text's
	// tail is stored in srcBuf.
	tokBuf bytes.Buffer // token text head that is not in srcBuf anymore
	tokPos int          // token text tail position (srcBuf index); valid if >= 0
	tokEnd int          // token text tail end (srcBuf index)

	// One character look-ahead
	ch rune // character before current srcPos

	extraTok rune // an extra token accidentally read early

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	Error func(s *scanner, msg string)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int

	// Start position of most recently scanned token; set by Scan.
	// Calling Init or Next invalidates the position (Line == 0).
	// The Filename field is always left untouched by the Scanner.
	// If an error is reported (via Error) and Position is invalid,
	// the scanner is not inside a token. Call Pos to obtain an error
	// position in that case.
	position
}

// Init initializes a Scanner with a new source and returns s.
// Error is set to nil, ErrorCount is set to 0
func (s *scanner) Init(src io.Reader) *scanner {
	s.src = src

	// initialize source buffer
	// (the first call to next() will fill it by calling src.Read)
	s.srcBuf[0] = utf8.RuneSelf // sentinel
	s.srcPos = 0
	s.srcEnd = 0

	// initialize source position
	s.srcBufOffset = 0
	s.line = 1
	s.column = 0
	s.lastLineLen = 0
	s.lastCharLen = 0

	// initialize token text buffer
	// (required for first call to next()).
	s.tokPos = -1

	// initialize one character look-ahead
	s.ch = -1 // no char read yet

	// initialize extra token
	s.extraTok = 0

	// initialize public fields
	s.Error = nil
	s.ErrorCount = 0
	s.Line = 0 // invalidate token position

	return s
}

// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
func (s *scanner) next() rune {
	// if there's an extra token, return it instead of scanning a new one
	// (scanNumber may have read one character too far; see s.extraTok)
	if s.extraTok != 0 {
		ch := s.extraTok
		s.extraTok = 0
		return ch
	}

	ch, width := rune(s.srcBuf[s.srcPos]), 1

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				s.tokPos = 0
				// s.tokEnd is set by Scan()
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			// (an io.Reader must return io.EOF when it reaches
			// the end of what it is reading - simply returning
			// n == 0 will make this loop retry forever; but the
			// error is in the reader implementation in that case)
			i := s.srcEnd - s.srcPos
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcPos = 0
			s.srcEnd = i + n
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				if s.srcEnd == 0 {
					if s.lastCharLen > 0 {
						// previous character was not EOF
						s.column++
					}
					s.lastCharLen = 0
					return EOF
				}
				if err != io.EOF {
					s.error(err.Error())
				}
				// If err == EOF, we won't be getting more
				// bytes; break to avoid infinite loop. If
				// err is something else, we don't know if
				// we can get more bytes; thus also break.
				break
			}
		}
		// at least one byte
		ch = rune(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			if ch == utf8.RuneError && width == 1 {
				// advance for correct error position
				s.srcPos += width
				s.lastCharLen = width
				s.column++
				s.error("illegal UTF-8 encoding")
				return ch
			}
		}
	}

	// advance
	s.srcPos += width
	s.lastCharLen = width
	s.column++

	// special situations
	switch ch {
	case 0:
		// for compatibility with other tools
		s.error("illegal character NUL")
	case '\n':
		s.line++
		s.lastLineLen = s.column
		s.column = 0
	}

	return ch
}

// Next reads and returns the next Unicode character.
// It returns EOF at the end of the source. It reports
// a read error by calling s.Error, if not nil; otherwise
// it prints an error message to os.Stderr. Next does not
// update the Scanner's Position field; use Pos() to
// get the current position.
func (s *scanner) Next() rune {
	s.tokPos = -1 // don't collect token text
	s.Line = 0    // invalidate token position
	ch := s.Peek()
	s.ch = s.next()
	return ch
}

// Peek returns the next Unicode character in the source without advancing
// the scanner. It returns EOF if the scanner's position is at the last
// character of the source.
func (s *scanner) Peek() rune {
	if s.ch < 0 {
		// this code is only run for the very first character
		s.ch = s.next()
		if s.ch == '\uFEFF' {
			s.ch = s.next() // ignore BOM
		}
	}
	return s.ch
}

// error reports msg via s.Error when set, otherwise to os.Stderr,
// and increments ErrorCount either way.
func (s *scanner) error(msg string) {
	s.ErrorCount++
	if s.Error != nil {
		s.Error(s, msg)
		return
	}
	pos := s.position
	if !pos.IsValid() {
		pos = s.Pos()
	}
	fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}

// scanAlphanumeric consumes a run of alphanumeric characters starting at ch
// and returns the first character after the run.
func (s *scanner) scanAlphanumeric(ch rune) rune {
	for isAlphanumeric(ch) {
		ch = s.next()
	}
	return ch
}

// scanGraphic consumes a run of graphic token characters starting at ch
// and returns the first character after the run.
func (s *scanner) scanGraphic(ch rune) rune {
	for isGraphic(ch) {
		ch = s.next()
	}
	return ch
}

// digitVal returns the numeric value of a (hexadecimal) digit,
// or 16 when ch is not a digit.
func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch - 'a' + 10)
	case 'A' <= ch && ch <= 'F':
		return int(ch - 'A' + 10)
	}
	return 16 // larger than any legal digit val
}

// isDecimal reports whether ch is a decimal digit.
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }

// True if the rune is a graphic token char per ISO §6.4.2
func isGraphic(ch rune) bool {
	return isOneOf(ch, `#$&*+-./:<=>?@^\~`)
}

// ISO §6.5.2 "alphanumeric char" extended to Unicode
func isAlphanumeric(ch rune) bool {
	if ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
		return true
	}
	return false
}

// true if the rune is a valid start for a variable
func isVariableStart(ch rune) bool {
	return ch == '_' || unicode.IsUpper(ch)
}

// isOneOf reports whether ch appears in chars.
func isOneOf(ch rune, chars string) bool {
	for _, allowed := range chars {
		if ch == allowed {
			return true
		}
	}
	return false
}

// isSolo reports whether ch is one of the solo characters handled here
// ('!' or ';').
func isSolo(ch rune) bool { return ch == '!' || ch == ';' }

// scanMantissa consumes a run of decimal digits starting at ch
// and returns the first character after the run.
func (s *scanner) scanMantissa(ch rune) rune {
	for isDecimal(ch) {
		ch = s.next()
	}
	return ch
}

// scanFraction consumes the fractional part of a float, if present.
func (s *scanner) scanFraction(ch rune) rune {
	if ch == '.' {
		ch = s.scanMantissa(s.next())
	}
	return ch
}

// scanExponent consumes the exponent part of a float ('e'/'E' with an
// optional sign), if present.
func (s *scanner) scanExponent(ch rune) rune {
	if ch == 'e' || ch == 'E' {
		ch = s.next()
		if ch == '-' || ch == '+' {
			ch = s.next()
		}
		ch = s.scanMantissa(ch)
	}
	return ch
}

// scanNumber scans an integer or float literal starting at ch.
// It returns the token type (Int or Float), the first character after the
// literal, and possibly an extra token (FullStop) that was read early when
// an integer is immediately followed by a '.' that is not a fraction.
func (s *scanner) scanNumber(ch rune) (rune, rune, rune) {
	// isDecimal(ch)
	if ch == '0' {
		// int or float
		ch = s.next()
		switch ch {
		case 'x', 'X':
			// hexadecimal int
			ch = s.next()
			hasMantissa := false
			for digitVal(ch) < 16 {
				ch = s.next()
				hasMantissa = true
			}
			if !hasMantissa {
				s.error("illegal hexadecimal number")
			}
		case '\'':
			// character code literal (0'c)
			ch = s.next()
			if ch == '\\' {
				ch = s.scanEscape('\'')
			} else {
				ch = s.next()
			}
		default:
			// octal int or float
			has8or9 := false
			for isDecimal(ch) {
				if ch > '7' {
					has8or9 = true
				}
				ch = s.next()
			}
			if ch == '.' || ch == 'e' || ch == 'E' {
				// float
				ch = s.scanFraction(ch)
				ch = s.scanExponent(ch)
				return Float, ch, 0
			}
			// octal int
			if has8or9 {
				s.error("illegal octal number")
			}
		}
		return Int, ch, 0
	}
	// decimal int or float
	ch = s.scanMantissa(ch)
	if ch == 'e' || ch == 'E' { // float
		ch = s.scanExponent(ch)
		return Float, ch, 0
	}
	if ch == '.' {
		ch = s.next()
		if isDecimal(ch) {
			ch = s.scanMantissa(ch)
			ch = s.scanExponent(ch)
			return Float, ch, 0
		}
		// the '.' was a term-ending full stop read too early;
		// report it as the extra token
		return Int, ch, FullStop
	}
	return Int, ch, 0
}

// scanDigits consumes up to n digits in the given base and reports an
// error when fewer than n digits were present.
func (s *scanner) scanDigits(ch rune, base, n int) rune {
	for n > 0 && digitVal(ch) < base {
		ch = s.next()
		n--
	}
	if n > 0 {
		s.error("illegal char escape")
	}
	return ch
}

// scanEscape scans an escape sequence following a backslash inside a
// literal delimited by quote and returns the first character after it.
func (s *scanner) scanEscape(quote rune) rune {
	ch := s.next() // read character after '/'
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 's', 't', 'v', '\\', quote:
		// nothing to do
		ch = s.next()
	case '0', '1', '2', '3', '4', '5', '6', '7':
		ch = s.scanDigits(ch, 8, 3)
	case 'x':
		ch = s.scanDigits(s.next(), 16, 2)
	case 'u':
		ch = s.scanDigits(s.next(), 16, 4)
	case 'U':
		ch = s.scanDigits(s.next(), 16, 8)
	default:
		s.error("illegal char escape")
	}
	return ch
}

// scanString scans a literal delimited by quote and returns the number of
// characters in it; an unterminated literal is reported via s.error.
func (s *scanner) scanString(quote rune) (n int) {
	ch := s.next() // read character after quote
	for ch != quote {
		if ch == '\n' || ch < 0 {
			s.error("literal not terminated")
			return
		}
		if ch == '\\' {
			ch = s.scanEscape(quote)
		} else {
			ch = s.next()
		}
		n++
	}
	return
}

// scanComment scans a line comment ("% ...") or a bracketed comment
// ("/* ... */") and returns the first character after it.
func (s *scanner) scanComment(ch rune) rune {
	// ch == '%' || ch == '*'
	if ch == '%' {
		// line comment
		ch = s.next() // read character after "%"
		for ch != '\n' && ch >= 0 {
			ch = s.next()
		}
		return ch
	}

	// general comment. See Note1
	depth := 1
	ch = s.next() // read character after "/*"
	for depth > 0 {
		if ch < 0 {
			s.error("comment not terminated")
			break
		}
		ch0 := ch
		ch = s.next()
		if ch0 == '*' && ch == '/' {
			ch = s.next()
			depth--
		} else if ch0 == '/' && ch == '*' {
			ch = s.next()
			depth++
		}
	}
	return ch
}

// Note1: Nested comments are prohibited by ISO Prolog §6.4.1. To wit,
// "The comment text of a bracketed comment shall not contain the comment
// close sequence." However, nested comments are ridiculously practical
// during debugging and development, so I've chosen to deviate by being
// more permissive than is strictly allowed. SWI-Prolog does the same thing.

// Scan reads the next token or Unicode character from source and returns it.
// It returns EOF at the end of the source. It reports scanner errors (read and
// token errors) by calling s.Error, if not nil; otherwise it prints an error
// message to os.Stderr.
func (s *scanner) Scan() rune {
	ch := s.Peek()

	// reset token text position
	s.tokPos = -1
	s.Line = 0

	// skip white space
	for unicode.IsSpace(ch) {
		ch = s.next()
	}

	// start collecting token text
	s.tokBuf.Reset()
	s.tokPos = s.srcPos - s.lastCharLen

	// set token position
	// (this is a slightly optimized version of the code in Pos())
	s.Offset = s.srcBufOffset + s.tokPos
	if s.column > 0 {
		// common case: last character was not a '\n'
		s.Line = s.line
		s.Column = s.column
	} else {
		// last character was a '\n'
		// (we cannot be at the beginning of the source
		// since we have called next() at least once)
		s.Line = s.line - 1
		s.Column = s.lastLineLen
	}

	// determine token value
	tok := ch
	switch {
	case ch == '/': // '/' can start a comment or an atom
		ch = s.next()
		if ch == '*' {
			ch = s.scanComment(ch)
			tok = Comment
		} else {
			tok = Atom
			ch = s.scanGraphic(ch)
			if ch == '(' {
				tok = Functor
			}
		}
	case isGraphic(ch):
		ch = s.next()
		tok = Atom
		ch = s.scanGraphic(ch)
		if ch == '(' {
			tok = Functor
		}
	case isSolo(ch):
		tok = Atom
		ch = s.next()
	case unicode.IsLower(ch): // name by "letter digit token" rule §6.4.2 w/ Unicode
		tok = Atom
		ch = s.next()
		ch = s.scanAlphanumeric(ch)
		if ch == '(' {
			tok = Functor
		}
	case isVariableStart(ch):
		tok = Variable
		ch = s.next()
		ch = s.scanAlphanumeric(ch) // variables look like atoms after the start
	case isDecimal(ch):
		var extraTok rune
		tok, ch, extraTok = s.scanNumber(ch)
		if extraTok != 0 {
			// scanNumber read one token too far (a FullStop);
			// stash it so next() replays it on the following call
			s.extraTok = extraTok
		}
	default:
		switch ch {
		case '"':
			s.scanString('"')
			tok = String
			ch = s.next()
		case '\'':
			s.scanString('\'')
			tok = Atom
			ch = s.next()
			if ch == '(' {
				tok = Functor
			}
		case '%':
			ch = s.scanComment(ch)
			tok = Comment
		default:
			ch = s.next()
		}
	}

	// end of token text
	s.tokEnd = s.srcPos - s.lastCharLen

	s.ch = ch

	// last minute specializations
	switch tok {
	case Atom:
		switch s.TokenText() {
		case ".":
			return FullStop
		}
	case Variable:
		switch s.TokenText() {
		case "_":
			return Void
		}
	}
	return tok
}

// Pos returns the position of the character immediately after
// the character or token returned by the last call to Next or Scan.
func (s *scanner) Pos() (pos position) {
	pos.Filename = s.Filename
	pos.Offset = s.srcBufOffset + s.srcPos - s.lastCharLen
	switch {
	case s.column > 0:
		// common case: last character was not a '\n'
		pos.Line = s.line
		pos.Column = s.column
	case s.lastLineLen > 0:
		// last character was a '\n'
		pos.Line = s.line - 1
		pos.Column = s.lastLineLen
	default:
		// at the beginning of the source
		pos.Line = 1
		pos.Column = 1
	}
	return
}

// TokenText returns the string corresponding to the most recently scanned token.
// Valid after calling Scan().
729 | func (s *scanner) TokenText() string { 730 | if s.tokPos < 0 { 731 | // no token text 732 | return "" 733 | } 734 | 735 | if s.tokEnd < 0 { 736 | // if EOF was reached, s.tokEnd is set to -1 (s.srcPos == 0) 737 | s.tokEnd = s.tokPos 738 | } 739 | 740 | if s.tokBuf.Len() == 0 { 741 | // common case: the entire token text is still in srcBuf 742 | return string(s.srcBuf[s.tokPos:s.tokEnd]) 743 | } 744 | 745 | // part of the token text was saved in tokBuf: save the rest in 746 | // tokBuf as well and return its content 747 | s.tokBuf.Write(s.srcBuf[s.tokPos:s.tokEnd]) 748 | s.tokPos = s.tokEnd // ensure idempotency of TokenText() call 749 | return s.tokBuf.String() 750 | } 751 | -------------------------------------------------------------------------------- /ut.go: -------------------------------------------------------------------------------- 1 | package ut 2 | 3 | import ( 4 | "log" 5 | "strings" 6 | ) 7 | 8 | var ( 9 | // VAR stands for variable type 10 | VAR = func(t rune) bool { 11 | return t == Variable 12 | } 13 | 14 | // STR stands for constants or composite terms 15 | STR = func(t rune) bool { 16 | return t == Atom || 17 | t == Float || 18 | t == Int || 19 | t == String || 20 | t == Void || 21 | t == Functor 22 | } 23 | ) 24 | 25 | type ( 26 | // Entry stands for Unification Table Entry 27 | Entry struct { 28 | Term string 29 | Functor string 30 | Components []int 31 | Type rune 32 | } 33 | 34 | // UT stands for Unification Table 35 | UT struct { 36 | // Lookup table (term) -> (index) 37 | Lookup map[string]int 38 | Entries []*Entry 39 | Bindings map[int]int 40 | } 41 | ) 42 | 43 | // Arity is the arity of the term; for variables and constants, it is 0. 44 | func (e *Entry) Arity() int { 45 | if e.Type != Functor { 46 | return 0 47 | } 48 | 49 | return len(e.Components) 50 | } 51 | 52 | // Unify returns a unification maps with VAR bindings. 53 | // Also see ut.MGU for particular terms. 
54 | func Unify(x, y string) map[string]string { 55 | tokens := Tokenize(x, y) 56 | ut := New(tokens) 57 | ix, iy := ut.Lookup[x], ut.Lookup[y] 58 | if !ut.Unify(ix, iy) { 59 | return nil 60 | } 61 | 62 | mgu := make(map[string]string) 63 | for i, j := range ut.Bindings { 64 | j = ut.dereference(j) 65 | mgu[ut.Entries[i].Term] = ut.termString(j) 66 | } 67 | 68 | return mgu 69 | } 70 | 71 | // New creates a new Unification Table. 72 | func New(tokens []*Token) (ut *UT) { 73 | ut = &UT{Lookup: make(map[string]int), Bindings: make(map[int]int)} 74 | 75 | for i, n := 0, len(tokens)-1; n >= 0; n-- { 76 | t := tokens[n] 77 | if _, exists := ut.Lookup[t.Term]; exists { 78 | continue 79 | } 80 | 81 | e := &Entry{ 82 | Term: t.Term, 83 | Functor: t.Functor, 84 | Type: t.Type, 85 | } 86 | 87 | for _, c := range t.Components { 88 | idx, exists := ut.Lookup[c] 89 | if !exists { 90 | log.Fatalf("Component: %s not found", c) 91 | } 92 | e.Components = append(e.Components, idx) 93 | } 94 | 95 | ut.Lookup[t.Term] = i 96 | ut.Entries = append(ut.Entries, e) 97 | i++ 98 | } 99 | 100 | return ut 101 | } 102 | 103 | // Unify tries to calculate MGU (Most General Unifier) 104 | func (ut *UT) Unify(ix, iy int) bool { 105 | var ( 106 | // stacks 107 | sx, sy = []int{ix}, []int{iy} 108 | empty = func(s []int) bool { 109 | return len(s) == 0 110 | } 111 | pop = func(s []int) (int, []int) { 112 | n := len(s) 113 | if n > 0 { 114 | i := s[n-1] 115 | s = s[:n-1] 116 | 117 | return i, s 118 | } 119 | 120 | return 0, nil 121 | } 122 | 123 | push = func(s []int, i ...int) []int { 124 | return append(s, i...) 
125 | } 126 | ) 127 | 128 | for !empty(sx) && !empty(sy) { 129 | // pop entries from stacks 130 | ix, sx = pop(sx) 131 | iy, sy = pop(sy) 132 | ex, ey := ut.Entries[ix], ut.Entries[iy] 133 | 134 | switch true { 135 | // case 1: ex is bound to a term and ey is bound to a term} 136 | case STR(ex.Type) && STR(ey.Type): 137 | ax, ay := ex.Arity(), ey.Arity() 138 | if ex.Functor != ey.Functor || ax != ay { 139 | return false 140 | } 141 | 142 | if ax > 0 { 143 | sx = push(sx, ex.Components...) 144 | sy = push(sy, ey.Components...) 145 | } 146 | 147 | // case 2: ex is bound to a term and ey is bound to a variable 148 | case STR(ex.Type) && VAR(ey.Type): 149 | if idx, _, b := ut.bindSTR(ix, iy); !b { 150 | sx = push(sx, ix) 151 | sy = push(sy, idx) 152 | } 153 | 154 | // case 3: ex is bound to a variable and ey is bound to a term 155 | case VAR(ex.Type) && STR(ey.Type): 156 | if idx, _, b := ut.bindSTR(iy, ix); !b { 157 | sx = push(sx, idx) 158 | sy = push(sy, iy) 159 | } 160 | 161 | // case 4: ex is bound to a variable and ey is bound to a variable 162 | case VAR(ex.Type) && VAR(ey.Type): 163 | if idx1, idx2, b := ut.bindVAR(ix, iy); !b { 164 | sx = push(sx, idx1) 165 | sy = push(sy, idx2) 166 | } 167 | } 168 | } 169 | 170 | return true 171 | } 172 | 173 | // MGU returns The Most General Unifier as a string for a given term. 174 | // It dereferences bindings and term componenets. 175 | func (ut *UT) MGU(term string) string { 176 | i, ok := ut.Lookup[term] 177 | if !ok { 178 | return "" 179 | } 180 | 181 | i = ut.dereference(ut.Bindings[i]) 182 | return ut.termString(i) 183 | } 184 | 185 | // termString constructs a new term string by dereferencing all components. 
186 | func (ut *UT) termString(idx int) string { 187 | if idx >= len(ut.Entries) { 188 | return "" 189 | } 190 | 191 | e := ut.Entries[idx] 192 | if e.Type != Functor { 193 | return e.Functor 194 | } 195 | 196 | components := []string{} 197 | for _, c := range e.Components { 198 | i := ut.dereference(c) 199 | if i != idx && ut.Entries[i].Type == Functor { 200 | components = append(components, ut.termString(i)) 201 | } else { 202 | components = append(components, ut.Entries[i].Functor) 203 | } 204 | } 205 | 206 | return ut.Entries[idx].Functor + "(" + strings.Join(components, ",") + ")" 207 | } 208 | 209 | // dereference follows bindings and returns index for dereferenced variable. 210 | func (ut *UT) dereference(idx int) int { 211 | i, ok := idx, true 212 | for ok { 213 | i, ok = ut.Bindings[i] 214 | if ok { 215 | idx = i 216 | } 217 | } 218 | return idx 219 | } 220 | 221 | // bindSTR tries to bind a VAR(varIdx) to STR(strIdx). 222 | // If VAR(varIdx) is already bound then dereference it and try again 223 | // or returns indexes to push them on stacks 224 | func (ut *UT) bindSTR(strIdx, varIdx int) (int, int, bool) { 225 | idx, ok := ut.Bindings[varIdx] 226 | if !ok { 227 | // var is a free variable 228 | // bind var to str 229 | ut.Bindings[varIdx] = strIdx 230 | return strIdx, varIdx, true 231 | } 232 | 233 | // var is already bound - dereference 234 | idx = ut.dereference(idx) 235 | 236 | // var is bound to a STR 237 | e := ut.Entries[idx] 238 | if STR(e.Type) { 239 | return idx, varIdx, false 240 | } 241 | 242 | // free variable 243 | ut.Bindings[varIdx] = idx 244 | return idx, varIdx, true 245 | } 246 | 247 | // bindVAR tries to bind two VARs. 248 | // If both are already bound the function returns indexes to push them on stacks 249 | // and false as boolean information that binding failed. 
250 | func (ut *UT) bindVAR(varIdx1, varIdx2 int) (int, int, bool) { 251 | i1, ok1 := ut.Bindings[varIdx1] 252 | i2, ok2 := ut.Bindings[varIdx2] 253 | 254 | // var1 is free and var2 is free 255 | if !ok1 && !ok2 { 256 | ut.Bindings[varIdx1] = varIdx2 257 | return varIdx1, varIdx2, true 258 | } 259 | 260 | // var1 is free and var2 is bound 261 | if !ok1 && ok2 { 262 | ut.Bindings[varIdx1] = varIdx2 263 | return varIdx1, varIdx2, true 264 | } 265 | 266 | // var1 is bound and var2 us free 267 | if ok1 && !ok2 { 268 | ut.Bindings[varIdx2] = varIdx1 269 | return varIdx1, varIdx2, true 270 | } 271 | 272 | // var1 is bound and var2 is bound 273 | return i1, i2, false 274 | } 275 | -------------------------------------------------------------------------------- /ut_test.go: -------------------------------------------------------------------------------- 1 | package ut 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestTokenize(t *testing.T) { 9 | x, y := "p(f(X),h(Y,f(a)),Y)", "p(X,h(Z,Y),f(a))" 10 | n, ix, iy := 15, 0, 8 11 | tokens := Tokenize(x, y) 12 | 13 | if len(tokens) != n { 14 | t.Fatalf("len(tokens) Got: %d, Expected: %d", len(tokens), n) 15 | } 16 | 17 | if x != tokens[ix].Term { 18 | t.Fatalf("tokens[%d] Got: %s, Expected: %s", ix, tokens[ix].Term, x) 19 | } 20 | 21 | if y != tokens[iy].Term { 22 | t.Fatalf("tokens[%d] Got: %s, Expected: %s", iy, tokens[iy].Term, y) 23 | } 24 | } 25 | 26 | func TestUT(t *testing.T) { 27 | x, y := "p(Z,h(Z,W),f(W))", "p(f(X),h(Y,f(a)),Y)" 28 | n, ix, iy := 12, 11, 6 29 | 30 | ut := New(Tokenize(x, y)) 31 | if len(ut.Entries) != n { 32 | for i, e := range ut.Entries { 33 | t.Logf("[%d]: %s\n", i, e.Term) 34 | } 35 | t.Fatalf("len(ut.Entries) Got: %d, Expected: %d", len(ut.Entries), n) 36 | } 37 | 38 | if ut.Lookup[x] != ix { 39 | t.Fatalf("ut.Lookup[%s] Got: %v, Expected: %d", x, ut.Lookup[x], ix) 40 | } 41 | 42 | if ut.Lookup[y] != iy { 43 | t.Fatalf("ut.Lookup[%s] Got: %v, Expected: %d", y, ut.Lookup[y], iy) 44 | } 
45 | } 46 | 47 | func TestUnify(t *testing.T) { 48 | x, y := "p(Z,h(Z,W),f(W))", "p(f(X),h(Y,f(a)),Y)" 49 | ut := New(Tokenize(x, y)) 50 | ix, iy := ut.Lookup[x], ut.Lookup[y] 51 | if !ut.Unify(ix, iy) { 52 | t.Fatalf("ut.Unify(%d, %d) failed", ix, iy) 53 | } 54 | 55 | mguW := ut.MGU("W") 56 | if mguW != "f(a)" { 57 | t.Fatalf("Got W => %s Expected: f(a)", mguW) 58 | } 59 | 60 | mguX := ut.MGU("X") 61 | if mguX != "f(a)" { 62 | t.Fatalf("Got X => %s Expected: f(a)", mguX) 63 | } 64 | 65 | mguY := ut.MGU("Y") 66 | if mguY != "f(f(a))" { 67 | t.Fatalf("Got Y => %s Expected: f(f(a))", mguY) 68 | } 69 | 70 | mguZ := ut.MGU("Z") 71 | if mguZ != "f(f(a))" { 72 | t.Fatalf("Got Z => %s Expected: f(f(a))", mguZ) 73 | } 74 | } 75 | 76 | func ExampleUnify() { 77 | x, y := "f(X1,g(X2,X3),X2,b)", "f(g(h(a,X5),X2),X1,h(a,X4),X4)" 78 | mgu := Unify(x, y) 79 | fmt.Println("X1 = " + mgu["X1"]) 80 | fmt.Println("X2 = " + mgu["X2"]) 81 | fmt.Println("X3 = " + mgu["X3"]) 82 | fmt.Println("X4 = " + mgu["X4"]) 83 | fmt.Println("X5 = " + mgu["X5"]) 84 | 85 | // Output: 86 | // X1 = g(h(a,b),h(a,b)) 87 | // X2 = h(a,b) 88 | // X3 = h(a,b) 89 | // X4 = b 90 | // X5 = b 91 | } 92 | --------------------------------------------------------------------------------