├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── doc.go ├── go.mod ├── token.go ├── ut.go └── ut_test.go /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push] 3 | 4 | jobs: 5 | build: 6 | strategy: 7 | matrix: 8 | go-version: ['1.22.x'] 9 | os: [ubuntu-latest, macos-latest, windows-latest] 10 | runs-on: ${{ matrix.os }} 11 | steps: 12 | - name: Set up Go 13 | uses: actions/setup-go@v1 14 | with: 15 | go-version: ${{ matrix.go-version }} 16 | id: go 17 | 18 | - name: Check out code into the Go module directory 19 | uses: actions/checkout@v2 20 | 21 | - name: Get dependencies 22 | run: go get -v -t -d ./... 23 | 24 | - name: Build 25 | run: | 26 | go vet 27 | go build -v . 28 | 29 | - name: Test 30 | run: | 31 | go test -v -race -covermode=atomic 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | go.sum 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Kuba Podgórski 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GoDoc](https://godoc.org/github.com/kuba--/ut?status.svg)](http://godoc.org/github.com/kuba--/ut) 2 | [![Go Report Card](https://goreportcard.com/badge/github.com/kuba--/ut)](https://goreportcard.com/report/github.com/kuba--/ut) 3 | [![Build Status](https://github.com/kuba--/ut/workflows/build/badge.svg)](https://github.com/kuba--/ut/actions?query=workflow%3Abuild) 4 | [![Version](https://badge.fury.io/gh/kuba--%2Fut.svg)](https://github.com/kuba--/ut/releases) 5 | 6 | # ut 7 | Package ut implements "Yet Another Efficient Unification Algorithm" by Alin Suciu (https://arxiv.org/abs/cs/0603080v1). 8 | 9 | The unification algorithm is at the core of the logic programming paradigm, the first unification algorithm being developed by Robinson. More efficient algorithms were developed later by Martelli and, Montanari. 10 | 11 | Unification unifies terms. A term is one of variable, constant, functor, or literal value. A variable starts with an upper case letter and their names are the keys in the map returned by Unify. Constants start with a lowercase letter. Values are integers, floating point values, or strings. 
As a simple example: 12 | ```go 13 | x,y := `f(X,Y)`, `f(1,"hello")` 14 | ``` 15 | This would unify the functor f such that the variable `X` is `1` and the variable `Y` is `"hello"` in the returned map. 16 | 17 | 18 | Here yet another efficient unification algorithm centered on a specific data structure, called the Unification Table. 19 | 20 | ```Go 21 | x, y := "p(Z,h(Z,W),f(W))", "p(f(X),h(Y,f(a)),Y)" 22 | mgu := ut.Unify(x, y) 23 | fmt.Println("W = " + mgu["W"]) 24 | fmt.Println("X = " + mgu["X"]) 25 | fmt.Println("Y = " + mgu["Y"]) 26 | fmt.Println("Z = " + mgu["Z"]) 27 | 28 | // Output: 29 | // W = f(a) 30 | // X = f(a) 31 | // Y = f(f(a)) 32 | // Z = f(f(a)) 33 | 34 | x, y = "f(X1,g(X2,X3),X2,b)", "f(g(h(a,X5),X2),X1,h(a,X4),X4)" 35 | mgu = ut.Unify(x, y) 36 | fmt.Println("X1 = " + mgu["X1"]) 37 | fmt.Println("X2 = " + mgu["X2"]) 38 | fmt.Println("X3 = " + mgu["X3"]) 39 | fmt.Println("X4 = " + mgu["X4"]) 40 | fmt.Println("X5 = " + mgu["X5"]) 41 | 42 | // Output: 43 | // X1 = g(h(a,b),h(a,b)) 44 | // X2 = h(a,b) 45 | // X3 = h(a,b) 46 | // X4 = b 47 | // X5 = b 48 | ``` 49 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package ut implements "Yet Another Efficient Unification Algorithm" by Alin Suciu (https://arxiv.org/abs/cs/0603080v1). 2 | // The unification algorithm is at the core of the logic programming paradigm, 3 | // the first unification algorithm being developed by Robinson. More efficient algorithms were developed later by Martelli and, Montanari. 4 | // Here yet another efficient unification algorithm centered on a specific data structure, called the Unification Table. 
5 | package ut 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kuba--/ut 2 | 3 | go 1.22 4 | -------------------------------------------------------------------------------- /token.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Copyright 2013 Michael Hendricks. All rights reserved. 3 | // Copyright 2018 Kuba Podgórski. All rights reserved. 4 | // Use of this source code is governed by a BSD-style 5 | // license that can be found in the LICENSE file. 6 | 7 | // Tokenize UTF-8-encoded Prolog text. 8 | // It takes an io.Reader providing the source, which then can be tokenized 9 | // with the Scan function. For compatibility with 10 | // existing tools, the NUL character is not allowed. If the first character 11 | // in the source is a UTF-8 encoded byte order mark (BOM), it is discarded. 12 | 13 | package ut 14 | 15 | import ( 16 | "bytes" 17 | "fmt" 18 | "io" 19 | "os" 20 | "strings" 21 | "unicode" 22 | "unicode/utf8" 23 | ) 24 | 25 | // The result of Scan is one of these tokens or a Unicode character. 26 | const ( 27 | EOF = -(iota + 1) // reached end of source 28 | Atom // a Prolog atom, possibly quoted 29 | Comment // a comment 30 | Float // a floating point number 31 | Functor // an atom used as a predicate functor 32 | FullStop // "." 
ending a term 33 | Int // an integer 34 | String // a double-quoted string 35 | Variable // a Prolog variable 36 | Void // the special "_" variable 37 | ) 38 | 39 | const bufLen = 1024 // at least utf8.UTFMax 40 | 41 | var tokenString = map[rune]string{ 42 | EOF: "EOF", 43 | Atom: "Atom", 44 | Comment: "Comment", 45 | Float: "Float", 46 | Functor: "Functor", 47 | FullStop: "FullStop", 48 | Int: "Int", 49 | String: "String", 50 | Variable: "Variable", 51 | Void: "Void", 52 | } 53 | 54 | // Token encapsulating its type, content and related components. 55 | type Token struct { 56 | Type rune 57 | Term string 58 | Functor string 59 | Components []string 60 | } 61 | 62 | // Tokenize scans and classifies prolog terms. 63 | func Tokenize(terms ...string) []*Token { 64 | var tokens []*Token 65 | 66 | s := new(scanner) 67 | for _, t := range terms { 68 | s.Init(strings.NewReader(t)) 69 | tokens = append(tokens, tokenize("", s)...) 70 | } 71 | return tokens 72 | } 73 | 74 | func tokenize(name string, s *scanner) (tokens []*Token) { 75 | term := name 76 | if name != "" { 77 | tokens = append(tokens, &Token{Type: Functor, Functor: name}) 78 | } 79 | 80 | for t := s.Scan(); t != EOF; t = s.Scan() { 81 | txt := s.TokenText() 82 | 83 | switch t { 84 | case Atom, Float, Int, String, Void, Variable: 85 | term += txt 86 | tokens = append(tokens, &Token{Type: t, Term: txt, Functor: txt}) 87 | if name != "" { 88 | tokens[0].Components = append(tokens[0].Components, txt) 89 | } 90 | 91 | case Functor: 92 | tt := tokenize(txt, s) 93 | term += tt[0].Term 94 | tokens = append(tokens, tt...) 95 | if name != "" { 96 | tokens[0].Components = append(tokens[0].Components, tt[0].Term) 97 | } 98 | 99 | case ')': 100 | term += txt 101 | tokens[0].Term = term 102 | 103 | return tokens 104 | 105 | default: 106 | term += txt 107 | } 108 | 109 | } 110 | 111 | return tokens 112 | } 113 | 114 | // Position is represented by a Position value. 115 | // A position is valid if Line > 0. 
// position records a location in the scanned source.
type position struct {
	Filename string // filename, if any
	Offset   int    // byte offset, starting at 0
	Line     int    // line number, starting at 1
	Column   int    // column number, starting at 1 (character count per line)
}

// IsValid returns true if the position is valid.
func (pos *position) IsValid() bool { return pos.Line > 0 }

// String renders the position as "file:line:column" (parts omitted when
// unknown); "???" when no position information is available at all.
func (pos position) String() string {
	s := pos.Filename
	if pos.IsValid() {
		if s != "" {
			s += ":"
		}
		s += fmt.Sprintf("%d:%d", pos.Line, pos.Column)
	}
	if s == "" {
		s = "???"
	}
	return s
}

// A Scanner implements reading of Unicode characters and tokens from an io.Reader.
type scanner struct {
	// Input
	src io.Reader

	// Source buffer
	srcBuf [bufLen + 1]byte // +1 for sentinel for common case of s.next()
	srcPos int              // reading position (srcBuf index)
	srcEnd int              // source end (srcBuf index)

	// Source position
	srcBufOffset int // byte offset of srcBuf[0] in source
	line         int // line count
	column       int // character count
	lastLineLen  int // length of last line in characters (for correct column reporting)
	lastCharLen  int // length of last character in bytes

	// Token text buffer
	// Typically, token text is stored completely in srcBuf, but in general
	// the token text's head may be buffered in tokBuf while the token text's
	// tail is stored in srcBuf.
	tokBuf bytes.Buffer // token text head that is not in srcBuf anymore
	tokPos int          // token text tail position (srcBuf index); valid if >= 0
	tokEnd int          // token text tail end (srcBuf index)

	// One character look-ahead
	ch rune // character before current srcPos

	extraTok rune // an extra token accidentally read early

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	Error func(s *scanner, msg string)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int

	// Start position of most recently scanned token; set by Scan.
	// Calling Init or Next invalidates the position (Line == 0).
	// The Filename field is always left untouched by the Scanner.
	// If an error is reported (via Error) and Position is invalid,
	// the scanner is not inside a token. Call Pos to obtain an error
	// position in that case.
	position
}

// Init initializes a Scanner with a new source and returns s.
// Error is set to nil, ErrorCount is set to 0
func (s *scanner) Init(src io.Reader) *scanner {
	s.src = src

	// initialize source buffer
	// (the first call to next() will fill it by calling src.Read)
	s.srcBuf[0] = utf8.RuneSelf // sentinel
	s.srcPos = 0
	s.srcEnd = 0

	// initialize source position
	s.srcBufOffset = 0
	s.line = 1
	s.column = 0
	s.lastLineLen = 0
	s.lastCharLen = 0

	// initialize token text buffer
	// (required for first call to next()).
	s.tokPos = -1

	// initialize one character look-ahead
	s.ch = -1 // no char read yet

	// initialize extra token
	s.extraTok = 0

	// initialize public fields
	s.Error = nil
	s.ErrorCount = 0
	s.Line = 0 // invalidate token position

	return s
}

// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
func (s *scanner) next() rune {
	// if there's an extra token, return it instead of scanning a new one
	// (scanNumber may have read one character too far; see s.extraTok)
	if s.extraTok != 0 {
		ch := s.extraTok
		s.extraTok = 0
		return ch
	}

	ch, width := rune(s.srcBuf[s.srcPos]), 1

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				s.tokPos = 0
				// s.tokEnd is set by Scan()
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			// (an io.Reader must return io.EOF when it reaches
			// the end of what it is reading - simply returning
			// n == 0 will make this loop retry forever; but the
			// error is in the reader implementation in that case)
			i := s.srcEnd - s.srcPos
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcPos = 0
			s.srcEnd = i + n
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				if s.srcEnd == 0 {
					if s.lastCharLen > 0 {
						// previous character was not EOF
						s.column++
					}
					s.lastCharLen = 0
					return EOF
				}
				if err != io.EOF {
					s.error(err.Error())
				}
				// If err == EOF, we won't be getting more
				// bytes; break to avoid infinite loop. If
				// err is something else, we don't know if
				// we can get more bytes; thus also break.
				break
			}
		}
		// at least one byte
		ch = rune(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			if ch == utf8.RuneError && width == 1 {
				// advance for correct error position
				s.srcPos += width
				s.lastCharLen = width
				s.column++
				s.error("illegal UTF-8 encoding")
				return ch
			}
		}
	}

	// advance
	s.srcPos += width
	s.lastCharLen = width
	s.column++

	// special situations
	switch ch {
	case 0:
		// for compatibility with other tools
		s.error("illegal character NUL")
	case '\n':
		s.line++
		s.lastLineLen = s.column
		s.column = 0
	}

	return ch
}

// Next reads and returns the next Unicode character.
// It returns EOF at the end of the source. It reports
// a read error by calling s.Error, if not nil; otherwise
// it prints an error message to os.Stderr. Next does not
// update the Scanner's Position field; use Pos() to
// get the current position.
func (s *scanner) Next() rune {
	s.tokPos = -1 // don't collect token text
	s.Line = 0    // invalidate token position
	ch := s.Peek()
	s.ch = s.next()
	return ch
}

// Peek returns the next Unicode character in the source without advancing
// the scanner. It returns EOF if the scanner's position is at the last
// character of the source.
func (s *scanner) Peek() rune {
	if s.ch < 0 {
		// this code is only run for the very first character
		s.ch = s.next()
		if s.ch == '\uFEFF' {
			s.ch = s.next() // ignore BOM
		}
	}
	return s.ch
}

// error reports msg via s.Error when set, otherwise to os.Stderr,
// and increments ErrorCount either way.
func (s *scanner) error(msg string) {
	s.ErrorCount++
	if s.Error != nil {
		s.Error(s, msg)
		return
	}
	pos := s.position
	if !pos.IsValid() {
		pos = s.Pos()
	}
	fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}

// scanAlphanumeric consumes a run of alphanumeric characters starting at ch
// and returns the first character after the run.
func (s *scanner) scanAlphanumeric(ch rune) rune {
	for isAlphanumeric(ch) {
		ch = s.next()
	}
	return ch
}

// scanGraphic consumes a run of graphic token characters starting at ch
// and returns the first character after the run.
func (s *scanner) scanGraphic(ch rune) rune {
	for isGraphic(ch) {
		ch = s.next()
	}
	return ch
}

// digitVal returns the numeric value of a (hexadecimal) digit,
// or 16 when ch is not a digit.
func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch - 'a' + 10)
	case 'A' <= ch && ch <= 'F':
		return int(ch - 'A' + 10)
	}
	return 16 // larger than any legal digit val
}

// isDecimal reports whether ch is a decimal digit.
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }

// True if the rune is a graphic token char per ISO §6.4.2
func isGraphic(ch rune) bool {
	return isOneOf(ch, `#$&*+-./:<=>?@^\~`)
}

// ISO §6.5.2 "alphanumeric char" extended to Unicode
func isAlphanumeric(ch rune) bool {
	if ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
		return true
	}
	return false
}

// true if the rune is a valid start for a variable
func isVariableStart(ch rune) bool {
	return ch == '_' || unicode.IsUpper(ch)
}

// isOneOf reports whether ch appears in chars.
func isOneOf(ch rune, chars string) bool {
	for _, allowed := range chars {
		if ch == allowed {
			return true
		}
	}
	return false
}

// isSolo reports whether ch is one of the solo characters handled here
// ('!' or ';').
func isSolo(ch rune) bool { return ch == '!' || ch == ';' }

// scanMantissa consumes a run of decimal digits starting at ch
// and returns the first character after the run.
func (s *scanner) scanMantissa(ch rune) rune {
	for isDecimal(ch) {
		ch = s.next()
	}
	return ch
}

// scanFraction consumes the fractional part of a float, if present.
func (s *scanner) scanFraction(ch rune) rune {
	if ch == '.' {
		ch = s.scanMantissa(s.next())
	}
	return ch
}

// scanExponent consumes the exponent part of a float ('e'/'E' with an
// optional sign), if present.
func (s *scanner) scanExponent(ch rune) rune {
	if ch == 'e' || ch == 'E' {
		ch = s.next()
		if ch == '-' || ch == '+' {
			ch = s.next()
		}
		ch = s.scanMantissa(ch)
	}
	return ch
}

// scanNumber scans an integer or float literal starting at ch.
// It returns the token type (Int or Float), the first character after the
// literal, and possibly an extra token (FullStop) that was read early when
// an integer is immediately followed by a '.' that is not a fraction.
func (s *scanner) scanNumber(ch rune) (rune, rune, rune) {
	// isDecimal(ch)
	if ch == '0' {
		// int or float
		ch = s.next()
		switch ch {
		case 'x', 'X':
			// hexadecimal int
			ch = s.next()
			hasMantissa := false
			for digitVal(ch) < 16 {
				ch = s.next()
				hasMantissa = true
			}
			if !hasMantissa {
				s.error("illegal hexadecimal number")
			}
		case '\'':
			// character code literal (0'c)
			ch = s.next()
			if ch == '\\' {
				ch = s.scanEscape('\'')
			} else {
				ch = s.next()
			}
		default:
			// octal int or float
			has8or9 := false
			for isDecimal(ch) {
				if ch > '7' {
					has8or9 = true
				}
				ch = s.next()
			}
			if ch == '.' || ch == 'e' || ch == 'E' {
				// float
				ch = s.scanFraction(ch)
				ch = s.scanExponent(ch)
				return Float, ch, 0
			}
			// octal int
			if has8or9 {
				s.error("illegal octal number")
			}
		}
		return Int, ch, 0
	}
	// decimal int or float
	ch = s.scanMantissa(ch)
	if ch == 'e' || ch == 'E' { // float
		ch = s.scanExponent(ch)
		return Float, ch, 0
	}
	if ch == '.' {
		ch = s.next()
		if isDecimal(ch) {
			ch = s.scanMantissa(ch)
			ch = s.scanExponent(ch)
			return Float, ch, 0
		}
		// the '.' was a term-ending full stop read too early;
		// report it as the extra token
		return Int, ch, FullStop
	}
	return Int, ch, 0
}

// scanDigits consumes up to n digits in the given base and reports an
// error when fewer than n digits were present.
func (s *scanner) scanDigits(ch rune, base, n int) rune {
	for n > 0 && digitVal(ch) < base {
		ch = s.next()
		n--
	}
	if n > 0 {
		s.error("illegal char escape")
	}
	return ch
}

// scanEscape scans an escape sequence following a backslash inside a
// literal delimited by quote and returns the first character after it.
func (s *scanner) scanEscape(quote rune) rune {
	ch := s.next() // read character after '/'
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 's', 't', 'v', '\\', quote:
		// nothing to do
		ch = s.next()
	case '0', '1', '2', '3', '4', '5', '6', '7':
		ch = s.scanDigits(ch, 8, 3)
	case 'x':
		ch = s.scanDigits(s.next(), 16, 2)
	case 'u':
		ch = s.scanDigits(s.next(), 16, 4)
	case 'U':
		ch = s.scanDigits(s.next(), 16, 8)
	default:
		s.error("illegal char escape")
	}
	return ch
}

// scanString scans a literal delimited by quote and returns the number of
// characters in it; an unterminated literal is reported via s.error.
func (s *scanner) scanString(quote rune) (n int) {
	ch := s.next() // read character after quote
	for ch != quote {
		if ch == '\n' || ch < 0 {
			s.error("literal not terminated")
			return
		}
		if ch == '\\' {
			ch = s.scanEscape(quote)
		} else {
			ch = s.next()
		}
		n++
	}
	return
}

// scanComment scans a line comment ("% ...") or a bracketed comment
// ("/* ... */") and returns the first character after it.
func (s *scanner) scanComment(ch rune) rune {
	// ch == '%' || ch == '*'
	if ch == '%' {
		// line comment
		ch = s.next() // read character after "%"
		for ch != '\n' && ch >= 0 {
			ch = s.next()
		}
		return ch
	}

	// general comment. See Note1
	depth := 1
	ch = s.next() // read character after "/*"
	for depth > 0 {
		if ch < 0 {
			s.error("comment not terminated")
			break
		}
		ch0 := ch
		ch = s.next()
		if ch0 == '*' && ch == '/' {
			ch = s.next()
			depth--
		} else if ch0 == '/' && ch == '*' {
			ch = s.next()
			depth++
		}
	}
	return ch
}

// Note1: Nested comments are prohibited by ISO Prolog §6.4.1. To wit,
// "The comment text of a bracketed comment shall not contain the comment
// close sequence." However, nested comments are ridiculously practical
// during debugging and development, so I've chosen to deviate by being
// more permissive than is strictly allowed. SWI-Prolog does the same thing.

// Scan reads the next token or Unicode character from source and returns it.
// It returns EOF at the end of the source. It reports scanner errors (read and
// token errors) by calling s.Error, if not nil; otherwise it prints an error
// message to os.Stderr.
func (s *scanner) Scan() rune {
	ch := s.Peek()

	// reset token text position
	s.tokPos = -1
	s.Line = 0

	// skip white space
	for unicode.IsSpace(ch) {
		ch = s.next()
	}

	// start collecting token text
	s.tokBuf.Reset()
	s.tokPos = s.srcPos - s.lastCharLen

	// set token position
	// (this is a slightly optimized version of the code in Pos())
	s.Offset = s.srcBufOffset + s.tokPos
	if s.column > 0 {
		// common case: last character was not a '\n'
		s.Line = s.line
		s.Column = s.column
	} else {
		// last character was a '\n'
		// (we cannot be at the beginning of the source
		// since we have called next() at least once)
		s.Line = s.line - 1
		s.Column = s.lastLineLen
	}

	// determine token value
	tok := ch
	switch {
	case ch == '/': // '/' can start a comment or an atom
		ch = s.next()
		if ch == '*' {
			ch = s.scanComment(ch)
			tok = Comment
		} else {
			tok = Atom
			ch = s.scanGraphic(ch)
			if ch == '(' {
				tok = Functor
			}
		}
	case isGraphic(ch):
		ch = s.next()
		tok = Atom
		ch = s.scanGraphic(ch)
		if ch == '(' {
			tok = Functor
		}
	case isSolo(ch):
		tok = Atom
		ch = s.next()
	case unicode.IsLower(ch): // name by "letter digit token" rule §6.4.2 w/ Unicode
		tok = Atom
		ch = s.next()
		ch = s.scanAlphanumeric(ch)
		if ch == '(' {
			tok = Functor
		}
	case isVariableStart(ch):
		tok = Variable
		ch = s.next()
		ch = s.scanAlphanumeric(ch) // variables look like atoms after the start
	case isDecimal(ch):
		var extraTok rune
		tok, ch, extraTok = s.scanNumber(ch)
		if extraTok != 0 {
			// scanNumber read one token too far (a FullStop);
			// stash it so next() replays it on the following call
			s.extraTok = extraTok
		}
	default:
		switch ch {
		case '"':
			s.scanString('"')
			tok = String
			ch = s.next()
		case '\'':
			s.scanString('\'')
			tok = Atom
			ch = s.next()
			if ch == '(' {
				tok = Functor
			}
		case '%':
			ch = s.scanComment(ch)
			tok = Comment
		default:
			ch = s.next()
		}
	}

	// end of token text
	s.tokEnd = s.srcPos - s.lastCharLen

	s.ch = ch

	// last minute specializations
	switch tok {
	case Atom:
		switch s.TokenText() {
		case ".":
			return FullStop
		}
	case Variable:
		switch s.TokenText() {
		case "_":
			return Void
		}
	}
	return tok
}

// Pos returns the position of the character immediately after
// the character or token returned by the last call to Next or Scan.
func (s *scanner) Pos() (pos position) {
	pos.Filename = s.Filename
	pos.Offset = s.srcBufOffset + s.srcPos - s.lastCharLen
	switch {
	case s.column > 0:
		// common case: last character was not a '\n'
		pos.Line = s.line
		pos.Column = s.column
	case s.lastLineLen > 0:
		// last character was a '\n'
		pos.Line = s.line - 1
		pos.Column = s.lastLineLen
	default:
		// at the beginning of the source
		pos.Line = 1
		pos.Column = 1
	}
	return
}

// TokenText returns the string corresponding to the most recently scanned token.
// Valid after calling Scan().
729 | func (s *scanner) TokenText() string { 730 | if s.tokPos < 0 { 731 | // no token text 732 | return "" 733 | } 734 | 735 | if s.tokEnd < 0 { 736 | // if EOF was reached, s.tokEnd is set to -1 (s.srcPos == 0) 737 | s.tokEnd = s.tokPos 738 | } 739 | 740 | if s.tokBuf.Len() == 0 { 741 | // common case: the entire token text is still in srcBuf 742 | return string(s.srcBuf[s.tokPos:s.tokEnd]) 743 | } 744 | 745 | // part of the token text was saved in tokBuf: save the rest in 746 | // tokBuf as well and return its content 747 | s.tokBuf.Write(s.srcBuf[s.tokPos:s.tokEnd]) 748 | s.tokPos = s.tokEnd // ensure idempotency of TokenText() call 749 | return s.tokBuf.String() 750 | } 751 | -------------------------------------------------------------------------------- /ut.go: -------------------------------------------------------------------------------- 1 | package ut 2 | 3 | import ( 4 | "log" 5 | "strings" 6 | ) 7 | 8 | var ( 9 | // VAR stands for variable type 10 | VAR = func(t rune) bool { 11 | return t == Variable 12 | } 13 | 14 | // STR stands for constants or composite terms 15 | STR = func(t rune) bool { 16 | return t == Atom || 17 | t == Float || 18 | t == Int || 19 | t == String || 20 | t == Void || 21 | t == Functor 22 | } 23 | ) 24 | 25 | type ( 26 | // Entry stands for Unification Table Entry 27 | Entry struct { 28 | Term string 29 | Functor string 30 | Components []int 31 | Type rune 32 | } 33 | 34 | // UT stands for Unification Table 35 | UT struct { 36 | // Lookup table (term) -> (index) 37 | Lookup map[string]int 38 | Entries []*Entry 39 | Bindings map[int]int 40 | } 41 | ) 42 | 43 | // Arity is the arity of the term; for variables and constants, it is 0. 44 | func (e *Entry) Arity() int { 45 | if e.Type != Functor { 46 | return 0 47 | } 48 | 49 | return len(e.Components) 50 | } 51 | 52 | // Unify returns a unification maps with VAR bindings. 53 | // Also see ut.MGU for particular terms. 
54 | func Unify(x, y string) map[string]string { 55 | tokens := Tokenize(x, y) 56 | ut := New(tokens) 57 | ix, iy := ut.Lookup[x], ut.Lookup[y] 58 | if !ut.Unify(ix, iy) { 59 | return nil 60 | } 61 | 62 | mgu := make(map[string]string) 63 | for i, j := range ut.Bindings { 64 | j = ut.dereference(j) 65 | mgu[ut.Entries[i].Term] = ut.termString(j) 66 | } 67 | 68 | return mgu 69 | } 70 | 71 | // New creates a new Unification Table. 72 | func New(tokens []*Token) (ut *UT) { 73 | ut = &UT{Lookup: make(map[string]int), Bindings: make(map[int]int)} 74 | 75 | for i, n := 0, len(tokens)-1; n >= 0; n-- { 76 | t := tokens[n] 77 | if _, exists := ut.Lookup[t.Term]; exists { 78 | continue 79 | } 80 | 81 | e := &Entry{ 82 | Term: t.Term, 83 | Functor: t.Functor, 84 | Type: t.Type, 85 | } 86 | 87 | for _, c := range t.Components { 88 | idx, exists := ut.Lookup[c] 89 | if !exists { 90 | log.Fatalf("Component: %s not found", c) 91 | } 92 | e.Components = append(e.Components, idx) 93 | } 94 | 95 | ut.Lookup[t.Term] = i 96 | ut.Entries = append(ut.Entries, e) 97 | i++ 98 | } 99 | 100 | return ut 101 | } 102 | 103 | // Unify tries to calculate MGU (Most General Unifier) 104 | func (ut *UT) Unify(ix, iy int) bool { 105 | var ( 106 | // stacks 107 | sx, sy = []int{ix}, []int{iy} 108 | empty = func(s []int) bool { 109 | return len(s) == 0 110 | } 111 | pop = func(s []int) (int, []int) { 112 | n := len(s) 113 | if n > 0 { 114 | i := s[n-1] 115 | s = s[:n-1] 116 | 117 | return i, s 118 | } 119 | 120 | return 0, nil 121 | } 122 | 123 | push = func(s []int, i ...int) []int { 124 | return append(s, i...) 
125 | } 126 | ) 127 | 128 | for !empty(sx) && !empty(sy) { 129 | // pop entries from stacks 130 | ix, sx = pop(sx) 131 | iy, sy = pop(sy) 132 | ex, ey := ut.Entries[ix], ut.Entries[iy] 133 | 134 | switch true { 135 | // case 1: ex is bound to a term and ey is bound to a term} 136 | case STR(ex.Type) && STR(ey.Type): 137 | ax, ay := ex.Arity(), ey.Arity() 138 | if ex.Functor != ey.Functor || ax != ay { 139 | return false 140 | } 141 | 142 | if ax > 0 { 143 | sx = push(sx, ex.Components...) 144 | sy = push(sy, ey.Components...) 145 | } 146 | 147 | // case 2: ex is bound to a term and ey is bound to a variable 148 | case STR(ex.Type) && VAR(ey.Type): 149 | if idx, _, b := ut.bindSTR(ix, iy); !b { 150 | sx = push(sx, ix) 151 | sy = push(sy, idx) 152 | } 153 | 154 | // case 3: ex is bound to a variable and ey is bound to a term 155 | case VAR(ex.Type) && STR(ey.Type): 156 | if idx, _, b := ut.bindSTR(iy, ix); !b { 157 | sx = push(sx, idx) 158 | sy = push(sy, iy) 159 | } 160 | 161 | // case 4: ex is bound to a variable and ey is bound to a variable 162 | case VAR(ex.Type) && VAR(ey.Type): 163 | if idx1, idx2, b := ut.bindVAR(ix, iy); !b { 164 | sx = push(sx, idx1) 165 | sy = push(sy, idx2) 166 | } 167 | } 168 | } 169 | 170 | return true 171 | } 172 | 173 | // MGU returns The Most General Unifier as a string for a given term. 174 | // It dereferences bindings and term componenets. 175 | func (ut *UT) MGU(term string) string { 176 | i, ok := ut.Lookup[term] 177 | if !ok { 178 | return "" 179 | } 180 | 181 | i = ut.dereference(ut.Bindings[i]) 182 | return ut.termString(i) 183 | } 184 | 185 | // termString constructs a new term string by dereferencing all components. 
186 | func (ut *UT) termString(idx int) string { 187 | if idx >= len(ut.Entries) { 188 | return "" 189 | } 190 | 191 | e := ut.Entries[idx] 192 | if e.Type != Functor { 193 | return e.Functor 194 | } 195 | 196 | components := []string{} 197 | for _, c := range e.Components { 198 | i := ut.dereference(c) 199 | if i != idx && ut.Entries[i].Type == Functor { 200 | components = append(components, ut.termString(i)) 201 | } else { 202 | components = append(components, ut.Entries[i].Functor) 203 | } 204 | } 205 | 206 | return ut.Entries[idx].Functor + "(" + strings.Join(components, ",") + ")" 207 | } 208 | 209 | // dereference follows bindings and returns index for dereferenced variable. 210 | func (ut *UT) dereference(idx int) int { 211 | i, ok := idx, true 212 | for ok { 213 | i, ok = ut.Bindings[i] 214 | if ok { 215 | idx = i 216 | } 217 | } 218 | return idx 219 | } 220 | 221 | // bindSTR tries to bind a VAR(varIdx) to STR(strIdx). 222 | // If VAR(varIdx) is already bound then dereference it and try again 223 | // or returns indexes to push them on stacks 224 | func (ut *UT) bindSTR(strIdx, varIdx int) (int, int, bool) { 225 | idx, ok := ut.Bindings[varIdx] 226 | if !ok { 227 | // var is a free variable 228 | // bind var to str 229 | ut.Bindings[varIdx] = strIdx 230 | return strIdx, varIdx, true 231 | } 232 | 233 | // var is already bound - dereference 234 | idx = ut.dereference(idx) 235 | 236 | // var is bound to a STR 237 | e := ut.Entries[idx] 238 | if STR(e.Type) { 239 | return idx, varIdx, false 240 | } 241 | 242 | // free variable 243 | ut.Bindings[varIdx] = idx 244 | return idx, varIdx, true 245 | } 246 | 247 | // bindVAR tries to bind two VARs. 248 | // If both are already bound the function returns indexes to push them on stacks 249 | // and false as boolean information that binding failed. 
250 | func (ut *UT) bindVAR(varIdx1, varIdx2 int) (int, int, bool) { 251 | i1, ok1 := ut.Bindings[varIdx1] 252 | i2, ok2 := ut.Bindings[varIdx2] 253 | 254 | // var1 is free and var2 is free 255 | if !ok1 && !ok2 { 256 | ut.Bindings[varIdx1] = varIdx2 257 | return varIdx1, varIdx2, true 258 | } 259 | 260 | // var1 is free and var2 is bound 261 | if !ok1 && ok2 { 262 | ut.Bindings[varIdx1] = varIdx2 263 | return varIdx1, varIdx2, true 264 | } 265 | 266 | // var1 is bound and var2 us free 267 | if ok1 && !ok2 { 268 | ut.Bindings[varIdx2] = varIdx1 269 | return varIdx1, varIdx2, true 270 | } 271 | 272 | // var1 is bound and var2 is bound 273 | return i1, i2, false 274 | } 275 | -------------------------------------------------------------------------------- /ut_test.go: -------------------------------------------------------------------------------- 1 | package ut 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestTokenize(t *testing.T) { 9 | x, y := "p(f(X),h(Y,f(a)),Y)", "p(X,h(Z,Y),f(a))" 10 | n, ix, iy := 15, 0, 8 11 | tokens := Tokenize(x, y) 12 | 13 | if len(tokens) != n { 14 | t.Fatalf("len(tokens) Got: %d, Expected: %d", len(tokens), n) 15 | } 16 | 17 | if x != tokens[ix].Term { 18 | t.Fatalf("tokens[%d] Got: %s, Expected: %s", ix, tokens[ix].Term, x) 19 | } 20 | 21 | if y != tokens[iy].Term { 22 | t.Fatalf("tokens[%d] Got: %s, Expected: %s", iy, tokens[iy].Term, y) 23 | } 24 | } 25 | 26 | func TestUT(t *testing.T) { 27 | x, y := "p(Z,h(Z,W),f(W))", "p(f(X),h(Y,f(a)),Y)" 28 | n, ix, iy := 12, 11, 6 29 | 30 | ut := New(Tokenize(x, y)) 31 | if len(ut.Entries) != n { 32 | for i, e := range ut.Entries { 33 | t.Logf("[%d]: %s\n", i, e.Term) 34 | } 35 | t.Fatalf("len(ut.Entries) Got: %d, Expected: %d", len(ut.Entries), n) 36 | } 37 | 38 | if ut.Lookup[x] != ix { 39 | t.Fatalf("ut.Lookup[%s] Got: %v, Expected: %d", x, ut.Lookup[x], ix) 40 | } 41 | 42 | if ut.Lookup[y] != iy { 43 | t.Fatalf("ut.Lookup[%s] Got: %v, Expected: %d", y, ut.Lookup[y], iy) 44 | } 
45 | } 46 | 47 | func TestUnify(t *testing.T) { 48 | x, y := "p(Z,h(Z,W),f(W))", "p(f(X),h(Y,f(a)),Y)" 49 | ut := New(Tokenize(x, y)) 50 | ix, iy := ut.Lookup[x], ut.Lookup[y] 51 | if !ut.Unify(ix, iy) { 52 | t.Fatalf("ut.Unify(%d, %d) failed", ix, iy) 53 | } 54 | 55 | mguW := ut.MGU("W") 56 | if mguW != "f(a)" { 57 | t.Fatalf("Got W => %s Expected: f(a)", mguW) 58 | } 59 | 60 | mguX := ut.MGU("X") 61 | if mguX != "f(a)" { 62 | t.Fatalf("Got X => %s Expected: f(a)", mguX) 63 | } 64 | 65 | mguY := ut.MGU("Y") 66 | if mguY != "f(f(a))" { 67 | t.Fatalf("Got Y => %s Expected: f(f(a))", mguY) 68 | } 69 | 70 | mguZ := ut.MGU("Z") 71 | if mguZ != "f(f(a))" { 72 | t.Fatalf("Got Z => %s Expected: f(f(a))", mguZ) 73 | } 74 | } 75 | 76 | func ExampleUnify() { 77 | x, y := "f(X1,g(X2,X3),X2,b)", "f(g(h(a,X5),X2),X1,h(a,X4),X4)" 78 | mgu := Unify(x, y) 79 | fmt.Println("X1 = " + mgu["X1"]) 80 | fmt.Println("X2 = " + mgu["X2"]) 81 | fmt.Println("X3 = " + mgu["X3"]) 82 | fmt.Println("X4 = " + mgu["X4"]) 83 | fmt.Println("X5 = " + mgu["X5"]) 84 | 85 | // Output: 86 | // X1 = g(h(a,b),h(a,b)) 87 | // X2 = h(a,b) 88 | // X3 = h(a,b) 89 | // X4 = b 90 | // X5 = b 91 | } 92 | --------------------------------------------------------------------------------