├── TODO ├── MANIFEST.in ├── finenight ├── lisp │ ├── fsa-dat.lisp │ ├── finenight.lisp │ ├── tests.lisp │ ├── TODO.txt │ ├── test.lisp │ ├── compile-debug.lisp │ ├── compile.lisp │ ├── iadfa-run.lisp │ ├── edge-test.lisp │ ├── utils-test.lisp │ ├── package.lisp │ ├── fsa-test.lisp │ ├── edge.lisp │ ├── node.lisp │ ├── utils.lisp │ ├── fsc-test.lisp │ ├── fsa.lisp │ ├── fsc.lisp │ ├── iadfa-test.lisp │ ├── fsa-builder.lisp │ ├── CLUnit.lisp │ └── iadfa.lisp ├── python │ ├── __init__.py │ ├── iadfaTest.py │ ├── fsaTest2.py │ ├── error.py │ ├── nameGenerator.py │ ├── fstTest.py │ ├── test.py │ ├── transition.py │ ├── state.py │ ├── et.py │ ├── fscTest.py │ ├── utils.py │ ├── fst.py │ ├── iadfa.py │ ├── possibleStates.py │ ├── recognize │ └── fsc.py ├── CHANGES ├── scheme │ ├── utils-scm-test.scm │ ├── iadfa-run.scm │ ├── test.scm │ ├── edges-test.scm │ ├── plt-comp.scm │ ├── finenight.setup │ ├── node.scm │ ├── finenight.meta │ ├── utils-scm.scm │ ├── SConstruct │ ├── edges.scm │ ├── iadfa-test.scm │ ├── fsa-test.scm │ ├── defstruct.scm │ ├── fsa.scm │ ├── sort.scm │ ├── fsa2.scm │ ├── fsa-builder.scm │ ├── iadfa.scm │ └── chicken.py ├── ChangeLog └── TODO ├── .hgignore ├── ChangeLog ├── CHANGES ├── setup.py ├── LICENSE.TXT ├── DISCUTIONS ├── data └── test.dico └── README.md /TODO: -------------------------------------------------------------------------------- 1 | Python: 2 | - We need to start the FST implementation. 3 | 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | ;recursive-include data/ * 2 | setup.py 3 | recursive-include finenight/src * 4 | -------------------------------------------------------------------------------- /finenight/lisp/fsa-dat.lisp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jpbarrette/moman/HEAD/finenight/lisp/fsa-dat.lisp -------------------------------------------------------------------------------- /finenight/python/__init__.py: -------------------------------------------------------------------------------- 1 | #__all__ = ["error", "fsa", "et", "fsc", "fst", "iadfa", "state", "transition"] 2 | -------------------------------------------------------------------------------- /finenight/CHANGES: -------------------------------------------------------------------------------- 1 | Changes from version 0.1 to version 0.2: 2 | - Added transposition operations for Fast String Correction (fsc.py) 3 | - Added utils.lisp file. 4 | -------------------------------------------------------------------------------- /finenight/lisp/finenight.lisp: -------------------------------------------------------------------------------- 1 | (load "package") 2 | (load "utils") 3 | (load "fsa") 4 | (load "fsa-builder") 5 | (load "iadfa") 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /finenight/python/iadfaTest.py: -------------------------------------------------------------------------------- 1 | from iadfa import IncrementalAdfa 2 | 3 | f = ["append", "appendice", "bappend"] 4 | 5 | fsa = IncrementalAdfa(f, sorted = True) 6 | fsa.graphVizExport("test.dot") 7 | -------------------------------------------------------------------------------- /finenight/scheme/utils-scm-test.scm: -------------------------------------------------------------------------------- 1 | (require-extension utils-scm) 2 | (require-extension check) 3 | 4 | (some (lambda (lhs rhs) 5 | (if (> 5 lhs) 6 | rhs 7 | #f)) 8 | '(1 2 3 4 5 6 7) 9 | '(a b c d e f g)) -------------------------------------------------------------------------------- /finenight/lisp/tests.lisp: -------------------------------------------------------------------------------- 1 | (require :com.rrette.finenight.fsa :fsa) 2 | 3 | (load "node-test.lisp") 4 | (load "edge-test.lisp") 5 | (load "fsa-test.lisp") 6 | (load "fsc-test.lisp") 7 | (load "utils-test.lisp") 8 | (load :state-test) 9 | 10 | (run-all-tests) -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | (^|/)\.svn($|/) 2 | (^|/)\.hg($|/) 3 | (^|/)\.hgtags($|/) 4 | ^curlpp-svn.log$ 5 | ^tailor.state$ 6 | ^tailor.state.old$ 7 | ^tailor.state.journal$ 8 | ^.*\.ppcf$ 9 | ^.*\.fasl$ 10 | ^.*~$ 11 | ^.*\.orig$ 12 | ^\.DS_Store$ 13 | .*\.dot$ 14 | ^#.*#$ 15 | -------------------------------------------------------------------------------- /finenight/lisp/TODO.txt: -------------------------------------------------------------------------------- 1 | iadfa.lisp: 2 | - We could set finals by appending at the end of the list the final states instead of appending 3 | in the front. Then the first state would be the final state without any edges out. This state 4 | is probably the one equivalent. 5 | -------------------------------------------------------------------------------- /finenight/scheme/iadfa-run.scm: -------------------------------------------------------------------------------- 1 | ;(require-extension check) 2 | ;(require-extension iadfa) 3 | (include "iadfa.scm") 4 | 5 | (define my-fsa (gen-iadfa-from-file "com.zone.sorted.small")) 6 | 7 | (display "done") 8 | (newline) 9 | (display (fsa-start-node my-fsa)) 10 | 11 | 12 | -------------------------------------------------------------------------------- /finenight/scheme/test.scm: -------------------------------------------------------------------------------- 1 | (define my-hash (make-hash-table)) 2 | 3 | (display (hash-table-ref my-hash 4 | #\b 5 | (lambda () (quote ())))) 6 | 7 | (hash-table-set! 8 | my-hash 9 | #\b 10 | (append (list ) '(b))) 11 | 12 | (display (hash-table-ref my-hash #\b)) 13 | -------------------------------------------------------------------------------- /finenight/scheme/edges-test.scm: -------------------------------------------------------------------------------- 1 | (require-extension check) 2 | 3 | (declare (uses edges)) 4 | 5 | (load "edges.scm") 6 | 7 | 8 | (define my-edges (list (cons 'a (list (cons 'b 'b))) (cons 'a (list (cons 'c 'c))))) 9 | 10 | (check (transition my-edges 'a 'b) => (list 'b)) 11 | 12 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | Jean-Philippe Barrette-LaPierre (13 April 2005) version 0.1.0 2 | - The recognize script now uses [schulz02fast] algorithm 3 | by default. 4 | 5 | Jean-Philippe Barrette-LaPierre (07 April 2005) version 0.0.0 6 | - Reorganized the files hierarchy. 7 | - Fixed the Fast String Correction algorithm. 8 | - setup.py added. 9 | 10 | -------------------------------------------------------------------------------- /finenight/lisp/test.lisp: -------------------------------------------------------------------------------- 1 | (find-all-symbols :com.rrette.finenight.utils) 2 | (find-all-symbols "common-lisp") 3 | (list-all-packages) 4 | 5 | (do-symbols (s :com.rrette.finenight.utils) 6 | (format nil "~A ~%" s)) 7 | 8 | (let ((syms '())) 9 | (do-symbols (s :com.rrette.finenight.iadfa) 10 | (setf syms (cons s syms))) 11 | syms) 12 | -------------------------------------------------------------------------------- /finenight/python/fsaTest2.py: -------------------------------------------------------------------------------- 1 | from iadfa import * 2 | import pdb 3 | from pprint import pprint 4 | 5 | 6 | def test(): 7 | dfa = IncrementalAdfa(["aient", 8 | "ais", 9 | "ait", 10 | "ant"]) 11 | print(dfa) 12 | 13 | 14 | if __name__ == "__main__": 15 | test() 16 | -------------------------------------------------------------------------------- /finenight/lisp/compile-debug.lisp: -------------------------------------------------------------------------------- 1 | ;(declaim (optimize (speed 3) (space 3) (debug 0))) 2 | (declaim (optimize (speed 0) (space 0) (compilation-speed 0) (debug 3))) 3 | 4 | (compile-file "package.lisp") 5 | (load "package") 6 | (compile-file "utils.lisp") 7 | (compile-file "fsa.lisp") 8 | (compile-file "fsa-builder.lisp") 9 | (compile-file "iadfa.lisp") 10 | (compile-file "iadfa-run.lisp") 11 | -------------------------------------------------------------------------------- /finenight/lisp/compile.lisp: -------------------------------------------------------------------------------- 1 | (declaim (optimize (safety 0) (speed 3) (space 3) (debug 0))) 2 | ;(declaim (optimize (speed 0) (space 0) (compilation-speed 0) (debug 3))) 3 | 4 | (compile-file "package.lisp") 5 | (load "package") 6 | (compile-file "utils.lisp") 7 | (compile-file "fsa.lisp") 8 | (compile-file "fsa-builder.lisp") 9 | (compile-file "iadfa.lisp") 10 | (compile-file "iadfa-run.lisp") 11 | 12 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Changes from version 0.2.0 to 0.2.1: 2 | - Adding Fast string correction to lisp code 3 | 4 | Changes from version 0.1.0 to 0.1.1: 5 | - The recognize script now uses [schulz02fast] algorithm 6 | by default. 7 | - The Levenshtein automaton [oflazer96errortolerant] algorithm 8 | is added. 9 | 10 | Changes from version 0.0.0 to 0.1.0: 11 | - Reorganized the files hierarchy. 12 | - Fixed the Fast String Correction algorithm. 13 | - setup.py added. 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | setup(name='Moman', 6 | version='0.2.1', 7 | description='A tools suite for orthographic/grammatical check', 8 | author='Jean-Philippe Barrette-LaPierre', 9 | author_email='jpb_NO_SPAM@rrette.com', 10 | url='http://rrette.com/moman.html', 11 | packages=['finenight'], 12 | package_dir={'finenight': 'finenight/python/'}, 13 | scripts=['finenight/python/recognize'] 14 | ) 15 | -------------------------------------------------------------------------------- /finenight/scheme/plt-comp.scm: -------------------------------------------------------------------------------- 1 | (define hash-table-exists? 2 | (lambda (table key) 3 | (hash-table-ref table key #f))) 4 | 5 | (define hash-table-values 6 | (lambda (table) 7 | (hash-table-map table (lambda (key value) value)))) 8 | 9 | (define hash-table-set! hash-table-put!) 10 | (define hash-table-ref hash-table-get) 11 | 12 | (define hash-table-keys 13 | (lambda (table) 14 | (hash-table-map table (lambda (key value) key)))) 15 | 16 | ;(define node-final-set! set-node-final!) 17 | 18 | -------------------------------------------------------------------------------- /finenight/scheme/finenight.setup: -------------------------------------------------------------------------------- 1 | ;(compile -s -O3 -inline -profile utils-scm.scm) 2 | ;(compile -s -O3 -inline -profile fsa.scm) 3 | ;(compile -s -O3 -inline -profile fsa-builder.scm) 4 | ;(compile -s -O3 -inline -profile iadfa.scm) 5 | (compile -O3 -inline -block -profile iadfa-run.scm) 6 | (install-program 7 | ; Name of your extension: 8 | 'finenight 9 | ; Files to install for your extension: 10 | '("iadfa-run") 11 | ; Assoc list with properties for your extension: 12 | '((version 1.2) 13 | (documentation "fsa.html"))) 14 | -------------------------------------------------------------------------------- /finenight/ChangeLog: -------------------------------------------------------------------------------- 1 | Jean-Philippe Barrette-LaPierre (19 Mars 2006) version 0.2.0 2 | - (lisp) a node is now epsilon aware. 3 | - (lisp) added transition function for nodes. 4 | - (lisp) fixed graphviz export. 5 | 6 | Jean-Philippe Barrette-LaPierre (5 September 2005) version 0.2.0 7 | - Added some Fast string correction to lisp code. 8 | 9 | Jean-Philippe Barrette-LaPierre (17 April 2005) version 0.1.0 10 | - Added utils.lisp file. 11 | 12 | Jean-Philippe Barrette-LaPierre (13 April 2005) version 0.1.0 13 | - Added transposition operations for Fast String Correction (fsc.py) 14 | -------------------------------------------------------------------------------- /finenight/scheme/node.scm: -------------------------------------------------------------------------------- 1 | ;; the node consists of a label and a map a symbol to 2 | ;; a destination object. 3 | (define-record node 4 | label 5 | input-map) 6 | 7 | 8 | (define node-add-edge! 9 | (lambda (node edge) 10 | (set! (cons (destination-node edge) 11 | (hash-table-href (node-input-map node) 12 | (input-symbol edge) 13 | '()))))) 14 | 15 | 16 | ;; will return the list of destination nodes for the 17 | ;; given node. 18 | (define node-transition 19 | (lambda (node symbol) 20 | (hash-table-ref (node-input-map node) symbol '()))) 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /finenight/lisp/iadfa-run.lisp: -------------------------------------------------------------------------------- 1 | 2 | (load "finenight") 3 | 4 | (in-package :com.rrette.finenight.iadfa) 5 | 6 | (defun iadfa-run () 7 | (let* ((iadfa (gen-iadfa-from-file "../../data/com.zone.sorted.small")) 8 | (fsa (iadfa-fsa iadfa))) 9 | (print-stats iadfa))) 10 | ;(with-open-file (str "output.txt" :direction :output :if-exists :supersede) 11 | ;(dolist (word (extract-words fsa)) 12 | ;(format str "~A~%" word))))) 13 | 14 | (iadfa-run) 15 | 16 | ;(do-symbols (s (find-package :com.rrette.finenight.iadfa)) 17 | ;(sb-ext:save-lisp-and-die "iadfa-run" :executable t :toplevel #'iadfa-run) 18 | 19 | 20 | -------------------------------------------------------------------------------- /finenight/TODO: -------------------------------------------------------------------------------- 1 | General: 2 | - For better unit tests for iadfa generation, we should check: 3 | - If we got non-final leafs nodes. 4 | - If we got non-reachable nodes (usually it means that we got 5 | deleted nodes still in the FSA). 6 | - If the reversed FSA is not deterministic. If it's the case 7 | it means that the suffixes weren't collapsed correctly. 8 | 9 | Python: 10 | - Implement a faster fsa minimization algorithm. 11 | - Complete the IncrementalADFA class. 12 | - Implement a 2-degres levenshtein-automata algorithm. 13 | 14 | Lisp: 15 | - Check if the "I can add the same edge twice" behavior 16 | is correct. 17 | 18 | -------------------------------------------------------------------------------- /finenight/python/error.py: -------------------------------------------------------------------------------- 1 | class Error(Exception): 2 | def __init__(self, string): 3 | self.string = string 4 | 5 | def __str__(self): 6 | return self.string 7 | 8 | 9 | 10 | 11 | class StateError(Error): 12 | """This error is raised when a state is invalid""" 13 | 14 | 15 | 16 | 17 | class AlphabetError(Error): 18 | """This error is raised when the alphabet of a FSA is invalid""" 19 | 20 | 21 | 22 | 23 | class ConstructionError(Error): 24 | """This error is raised when we encounter a problem when 25 | construction a FSA. 26 | """ 27 | 28 | class NotImplemented(Error): 29 | """This error is raised when the implementation of the function 30 | is incomplete 31 | """ 32 | -------------------------------------------------------------------------------- /finenight/python/nameGenerator.py: -------------------------------------------------------------------------------- 1 | 2 | class IndexNameGenerator: 3 | """Renaming states with this class is not stable, that is, 4 | it's not sure that renaming the FSA will give allways the 5 | same result. 6 | """ 7 | def __init__(self): 8 | self.index = 0 9 | 10 | def generate(self): 11 | name = "q" + str(self.index) 12 | self.index += 1 13 | return name 14 | 15 | class PlainIndexNameGenerator: 16 | """Renaming states with this class is not stable, that is, 17 | it's not sure that renaming the FSA will give allways the 18 | same result. 19 | """ 20 | def __init__(self): 21 | self.index = 0 22 | 23 | def generate(self): 24 | name = str(self.index) 25 | self.index += 1 26 | return name 27 | 28 | -------------------------------------------------------------------------------- /finenight/scheme/finenight.meta: -------------------------------------------------------------------------------- 1 | ((egg "finenight.egg") ; This should never change 2 | 3 | ; List here all the files that should be bundled as part of your egg. Note 4 | ; that (1) mpeg3.meta does not need to be listed here and (2) you might 5 | ; want to include mpeg3-base.scm (if it exists). 6 | 7 | (files "fsa.scm" "utils-scm.scm" "iadfa.scm" "finenight.html" "finenight.setup") 8 | 9 | ; The following should only be present if the egg's documentation should be 10 | ; generated from the wiki: 11 | 12 | (doc-from-wiki) 13 | 14 | ; Your egg's license: 15 | 16 | (license "GPL") 17 | 18 | ; Pick one from the list of categories (see below) for your egg and enter it 19 | ; here. 20 | 21 | (category web) 22 | 23 | ; A list of eggs mpeg3 depends on. If none, you can omit this declaration 24 | ; altogether: 25 | 26 | (needs format) 27 | 28 | (author "Your Name Goes Here") 29 | (synopsis "A basic description of the purpose of the egg.")) -------------------------------------------------------------------------------- /finenight/scheme/utils-scm.scm: -------------------------------------------------------------------------------- 1 | ;(define-extension utils-scm) 2 | 3 | (define my-hash-table-get! 4 | (lambda (hash-table key default-func) 5 | (if (hash-table-exists? hash-table key) 6 | (hash-table-ref hash-table key) 7 | (let ((value (default-func))) 8 | (hash-table-set! hash-table key value) 9 | value)))) 10 | 11 | 12 | (define (for-each-line-in-file filename proc . mode) 13 | (with-input-from-file 14 | filename 15 | (lambda () (apply for-each-line proc (current-input-port) mode)))) 16 | 17 | (define (for-each-line proc . port+mode) 18 | (let while () 19 | (let ((line (apply read-line port+mode))) 20 | (unless (eof-object? line) 21 | (proc line) 22 | (while))))) 23 | 24 | (define vector-walk 25 | (lambda (vector func) 26 | (let ([index 0]) 27 | (for-each (lambda (elem) 28 | (func index elem) 29 | (set! index (+ index 1))) 30 | (vector->list vector))))) -------------------------------------------------------------------------------- /finenight/python/fstTest.py: -------------------------------------------------------------------------------- 1 | from fst import Fst 2 | 3 | import unittest 4 | 5 | 6 | class FstTests(unittest.TestCase): 7 | def setUp(self): 8 | 9 | states = [(0, 'a', 'b', 1), 10 | (0, 'b', 'a', 1), 11 | (1, 'b', 'a', 1)] 12 | 13 | self.fst = Fst(states, ['a', 'b'], ['a', 'b'], 0, 1) 14 | 15 | def testInverse(self): 16 | """ 17 | This function is testing if the inverse is as it should be. 18 | """ 19 | states = [(0, 'b', 'a', 1), 20 | (0, 'a', 'b', 1), 21 | (1, 'a', 'b', 1)] 22 | 23 | inverseFst = Fst(states, ['a', 'b'], ['a', 'b'], 0, 1) 24 | realInverseFst = self.fst.inverse() 25 | errorMsg = "\nThe inverse of fst:\n" + str(self.fst) + "\n\n" \ 26 | "should be like:\n" + str(inverseFst) + "\n\n" \ 27 | "but it's like:\n" + str(realInverseFst) 28 | self.assertTrue(inverseFst == realInverseFst, msg = errorMsg) 29 | 30 | 31 | if __name__ == "__main__": 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /finenight/lisp/edge-test.lisp: -------------------------------------------------------------------------------- 1 | (require :org.ancar.CLUnit "CLUnit.lisp") 2 | (require :com.rrette.finenight.edge "edge.lisp") 3 | 4 | (in-package :com.rrette.finenight) 5 | (import 'org.ancar.CLUnit::deftest) 6 | 7 | (defun test-source () 8 | (and (equal (edge-source '(1 "b" 4)) 1) 9 | (equal (edge-source '(#\b "b" 4)) #\b) 10 | (equal (edge-source '("b" "c" 4)) "b"))) 11 | 12 | (defun test-symbol () 13 | (and (equal (edge-symbol '(4 1 1)) 1) 14 | (equal (edge-symbol '(1 #\b 4)) #\b) 15 | (equal (edge-symbol '(1 "b" 1)) "b"))) 16 | 17 | (defun test-destination () 18 | (and (equal (edge-destination '(1 1 4)) 4) 19 | (equal (edge-destination '(1 1 #\b)) #\b) 20 | (equal (edge-destination '(1 1 "b")) "b"))) 21 | 22 | 23 | (deftest "edge-source test" 24 | :category "Edge base functions tests" 25 | :test-fn #'test-source) 26 | 27 | (deftest "edge-symbol test" 28 | :category "Edge base functions tests" 29 | :test-fn #'test-symbol) 30 | 31 | (deftest "edge-destination test" 32 | :category "Edge base functions tests" 33 | :test-fn #'test-destination) 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, . 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /finenight/python/test.py: -------------------------------------------------------------------------------- 1 | import fsc 2 | from copy import copy 3 | from pdb import set_trace 4 | 5 | 6 | 7 | def getRightNonSubsumingPositions(n, iAnde, basePos): 8 | (i, e) = iAnde 9 | positions = [] 10 | j = i + 1 11 | maxDistance = basePos[0] + n + 1 12 | maxHigh = 0 13 | while j < maxDistance: 14 | f = e - maxHigh 15 | while f <= e + maxHigh: 16 | if f >= 0 and f <= n \ 17 | and fsc.isSubsumming(basePos, (j,f)) \ 18 | and j >= 0: 19 | positions.append((j, f)) 20 | f += 1 21 | j += 1 22 | maxHigh += 1 23 | return positions 24 | 25 | 26 | def powerSet(n, pos, basePos): 27 | positions = getRightNonSubsumingPositions(n, pos, basePos) 28 | set = [] 29 | set.append([pos]) 30 | for p in positions: 31 | s = [[pos] + s for s in powerSet(n, p, basePos)] 32 | set += s 33 | return set 34 | 35 | def possibleStates(n): 36 | basePosIndex = 0 37 | j = 0 38 | set = [[]] 39 | for f in range(n + 1): 40 | set += powerSet(n, (j,f), (basePosIndex, 0)) 41 | basePosIndex += 1 42 | return set 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /finenight/lisp/utils-test.lisp: -------------------------------------------------------------------------------- 1 | (require :com.rrette.finenight.utils "utils.lisp") 2 | (require :org.ancar.CLUnit "CLUnit.lisp") 3 | 4 | (in-package :com.rrette.finenight) 5 | 6 | (import 'org.ancar.CLUnit::deftest) 7 | 8 | 9 | (defun test-identity-copy-hash () 10 | (let* ((hash (make-hash-table 11 | :initial-contents '(("a" . 1) ("b" . 2) ("c" . 3)))) 12 | (hash-copy (copy-hash-table hash))) 13 | (not (eq hash hash-copy)))) 14 | 15 | (defun test-equality-copy-hash () 16 | (let* ((hash (make-hash-table 17 | :initial-contents '(("a" . 1) ("b" . 2) ("c" . 3)))) 18 | (hash-copy (copy-hash-table hash))) 19 | (equalp hash hash-copy))) 20 | 21 | (defun test-inequality () 22 | (let* ((hash (make-hash-table 23 | :initial-contents '(("a" . 1) ("b" . 2) ("c" . 3)))) 24 | (hash-copy (copy-hash-table hash))) 25 | (setf (gethash "d" hash-copy) 4) 26 | (not (equalp hash hash-copy)))) 27 | 28 | (deftest "Hash copy instance not EQ test" 29 | :category "Hash copy" 30 | :test-fn #'test-identity-copy-hash) 31 | 32 | 33 | (deftest "Hash copy equality test" 34 | :category "Hash copy" 35 | :test-fn #'test-equality-copy-hash) 36 | 37 | (deftest "Hash copy and add inequality test" 38 | :category "Hash copy" 39 | :test-fn #'test-inequality) 40 | 41 | -------------------------------------------------------------------------------- /finenight/scheme/SConstruct: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (C) 2005 José Pablo Ezequiel "Pupeno" Fernández Silva 3 | # 4 | # This file is part of scons-chicken. 5 | # 6 | # scons-chicken is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 7 | # scons-chicken is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 8 | # You should have received a copy of the GNU General Public License along with scons-chicken; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA 9 | 10 | # Create an environment including the scons-chicken tool. 11 | env = Environment(tools = ["default", "chicken"], toolpath=["./"]) 12 | # The previous line is to use the non-installed scons-chicken. In your project use: 13 | #env = Environment(tools = ["default", "chicken"]) 14 | 15 | # sc-sap (executable binary) made of sc-sap.scm. 16 | #fsatest = env.ChickenProgram(env, "fsa-test", ["fsa-test.scm", "fsa.scm"]) 17 | #env.Install('/usr/local/bin/', fsatest) 18 | iadfa_run = env.ChickenProgram(env, "iadfa-run", ["fsa.scm", "iadfa.scm", "utils-scm.scm", "iadfa-run.scm"]) 19 | #iadfa = env.ChickenProgram(env, "iadfa-run", ["fsa.scm", "iadfa.scm", "iadfa-run.scm"]) 20 | env.Install('/usr/local/bin/', iadfa_run) 21 | 22 | # Install alias. 23 | env.Alias('install', '/usr/bin/') 24 | -------------------------------------------------------------------------------- /finenight/scheme/edges.scm: -------------------------------------------------------------------------------- 1 | ;; edges is seen by others as a list of 3-tupes 2 | ;; (source-node input-symbol destination-node) 3 | 4 | ;; this is the internal representation of edges 5 | ;; ((src-node1 (input-symbol1 dst-node1) (input-symbol2 dst-node2)) 6 | ;; (src-node2 (input-symbol3 dst-node1) (input-symbol2 dst-node3))) 7 | 8 | ;; should return ((input-symbol3 dst-node1) (input-symbol2 dst-node3)) 9 | (define get-edges-for-source-node 10 | (lambda (edges node) 11 | (cond 12 | ((null? edges) 13 | '()) 14 | ((eq? (car (car edges)) node) 15 | (cdr (car edges))) 16 | (else 17 | (get-edges-for-source-node (cdr edges) node))))) 18 | 19 | ;; should return (dst-node1 dst-node2) 20 | (define transition-for-source-node 21 | (lambda (node input) 22 | (letrec ((T 23 | (lambda (transition-table-for-node) 24 | (if ((null? (transition-table-for-node) 25 | '()) 26 | (let ((entry (car (transition-table-for-node)))) 27 | (if (eq? input 28 | (car (entry))) 29 | (cons (cdr (entry)) 30 | (T (cdr transition-table-for-node))) 31 | (T (cdr transition-table-for-node))))))))) 32 | (T (node))))) 33 | 34 | 35 | ;; from a set of edges it returns the destination nodes for the given 36 | ;; input and node. 37 | (define transition 38 | (lambda (edges node-label input) 39 | (transition-for-source-node (get-edges-for-source-node edges node) input))) 40 | 41 | 42 | (define add-edge 43 | (lambda (edges edge) 44 | (if (null? edges) 45 | (cons (car edge) (cons (cdr edge) '())) 46 | (let ((node (car (car edges))) 47 | (entries (cdr (car edges)))) 48 | (if (eq? (car edge) (car (edges))) 49 | (cons (car edges) 50 | -------------------------------------------------------------------------------- /DISCUTIONS: -------------------------------------------------------------------------------- 1 | Finite-State Language Processing (Language, Speech, and Communication) 2 | by Emmanuel Roche (Editor), Yves Schabes (Editor) 3 | Hardcover: 464 pages ; Dimensions (in inches): 1.25 x 9.33 x 7.39 4 | Publisher: MIT Press; (June 6, 1997) 5 | ISBN: 0262181827 6 | 7 | Morphology and Computation (ACL-MIT Series in Natural Language Processing) 8 | by Richard Sproat (Author) 9 | Hardcover: 313 pages ; Dimensions (in inches): 0.94 x 9.37 x 6.36 10 | Publisher: MIT Press; (April 15, 1992) 11 | ISBN: 0262193140 12 | 13 | Finite State Morphology 14 | by Kenneth R. Beesley, Lauri Karttunen 15 | Paperback: ; Dimensions (in inches): 1.10 x 8.96 x 6.34 16 | Publisher: C S L I Publications; Book and CD-ROM edition (June 2003) 17 | ISBN: 1575864347 18 | 19 | Introduction to Automata Theory, Languages, and Computation (2nd Edition) 20 | by John E. Hopcroft (Author), Rajeev Motwani (Author), Jeffrey D. Ullman (Author) 21 | Hardcover: 521 pages ; Dimensions (in inches): 1.25 x 9.75 x 6.25 22 | Publisher: Pearson Addison Wesley; 2nd edition (November 14, 2000) 23 | ISBN: 0201441241 24 | 25 | Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition 26 | by Daniel Jurafsky (Author), James H. Martin (Author) 27 | Hardcover: 960 pages ; Dimensions (in inches): 1.58 x 9.55 x 7.26 28 | Publisher: Prentice Hall; 1st edition (January 26, 2000) 29 | ISBN: 0130950696 30 | 31 | Computational Morphology: Practical Mechanisms for the English Lexicon (ACL-MIT Series in Natural Language Processing) 32 | by Graeme D. Ritchie (Author), Graham J. Russell (Author), Alan W. Black (Author), Stephen G. Pulman (Author) 33 | Hardcover: 301 pages ; Dimensions (in inches): 0.91 x 9.32 x 6.34 34 | Publisher: MIT Press; (October 9, 1991) 35 | ISBN: 0262181460 36 | -------------------------------------------------------------------------------- /finenight/scheme/iadfa-test.scm: -------------------------------------------------------------------------------- 1 | (require-extension check) 2 | (require-extension iadfa) 3 | (require-extension fsa-builder) 4 | 5 | (define my-iadfa (debug-gen-iadfa '("addendice" 6 | "append" "appendice" 7 | "bappend" "bappendice" 8 | "cappend" "cappendice"))) 9 | (define my-fsa (iadfa-fsa my-iadfa)) 10 | 11 | (check (sort (iadfa-state-ancestrors my-iadfa 1 #\e) <) => '(8 22 39)) 12 | (check (sort (iadfa-state-ancestrors my-iadfa 7 #\a) <) => '(0)) 13 | ;(check (sort (iadfa-state-ancestrors my-iadfa 8) <) => '(7 24)) 14 | ;(check (sort (iadfa-state-ancestrors my-iadfa 24) <) => '()) 15 | (check (accept? my-fsa (string->list "appendice")) => #t) 16 | (check (accept? my-fsa (string->list "bateau")) => #t) 17 | (check (accept? my-fsa (string->list "appendic")) => #f) 18 | (check (accept? my-fsa (string->list "append")) => #t) 19 | ;(check (node-final (get-node my-fsa 6)) => #t) 20 | 21 | (graphviz-export (make-fsa-builder-from-fsa my-fsa)) 22 | (graphviz-export-to-file (build-fsa-from-ancestrors my-iadfa) "ancestrors.dot") 23 | 24 | 25 | 26 | ;(define my-iadfa (gen-iadfa '("appendice" "appendicee" 27 | ; "bappendice" "bappendicee" 28 | ; "batisa" "batise" "batissa" 29 | ; "criba" "cribaa" 30 | ; "crima" "crime"))) 31 | ;(define my-fsa (iadfa-fsa my-iadfa)) 32 | ;(graphviz-export my-fsa) 33 | ;(check (sort (iadfa-state-ancestrors my-iadfa 10) <) => '(9 27 33)) 34 | ;(check (sort (iadfa-state-ancestrors-for-input my-iadfa 10 #\a) <) => '(27 33)) 35 | ;(check (sort (iadfa-state-ancestrors my-iadfa 8) <) => '(7)) 36 | ;(check (sort (iadfa-state-ancestrors my-iadfa 24) <) => '()) 37 | 38 | 39 | 40 | ;(define my-iadfa (handle-word (build-iadfa) (string->list "appendice"))) 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /finenight/python/transition.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | class Transition: 5 | """ 6 | This object is a general representation of a transition, that is 7 | easier to use than a 4-tuple or a 3-tuple transition. 8 | """ 9 | 10 | def __init__(self, tuple): 11 | """ 12 | This is the constructor of a Transition. 13 | 14 | tuple may be a 4-tuple: 15 | (Q, E1*, E2*, Q) that is, The input state, The input symbol, 16 | The output symbol and the output state, respectely. 17 | 18 | tuple may also be a 3-tuple: 19 | (Q, E1, Q) that is, The input state, The input symbol 20 | and the output state, respectely. 21 | """ 22 | self.start = str(tuple[0]) 23 | self.input = tuple[1] 24 | 25 | if len(tuple) == 4: 26 | self.output = tuple[2] 27 | self.target = str(tuple[3]) 28 | else: 29 | self.output = None 30 | self.target = str(tuple[2]) 31 | 32 | 33 | 34 | def __str__(self): 35 | """ 36 | Return the string representation of the instance 37 | """ 38 | output = "(" + self.start + ", " 39 | if self.output is not None: 40 | output += "(" + self.input + ", " 41 | output += self.output + "), " 42 | else: 43 | output += self.input + ", " 44 | 45 | output += self.target + ")" 46 | 47 | return output 48 | 49 | 50 | 51 | 52 | def inverse(self): 53 | """ 54 | This function inverse the transition. 55 | such that, for a 4-tuple, (q1, a, b, q2) becomes (q1, b, a, q2). 56 | Note that it doesn't do nothing for a 3-tuple. 57 | """ 58 | 59 | other = copy.copy(self) 60 | if other.output is not None: 61 | output = other.output 62 | other.output = other.input 63 | other.input = output 64 | 65 | return other 66 | 67 | 68 | -------------------------------------------------------------------------------- /finenight/scheme/fsa-test.scm: -------------------------------------------------------------------------------- 1 | (require-extension check) 2 | (include "fsa-builder.scm") 3 | 4 | ;;(declare (uses fsa)) 5 | ;;(use fsa) 6 | 7 | (define myfsa (make-empty-fsa-builder 'a)) 8 | 9 | (fsa-add-edge! myfsa 'a #\b 'b) 10 | (define a-node (get-node myfsa 'a)) 11 | (check (node-label a-node) => 'a) 12 | (define b-node (car (node-transition a-node #\b))) 13 | (check (node-label b-node) => 'b) 14 | 15 | (fsa-add-edge! myfsa 'b #\c 'c) 16 | (define c-node (car (node-transition b-node #\c))) 17 | (check (node-label c-node) => 'c) 18 | 19 | (fsa-remove-edge! myfsa 'b #\c 'c) 20 | (check (node-transition b-node #\c) => '()) 21 | 22 | (define myfsa (make-empty-fsa-builder 'a)) 23 | 24 | (fsa-add-edge! myfsa 'a #\a 'a) 25 | (fsa-add-edge! myfsa 'a #\b 'ab) 26 | (fsa-add-edge! myfsa 'ab #\b 'abb) 27 | (fsa-add-edge! myfsa 'abb #\o 'abbo) 28 | 29 | (fsa-add-final! myfsa 'abbo) 30 | 31 | (define a-node (get-node myfsa 'a)) 32 | (check (node-label (car (node-transition a-node #\a))) => 'a) 33 | 34 | (define ab-node (car (node-transition a-node #\b))) 35 | (check (node-label ab-node) => 'ab) 36 | 37 | (define abb-node (car (node-transition ab-node #\b))) 38 | (check (node-label abb-node) => 'abb) 39 | 40 | (define abbo-node (car (node-transition abb-node #\o))) 41 | (check (node-label abbo-node) => 'abbo) 42 | (check (node-final (get-node myfsa 'abbo)) => #t) 43 | 44 | (check (fsa-builder-accept? myfsa (string->list "abbo")) => #t) 45 | 46 | ; testing deletion of nodes 47 | (define myfsa (make-empty-fsa-builder 'a)) 48 | 49 | (fsa-add-edge! myfsa 'a #\a 'a) 50 | (fsa-add-edge! myfsa 'a #\c 'c) 51 | (fsa-add-edge! myfsa 'a #\b 'b) 52 | (fsa-add-edge! myfsa 'b #\d 'd) 53 | (fsa-add-edge! myfsa 'd #\c 'c) 54 | 55 | (fsa-add-final! myfsa 'c) 56 | 57 | (fsa-remove-node! myfsa (get-node myfsa 'b)) 58 | 59 | (graphviz-export myfsa) 60 | 61 | 62 | 63 | 64 | 65 | 66 | ;(define b (list 'z 'y 'x 'w 'v)) 67 | ;(define myfsa (make-fsa 'a (list 'b 'c) (list (list 'a 'b 'b) (list 'a 'c 'c)))) 68 | 69 | ;(check (node-transition (fsa-initial-state myfsa) 'a 'b) => (list 'b)) 70 | ;(check (final? myfsa 'a) => #f) 71 | ;(check (final? myfsa 'b) => #t) 72 | ;(check (final? myfsa 'c) => #t) 73 | ;(check (eval (car (transition myfsa 'a 'b))) => (list 'z 'y 'x 'w 'v)) 74 | -------------------------------------------------------------------------------- /data/test.dico: -------------------------------------------------------------------------------- 1 | abduction 2 | abrasif 3 | abrasive 4 | absence 5 | absenteisme 6 | accelerometre 7 | accent 8 | acceptation 9 | accessibilite 10 | accident 11 | accommodation 12 | accomplissement 13 | accord 14 | achat 15 | acronyme 16 | acte 17 | actionneur 18 | activation 19 | acuite 20 | adaptatif 21 | adaptative 22 | adequation 23 | adhesion 24 | administration 25 | admittance 26 | adoption 27 | adresse 28 | affaire 29 | affichage 30 | affidavit 31 | affirmation 32 | agence 33 | agrandissement 34 | ajustable 35 | ajustement 36 | alimentation 37 | allongement 38 | aluminium 39 | amelioration 40 | amenagement 41 | amendement 42 | americain 43 | americaine 44 | amortissement 45 | amortisseur 46 | ampleur 47 | an 48 | analyse 49 | ancien 50 | ancienne 51 | ancrage 52 | angle 53 | anneau 54 | annee 55 | annexe 56 | annonce 57 | anodise 58 | anodisee 59 | anthropomorphique 60 | apostrophe 61 | appareil 62 | appareillage 63 | apparent 64 | apparente 65 | appellation 66 | application 67 | applique 68 | apport 69 | apprehension 70 | approbation 71 | approche 72 | approximation 73 | appui 74 | apres-midi 75 | aptitude 76 | arc 77 | architecture 78 | argent 79 | argument 80 | armoire 81 | arret 82 | arriere 83 | arrimage 84 | arrivee 85 | articulation 86 | ascenseur 87 | aspirateur 88 | assemblage 89 | assemblee 90 | assermentation 91 | asservissement 92 | assistance 93 | association 94 | assurance 95 | astuce 96 | asynchrone 97 | atelier 98 | attaque 99 | atteignable 100 | attente 101 | attention 102 | attestation 103 | attrait 104 | attribuable 105 | au-dessous 106 | augmentation 107 | auparavant 108 | aupres 109 | aussi 110 | autant 111 | auto 112 | automatisation 113 | automatisme 114 | automobile 115 | autorisation 116 | autour 117 | avance 118 | avancement 119 | avant-bras 120 | avant-midi 121 | avenement 122 | avenir 123 | aviation 124 | avion 125 | avis 126 | avril 127 | azimut 128 | base-ball 129 | basilaire 130 | biomecanique 131 | biomedicale 132 | boulonnage 133 | celebration 134 | chiffrier 135 | ci-haut 136 | cinematique 137 | collaborationniste 138 | comportementale 139 | conciergerie 140 | condominium 141 | connecteur 142 | connexion 143 | consequemment 144 | decense 145 | maman 146 | -------------------------------------------------------------------------------- /finenight/lisp/package.lisp: -------------------------------------------------------------------------------- 1 | 2 | (in-package :cl-user) 3 | 4 | (defpackage :com.rrette.finenight.utils 5 | (:use :common-lisp) 6 | (:export :copy-hash-table 7 | :equal-set 8 | :for-each-line-in-file 9 | :generate-name 10 | :hash-table-ref/default 11 | :hash-table-update! 12 | :hash-table-update!/default 13 | :hash-values 14 | :hash-keys 15 | :uniqueness-set 16 | :vector-walk 17 | :with-syms)) 18 | 19 | (defpackage :com.rrette.finenight.fsa 20 | (:use :common-lisp 21 | :com.rrette.finenight.utils) 22 | (:export :accept? 23 | :fsa-start-node 24 | :node-add-edge! 25 | :node-arity 26 | :node-reset 27 | :node-destinations 28 | :node-edges 29 | :node-final 30 | :node-label 31 | :node-remove-edge! 32 | :node-remove-dsts-for-input! 33 | :node-symbols 34 | :node-transition 35 | :node-walk 36 | :make-empty-fsa 37 | :make-empty-node 38 | :make-fsa 39 | :make-node 40 | :extract-words)) 41 | 42 | (defpackage :com.rrette.finenight.fsa-builder 43 | (:use :common-lisp 44 | :com.rrette.finenight.utils 45 | :com.rrette.finenight.fsa) 46 | (:export :build-fsa 47 | :build-fsa-builder-with-nodes 48 | :copy-fsa-builder 49 | :fsa-add-edge! 50 | :fsa-add-final! 51 | :fsa-add-final-node! 52 | :fsa-add-node! 53 | :fsa-builder 54 | :fsa-builder-accept? 55 | :fsa-builder-finals 56 | :fsa-builder-initial-state 57 | :fsa-builder-nodes 58 | :fsa-builder-p 59 | :fsa-edges 60 | :fsa-initial-node 61 | :make-fsa-builder 62 | :fsa-remove-edge! 63 | :fsa-remove-node! 64 | :graphviz-export 65 | :graphviz-export-to-file 66 | :make-empty-fsa-builder 67 | :make-fsa-builder 68 | :make-fsa-builder-from-fsa)) 69 | 70 | (defpackage :com.rrette.finenight.iadfa 71 | (:use :common-lisp 72 | :com.rrette.finenight.fsa 73 | :com.rrette.finenight.fsa-builder 74 | :com.rrette.finenight.utils) 75 | (:export :add-edge 76 | :build-fsa 77 | :build-fsa-from-ancestrors 78 | :build-iadfa 79 | :debug-gen-iadfa 80 | :debug-gen-iadfa-from-file 81 | :detect-problems-from-file 82 | :gen-iadfa-from-file 83 | :iadfa-fsa 84 | :iadfa-state-ancestrors 85 | :make-fsa-builder-from-fsa 86 | :nadd-edge 87 | :print-stats 88 | :test-equivalence 89 | :transition)) 90 | -------------------------------------------------------------------------------- /finenight/python/state.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | class DfaState: 5 | """This class is representing a DFA state. 6 | 7 | Don't use this class directly. 8 | """ 9 | 10 | def __init__(self, state): 11 | raise NotImplemented() 12 | self.name = state.name 13 | 14 | if [s for s in list(state.transitions.keys()) if len(s) > 1]: 15 | raise StateError("A DFA transition is containing more than " + \ 16 | "one state for a symbol") 17 | 18 | self.transitions = dict([(k, state.transitions[k][0]) for k in list(state.transitions.keys())]) 19 | 20 | 21 | 22 | def __str__(self): 23 | """ 24 | This offers a string version of a state. 25 | """ 26 | stringVal = "name: " + self.name + ", transitions:" + str(self.transitions) 27 | 28 | return stringVal 29 | 30 | 31 | def __eq__(lhs, rhs): 32 | okay = True 33 | 34 | if lhs.name != rhs.name: 35 | okay = False 36 | 37 | if lhs.transitions != rhs.transitions: 38 | okay = False 39 | 40 | return okay 41 | 42 | 43 | 44 | def __ne__(lhs, rhs): 45 | return not lhs.__eq__(rhs) 46 | 47 | 48 | 49 | 50 | class State(DfaState): 51 | """This class is representing a NFA state. 52 | """ 53 | 54 | def __init__(self, name, transitions = None, epsilon = None): 55 | """transitions are a map. 56 | ex: { input1 : nameOfOtherState, input2 : nameOfOtherState2 } 57 | """ 58 | self.name = name 59 | if epsilon is None: 60 | epsilon = [] 61 | self.epsilon = epsilon 62 | 63 | if transitions is None: 64 | transitions = {} 65 | 66 | self.transitions = transitions 67 | 68 | for key in list(self.transitions.keys()): 69 | if type(self.transitions[key]) != type([]): 70 | self.transitions = copy.copy(self.transitions) 71 | self.transitions[key] = [self.transitions[key]] 72 | 73 | 74 | 75 | 76 | def toDfaState(self): 77 | return DfaState(self) 78 | 79 | 80 | 81 | 82 | 83 | def __str__(self): 84 | """ 85 | This offers a string version of a state. 86 | """ 87 | 88 | return DfaState.__str__(self) + " epsilon:" + str(self.epsilon) 89 | 90 | 91 | 92 | 93 | def __eq__(lhs, rhs): 94 | okay = DfaState.__eq__(lhs,rhs) 95 | 96 | if lhs.epsilon != rhs.epsilon: 97 | okay = False 98 | 99 | return okay 100 | 101 | 102 | 103 | 104 | def __ne__(lhs, rhs): 105 | return not lhs.__eq__(rhs) 106 | 107 | 108 | -------------------------------------------------------------------------------- /finenight/lisp/fsa-test.lisp: -------------------------------------------------------------------------------- 1 | (require :com.rrette.finenight.fsa "fsa.lisp") 2 | (require :org.ancar.CLUnit "CLUnit.lisp") 3 | 4 | (in-package :com.rrette.finenight) 5 | 6 | (import 'org.ancar.CLUnit::deftest) 7 | 8 | (defun test-instance-fsa-copy () 9 | (let ((fsa (make-fsa))) 10 | (not (eq (copy-fsa fsa) fsa)))) 11 | 12 | (defun test-nodes-not-eq-fsa-copy () 13 | (let ((fsa (make-fsa))) 14 | (not (eq (fsa-nodes (copy-fsa fsa)) (fsa-nodes fsa))))) 15 | 16 | 17 | (defun test-nodes-equalp-fsa-copy () 18 | (let ((fsa (make-fsa))) 19 | (equalp (fsa-nodes (copy-fsa fsa)) (fsa-nodes fsa)))) 20 | 21 | 22 | (defun test-indentity-add-edge () 23 | (let* ((fsa (make-fsa)) 24 | (fsa2 (copy-fsa fsa))) 25 | (setf fsa2 (add-edge '(1 #\b 2) fsa2)) 26 | (not (eq (fsa-nodes fsa2) (fsa-nodes fsa))))) 27 | 28 | (defun nodes-integrity-add-edge () 29 | (let* ((fsa1 (build-fsa '(#\a #\b) '((1 #\a 2) (2 #\b 3)) 1 '(3))) 30 | (fsa2 (add-edge '(1 #\b 3) fsa1))) 31 | (null (transition #\b 1 fsa1)))) 32 | 33 | (defun transition-fsa-without-node () 34 | (null (transition #\b 1 my-fsa))) 35 | 36 | (defun transition-simple () 37 | (and (equal-set (transition #\b 'a my-fsa) '(b)) 38 | (equal-set (transition "b" 'a my-fsa) '(b)) 39 | (equal-set (transition "c" 'a my-fsa) '(d c)))) 40 | 41 | (defun transition-extended () 42 | (and (null (e-transition "a" my-fsa)) 43 | (null (e-transition "da" my-fsa)) 44 | (equal-set (e-transition "bc" my-fsa) '(d c)) 45 | (equal-set (e-transition "d" my-fsa) '(d)))) 46 | 47 | (defun test-accepts () 48 | (and (not (accepts "a" my-fsa)) 49 | (not (accepts "da" my-fsa)) 50 | (accepts "bc" my-fsa) 51 | (accepts "d" my-fsa))) 52 | 53 | (load :fsa-dat) 54 | 55 | (deftest "Instance not EQ test" 56 | :category "FSA copy" 57 | :test-fn #'test-instance-fsa-copy) 58 | 59 | (deftest "Nodes not EQ test" 60 | :category "FSA copy" 61 | :test-fn #'test-nodes-not-eq-fsa-copy) 62 | 63 | (deftest "Nodes equalp test" 64 | :category "FSA copy" 65 | :test-fn #'test-nodes-equalp-fsa-copy) 66 | 67 | (deftest "FSA Nodes Hash Identity test" 68 | :category "FSA add-edge" 69 | :test-fn #'test-indentity-add-edge) 70 | 71 | (deftest "FSA Nodes Hash Intregrity check" 72 | :category "FSA add-edge" 73 | :test-fn #'nodes-integrity-add-edge) 74 | 75 | 76 | (deftest "FSA Simple Transition" 77 | :category "FSA transitions" 78 | :test-fn #'transition-simple) 79 | 80 | (deftest "FSA Transition without node" 81 | :category "FSA transitions" 82 | :test-fn #'transition-fsa-without-node) 83 | 84 | (deftest "FSA Extended Transition" 85 | :category "FSA transitions" 86 | :test-fn #'transition-extended) 87 | 88 | (deftest "FSA Accepts" 89 | :category "FSA transitions" 90 | :test-fn #'test-accepts) 91 | -------------------------------------------------------------------------------- /finenight/scheme/defstruct.scm: -------------------------------------------------------------------------------- 1 | ;;; A Common Lisp-like DEFSTRUCT facility for Scheme. 2 | 3 | 4 | (define-syntax (def-struct struct-name . specs&methods) 5 | (receive (slots methods) 6 | (let* ((meths&specs (reverse specs&methods)) 7 | (maybe-meths (car meths&specs))) 8 | (if (and (pair? maybe-meths) 9 | (pair? (car maybe-meths))) 10 | (return (reverse! (cdr meths&specs)) maybe-meths) 11 | (return specs&methods '#f))) 12 | (let* ((stype-name (concatenate-symbol struct-name '-stype)) 13 | (slot-names (for (s in slots) 14 | (save (if (symbol? s) s (car s))))) 15 | (slotac-names (for (s in slot-names) 16 | (save (concatenate-symbol struct-name ': s)))) 17 | (master-name (concatenate-symbol struct-name '-master)) 18 | (maker-name (concatenate-symbol 'make- struct-name)) 19 | (newer-name (concatenate-symbol struct-name ':new)) 20 | (predicate-name (concatenate-symbol struct-name '?)) 21 | (handler-name (concatenate-symbol struct-name '-handler)) 22 | (handler-form `(object nil 23 | ,@methods 24 | ((print self port) 25 | ;; Normal default is PRINT-STRUCTURE 26 | (print-defstruct self port)) 27 | (((*value t-implementation-env 'hash) self) 28 | (hash-def-struct self))))) 29 | `(block 30 | ;define the structure type, and place in the var structname-STYPE 31 | (define ,stype-name 32 | (make-stype ',struct-name ',slot-names ,handler-form)) 33 | 34 | ;assign the slot accessors to structname:slot variables 35 | ,@@(loop 36 | (for slot in slot-names) 37 | (for slot-accessor in slotac-names) 38 | 39 | (incr i from 2 by 4) 40 | (save `(define-constant ,slot-accessor 41 | (make-structure-accessor ,stype-name 42 | ,i ',slot)))) 43 | 44 | ;assign the niladic structure maker to MAKE-structname 45 | (define ,maker-name 46 | (stype-constructor ,stype-name)) 47 | ;assign the structure maker to the variable structname:NEW 48 | ,@(%def-struct-create-NEW-macros struct-name newer-name maker-name) 49 | ;assign the type predicate to the variable structname? 50 | (define ,predicate-name 51 | (stype-predicator ,stype-name)) 52 | ;assign the handler to the variable structname-HANDLER 53 | (define ,handler-name 54 | (stype-handler ,stype-name)) 55 | ;assign the master to the variable structname-MASTER 56 | (define ,master-name (stype-master ,stype-name)) 57 | ;intialise all the slots in the master 58 | ,@(for (slot in slots) (slotaccessor in slotac-names) 59 | (when (pair? slot)) 60 | (save `(set (,slotaccessor ,master-name) ,(cadr slot)))) 61 | ,stype-name 62 | )))) 63 | 64 | -------------------------------------------------------------------------------- /finenight/lisp/edge.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :com.rrette.finenight 2 | (:use "COMMON-LISP") 3 | (:nicknames "finenight") 4 | (:export "edge-source" 5 | "edge-symbol" 6 | "edge-destination")) 7 | 8 | (in-package :com.rrette.finenight) 9 | (provide :com.rrette.finenight.edge) 10 | 11 | (defstruct edge 12 | src 13 | symb 14 | dst) 15 | 16 | (defstruct fst-edge 17 | src 18 | input-symb 19 | output-symb 20 | dst) 21 | 22 | ;;; This function will build an edge from a 3-tuple, or a fst-edge 23 | ;;; if it's a 4-tuple. 24 | (defun build-edge (e) 25 | (if (equal (length e) 4) 26 | (make-fst-edge 27 | :src (nth 0 e) 28 | :input-symb (nth 1 e) 29 | :output-symb (nth 2 e) 30 | :dst (nth 3 e)) 31 | (make-edge 32 | :src (nth 0 e) 33 | :symb (nth 1 e) 34 | :dst (nth 2 e)))) 35 | 36 | ;;; This function returns the source state identification 37 | ;;; of this edge. 38 | (defmethod edge-source ((e cons)) 39 | (nth 0 e)) 40 | 41 | ;;; This function returns the source state identification 42 | ;;; of this edge. 43 | (defmethod edge-source ((e edge)) 44 | (edge-src e)) 45 | 46 | ;;; This function returns the source state identification 47 | ;;; of this edge. 48 | (defmethod edge-source ((e fst-edge)) 49 | (fst-edge-src e)) 50 | 51 | ;;; This function returns the symbol of this edge. 52 | (defmethod edge-symbol ((e cons)) 53 | (edge-symbol (build-edge e))) 54 | 55 | ;;; This function returns the symbol of this edge. 56 | (defmethod edge-symbol ((e edge)) 57 | (edge-symb e)) 58 | 59 | ;;; This function returns the symbol (input/output) of the edge 60 | (defmethod edge-symbol ((e fst-edge)) 61 | (format nil "~A/~A" (fst-edge-input-symb e) (fst-edge-output-symb e))) 62 | 63 | (defmethod edge-input ((edge fst-edge)) 64 | (fst-edge-input-symb edge)) 65 | 66 | (defmethod edge-input ((edge edge)) 67 | (edge-symbol edge)) 68 | 69 | (defmethod edge-input ((edge cons)) 70 | (edge-input (build-edge edge))) 71 | 72 | (defmethod edge-output ((edge fst-edge)) 73 | (fst-edge-output-symb edge)) 74 | 75 | (defmethod edge-output ((edge edge)) 76 | (edge-symbol edge)) 77 | 78 | (defmethod edge-output ((edge cons)) 79 | (edge-output (build-edge edge))) 80 | 81 | 82 | ;;; This function returns the destination state identification 83 | ;;; of this edge. 84 | (defmethod edge-destination ((e edge)) 85 | (edge-dst e)) 86 | 87 | ;;; This function returns the destination state identification 88 | ;;; of this edge. 89 | (defmethod edge-destination ((e fst-edge)) 90 | (fst-edge-dst e)) 91 | 92 | ;;; This function returns the destination state identification 93 | ;;; of this edge. 94 | (defmethod edge-destination ((e cons)) 95 | (edge-destination (build-edge e))) 96 | 97 | 98 | (defun edgify (edge) 99 | (list (edge-source edge) 100 | (if (edge-symbol edge) 101 | (string (edge-symbol edge))) 102 | (edge-destination edge))) 103 | 104 | -------------------------------------------------------------------------------- /finenight/python/et.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | def ed( X, Y ): 5 | i = len(X) - 1 6 | j = len(Y) - 1 7 | 8 | #last character are same 9 | while True: 10 | if i >=0 and j >=0: 11 | if X[i] == Y[j]: 12 | dis = ed( X[:i], Y[:j]) 13 | break 14 | 15 | 16 | #last two characters are transposed 17 | if i >= 1 and j >= 1: 18 | if X[-2] == Y[-1] and X[-1] == Y[-2]: 19 | dis = 1 + min( ed( X[:-2],Y[:-2] ), 20 | ed( X, Y[:-1] ), 21 | ed( X[:-1], Y ) ) 22 | break 23 | 24 | #test if there's some symbol left to test 25 | if i == -1: 26 | dis = j + 1 27 | break 28 | if j == -1: 29 | dis = i + 1 30 | break 31 | 32 | dis = 1 + min( ed( X[:-1], Y[:-1] ), 33 | ed( X, Y[:-1] ), 34 | ed( X[:-1], Y ) ) 35 | break 36 | return dis 37 | 38 | 39 | def cuted( X, Y, t ): 40 | m = len(X) 41 | n = len(Y) 42 | 43 | l = max(1,n-t) 44 | u = max(m,n+t) 45 | 46 | distance = None 47 | for i in range(l,u): 48 | d = ed(X[:i],Y) 49 | if distance is None or \ 50 | distance > d: 51 | distance = d 52 | 53 | return distance 54 | 55 | 56 | class ErrorTolerantRecognizer: 57 | """ 58 | This class is meant to recognize erroneous words. 59 | """ 60 | def __init__(self, n = None, transitionsStates = None): 61 | """ 62 | This class is only a leveinstein distance one, 63 | so we don't care about distance. 64 | """ 65 | pass 66 | 67 | def recognize(self, word, fsa, t = 2): 68 | words = [] 69 | states = [] 70 | states.append(("", fsa.startState)) 71 | while len(states): 72 | (Yp, qi) = states.pop() 73 | qi = fsa.states[qi] 74 | for (qj,a) in [(a_s[1],a_s[0]) for a_s in list(qi.transitions.items())]: 75 | Y = Yp + a 76 | if cuted(word, Y, t) <= t: 77 | states.append((Y,qj)) 78 | if ed(word, Y) <= t and qj in fsa.finalStates: 79 | words.append(Y) 80 | return words 81 | 82 | 83 | if __name__ == "__main__": 84 | import sys 85 | import pickle 86 | from iadfa import IncrementalAdfa 87 | sys.argv = sys.argv[1:] 88 | 89 | etr = ErrorTolerantRecognizer() 90 | pFile = "test4.pickle" 91 | if "--gen" in sys.argv: 92 | sys.argv.remove("--gen") 93 | file = open("test.dico", "r") 94 | words = [] 95 | for word in file: 96 | if word[-1] == "\n": 97 | word = word[:-1] 98 | words.append(word) 99 | words.sort() 100 | iadfa = IncrementalAdfa(words, sorted = True) 101 | pickle.dump(iadfa, open(pFile,"w")) 102 | else: 103 | iadfa = pickle.load(open(pFile, "r")) 104 | if "--export" in sys.argv: 105 | sys.argv.remove("--export") 106 | iadfa.graphVizExport("test4.dot") 107 | 108 | etr.recognize(sys.argv[0],iadfa,2) 109 | -------------------------------------------------------------------------------- /finenight/python/fscTest.py: -------------------------------------------------------------------------------- 1 | import fsc 2 | import possibleStates 3 | 4 | import unittest 5 | import copy 6 | 7 | class FsaTests(unittest.TestCase): 8 | def testSubsumming(self): 9 | """ 10 | Test if the subsumming test is okay. 11 | """ 12 | areSubsummed = [[(1,0), (0,1)], 13 | [(1,0), (1,1)], 14 | [(1,0), (2,1)], 15 | [(1,0), (0,2)], 16 | [(1,0), (1,2)], 17 | [(1,0), (2,2)], 18 | [(1,0), (3,2)], 19 | [(1,0), (0,3)], 20 | [(1,0), (1,3)], 21 | [(1,0), (2,3)], 22 | [(1,0), (3,3)], 23 | [(1,0), (4,3)], 24 | [(2,0), (1,1)], 25 | [(2,0), (2,1)], 26 | [(2,0), (3,1)], 27 | [(2,0), (0,2)], 28 | [(2,0), (1,2)], 29 | [(2,0), (2,2)], 30 | [(2,0), (3,2)], 31 | [(2,0), (4,2)], 32 | [(2,0), (0,3)], 33 | [(2,0), (1,3)], 34 | [(2,0), (2,3)], 35 | [(2,0), (3,3)], 36 | [(2,0), (4,3)], 37 | [(2,0), (5,3)]] 38 | n = 1 39 | for entry in areSubsummed: 40 | errorMsg = "The entry " + str(entry[0]) + " is supposed to subsume " +\ 41 | "the entry " + str(entry[1]) + " but it isn't" 42 | self.assertTrue(fsc.isSubsumming(fsc.StandardPosition(entry[0][0], entry[0][1]), \ 43 | fsc.StandardPosition(entry[1][0], entry[1][1]), n), msg = errorMsg) 44 | 45 | 46 | 47 | def testNotSubsumming(self): 48 | """ 49 | Test if the subsumming test is okay. 50 | """ 51 | areSubsummed = [[(1,0), (1,0)], 52 | [(1,0), (0,0)], 53 | [(1,0), (2,0)], 54 | [(1,0), (3,0)], 55 | [(1,0), (3,1)], 56 | [(1,0), (4,0)], 57 | [(1,0), (4,1)], 58 | [(1,0), (4,2)], 59 | [(1,0), (5,0)], 60 | [(1,0), (5,1)], 61 | [(1,0), (5,2)], 62 | [(1,0), (5,3)], 63 | [(1,0), (6,0)], 64 | [(1,0), (6,1)], 65 | [(1,0), (6,2)], 66 | [(1,0), (6,3)], 67 | [(1,0), (6,4)]] 68 | n = 1 69 | for entry in areSubsummed: 70 | errorMsg = "The entry " + str(entry[0]) + " is not supposed to subsume " +\ 71 | "the entry " + str(entry[1]) + " but it is" 72 | self.assertTrue(not fsc.isSubsumming(fsc.StandardPosition(entry[0][0], entry[0][1]), \ 73 | fsc.StandardPosition(entry[1][0], entry[1][1]), n), msg = errorMsg) 74 | 75 | ## def testComputeLevAuto(self): 76 | ## print fsc.computeLevAuto(8,1) 77 | 78 | ## def testGenCharVec(self): 79 | ## print fsc.genCharVectors(3) 80 | 81 | 82 | if __name__ == "__main__": 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /finenight/lisp/node.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :com.rrette.finenight 2 | (:use "COMMON-LISP") 3 | (:nicknames "finenight") 4 | (:export "node-transition" 5 | "build-node" 6 | "nadd-edge" 7 | "add-edge")) 8 | 9 | (in-package :com.rrette.finenight) 10 | (provide :com.rrette.finenight.node) 11 | 12 | (require :com.rrette.finenight.edge "edge.lisp") 13 | (require :com.rrette.finenight.utils "utils.lisp") 14 | 15 | 16 | (defstruct (node (:copier nil)) 17 | name 18 | (symbols (make-hash-table :test 'equal)) 19 | edges 20 | epsilons) 21 | 22 | 23 | 24 | (defun copy-node (node) 25 | (make-node :name (node-name node) 26 | :symbols (copy-hash-table (node-symbols node) :test 'equal) 27 | :epsilons (copy-list (node-epsilons node)) 28 | :edges (copy-list (node-edges node)))) 29 | 30 | (defmethod node-access (label (node node)) 31 | (some (lambda (edge) 32 | (if (equal (edge-destination edge) 33 | label) 34 | t)) 35 | (node-edges node))) 36 | 37 | ;;;This function will return a node built with the edges. 38 | ;;;The name of the node will be the source of the first edge. 39 | (defmethod build-node (edges) 40 | (reduce #'(lambda (node edge) 41 | (add-edge edge node)) 42 | edges 43 | :initial-value (make-node))) 44 | 45 | 46 | ;;;This function will return a new node (a copy) with the edge added. 47 | (defmethod add-edge (edge (node node)) 48 | (nadd-edge edge (copy-node node))) 49 | 50 | ;;;This function will return a new node with the edge added. 51 | ;;;The name of the node will be the source of the edge. 52 | (defmethod add-edge (edge (node (eql ()))) 53 | (nadd-edge edge (make-node 54 | :name (edge-source edge)))) 55 | 56 | ;;;This function will add an edge to the current node 57 | (defmethod nadd-edge (edge (n node)) 58 | (let* ((edge (edgify edge)) 59 | (symbols (node-symbols n)) 60 | (edges (node-edges n))) 61 | (setf (node-edges n) (cons edge edges)) 62 | (if (null (edge-input edge)) 63 | (setf (node-epsilons n) (cons edge (node-epsilons n))) 64 | (setf (gethash (string (edge-input edge)) symbols) 65 | (cons edge (gethash (string (edge-input edge)) symbols)))) 66 | n)) 67 | 68 | (defmethod nremove-edge (edge (node node)) 69 | (progn 70 | (setf (node-edges node) (remove (edgify edge) (node-edges node) :test #'equal)) 71 | (setf (gethash (edge-symbol edge) (node-symbols node)) 72 | (remove (edgify edge) (gethash (edge-symbol edge) (node-symbols node)) :test #'equal)) 73 | node)) 74 | 75 | (defmethod remove-edge (edge (node node)) 76 | (let ((n (copy-node node))) 77 | (nremove-edge edge n) 78 | n)) 79 | 80 | 81 | ;;;This will return the epsilons of this state 82 | (defmethod e-close ((node node)) 83 | (uniqueness-set (mapcar (lambda (edge) 84 | (edge-destination edge)) 85 | (node-epsilons node)))) 86 | 87 | (defmethod e-close (node) 88 | (declare (ignore node)) 89 | nil) 90 | 91 | ;;;This will return the destination state for 92 | ;;;the given input. 93 | (defmethod node-transition (input (node node)) 94 | (uniqueness-set 95 | (mapcar (lambda (edge) 96 | (edge-destination edge)) 97 | (gethash (string input) (node-symbols node))))) 98 | 99 | (defmethod node-transition (input node) 100 | (declare (ignore input) 101 | (ignore node))) 102 | 103 | (defun edges-are-equivalent (lhs-edges rhs-edges) 104 | (and (equal (length lhs-edges) 105 | (length rhs-edges)) 106 | (every (lambda (lhs-edge rhs-edge) 107 | (equal (cdr lhs-edge) 108 | (cdr rhs-edge))) 109 | lhs-edges 110 | rhs-edges))) 111 | 112 | -------------------------------------------------------------------------------- /finenight/lisp/utils.lisp: -------------------------------------------------------------------------------- 1 | 2 | (in-package :com.rrette.finenight.utils) 3 | 4 | (defun copy-hash-table (hash &key (test 'eql)) 5 | (declare (hash-table hash)) 6 | (let ((h (make-hash-table :test test))) 7 | (maphash #'(lambda (key x) 8 | (setf (gethash key h) x)) 9 | hash) 10 | h)) 11 | 12 | (defmacro with-syms (syms &rest body) 13 | `(let ,(mapcar #'(lambda (s) 14 | `(,s (gensym))) 15 | syms) 16 | ,@body)) 17 | 18 | (defmacro hash-table-update! (key hash var &rest body) 19 | (with-syms (k h) 20 | `(let ((,k ,key) 21 | (,h ,hash)) 22 | (setf (gethash ,k ,h) 23 | (let ((,var (gethash ,k ,h))) 24 | ,@body))))) 25 | 26 | (defun hash-table-update!/default (func key hash default) 27 | (declare (function func)) 28 | (if (not (nth-value 1 (gethash key hash))) 29 | (setf (gethash key hash) default)) 30 | (setf (gethash key hash) 31 | (funcall func (gethash key hash)))) 32 | 33 | (defun hash-table-ref/default (key hash default) 34 | (if (not (nth-value 1 (gethash key hash))) 35 | (setf (gethash key hash) default) 36 | (gethash key hash))) 37 | 38 | (defun hash-values (hash) 39 | (let ((values nil)) 40 | (with-hash-table-iterator 41 | (my-iterator hash) 42 | (loop 43 | (multiple-value-bind 44 | (entry-p key value) 45 | (my-iterator) 46 | (declare (ignore key)) 47 | (if entry-p 48 | (setf values (cons value values)) 49 | (return))))) 50 | values)) 51 | 52 | (defun hash->alist (hash) 53 | (let ((values nil)) 54 | (with-hash-table-iterator 55 | (my-iterator hash) 56 | (loop 57 | (multiple-value-bind 58 | (entry-p key value) 59 | (my-iterator) 60 | (if entry-p 61 | (setf values (cons (cons key value) values)) 62 | (return))))) 63 | values)) 64 | 65 | (defun hash-keys (hash) 66 | (let ((keys nil)) 67 | (with-hash-table-iterator 68 | (my-iterator hash) 69 | (loop 70 | (multiple-value-bind 71 | (entry-p key value) 72 | (my-iterator) 73 | (declare (ignore value)) 74 | (if entry-p 75 | (setf keys (cons key keys)) 76 | (return))))) 77 | keys)) 78 | 79 | 80 | (defun equal-set (rhs lhs) 81 | (and (eql (list-length lhs) 82 | (list-length rhs)) 83 | (reduce (lambda (ok node) 84 | (if ok 85 | (not (null (member node rhs :test 'equal))))) 86 | lhs 87 | :initial-value t))) 88 | 89 | (defun uniqueness-set (set) 90 | (if (null set) 91 | nil 92 | (if (member (car set) (cdr set)) 93 | (uniqueness-set (cdr set)) 94 | (cons (car set) (uniqueness-set (cdr set)))))) 95 | 96 | 97 | (defun generate-name (index) 98 | (format nil "q~A" index)) 99 | 100 | ;; (defun for-each-line-in-file (file func) 101 | ;; (declare (function func)) 102 | ;; (with-open-file (p ,file :direction :input) 103 | ;; (do ((line (read-line p nil 'eof) 104 | ;; (read-line p nil 'eof))) 105 | ;; ((eql line 'eof)) 106 | ;; (funcall func line)))) 107 | 108 | 109 | (defmacro for-each-line-in-file ((var file) &body body) 110 | (with-syms (stream) 111 | `(with-open-file (,stream ,file :direction :input) 112 | (do ((,var (read-line ,stream nil 'eof) (read-line ,stream nil 'eof))) 113 | ((eql ,var 'eof)) 114 | ,@body)))) 115 | 116 | 117 | (defmacro vector-walk ((index value vector) &rest body) 118 | (with-syms (vec) 119 | `(let ((,vec ,vector)) 120 | (dotimes (,index (array-dimension ,vec 0) nil) 121 | (let ((,value (aref ,vec ,index))) 122 | ,@body))))) 123 | 124 | -------------------------------------------------------------------------------- /finenight/lisp/fsc-test.lisp: -------------------------------------------------------------------------------- 1 | (require :com.rrette.finenight.fsc "fsc.lisp") 2 | (require :org.ancar.CLUnit "CLUnit.lisp") 3 | 4 | (in-package :com.rrette.finenight) 5 | 6 | (import 'org.ancar.CLUnit::deftest) 7 | (import 'org.ancar.CLUnit::run-all-tests) 8 | 9 | (defun test-char-vector1 () 10 | (let ((myvect "abc") 11 | (mycvect '(t nil nil))) 12 | (equal mycvect (generate-characteristic-vector #\a myvect)))) 13 | 14 | (defun test-char-vector2 () 15 | (let ((myvect "aab") 16 | (mycvect '(t t nil))) 17 | (equal mycvect (generate-characteristic-vector #\a myvect)))) 18 | 19 | (defun test-char-vector3 () 20 | (let ((myvect "aaa") 21 | (mycvect '(t t t))) 22 | (equal mycvect (generate-characteristic-vector #\a myvect)))) 23 | 24 | (defun test-char-vector4 () 25 | (let ((myvect "caa") 26 | (mycvect '(nil t t))) 27 | (equal mycvect (generate-characteristic-vector #\a myvect)))) 28 | 29 | (defun test-char-vector5 () 30 | (let ((myvect "bca") 31 | (mycvect '(nil nil t))) 32 | (equal mycvect (generate-characteristic-vector #\a myvect)))) 33 | 34 | (defun test-char-vector6 () 35 | (let ((myvect "dbc") 36 | (mycvect '(nil nil nil))) 37 | (equal mycvect (generate-characteristic-vector #\a myvect)))) 38 | 39 | (defun test-accepting1 () 40 | (let ((pos1 (make-n-position :i 1 :e 1)) 41 | (pos2 (make-n-position :i 2 :e 1)) 42 | (res '(nil nil))) 43 | (and 44 | (not (is-accepting 1 0 pos1)) 45 | (is-accepting 1 1 pos1) 46 | (is-accepting 1 2 pos1) 47 | (not (is-accepting 2 0 pos1)) 48 | (not (is-accepting 2 1 pos1)) 49 | (is-accepting 2 2 pos1) 50 | (not (is-accepting 2 0 pos2)) 51 | (is-accepting 2 1 pos2) 52 | (is-accepting 2 2 pos2) 53 | (not (is-accepting 3 0 pos2)) 54 | (not (is-accepting 3 1 pos2)) 55 | (is-accepting 3 2 pos2) 56 | (not (is-accepting 4 2 pos2))))) 57 | 58 | (defun test-first-occurence () 59 | (and 60 | (equalp (first-occurence (list (make-n-position :i 0 :e 0))) 61 | (list (make-n-position :i 1 :e 0))) 62 | (equalp (first-occurence (list (make-n-position :i 1 :e 0))) 63 | (list (make-n-position :i 2 :e 0))) 64 | (equalp (first-occurence (list (make-n-position :i 1 :e 1))) 65 | (list (make-n-position :i 2 :e 1))) 66 | (equalp (first-occurence (list (make-n-position :i 1 :e 1) 67 | (make-n-position :i 2 :e 1))) 68 | (list (make-n-position :i 2 :e 1) 69 | (make-n-position :i 3 :e 1))))) 70 | 71 | 72 | (defun test-occurs-in () 73 | t) 74 | 75 | (deftest "first-occurence tests" 76 | :category "n-Beta-Delta tests" 77 | :test-fn #'test-first-occurence) 78 | 79 | 80 | (deftest "Characteristic Vector Test 1 (gcv a \"abc\") -> (t nil nil)" 81 | :category "Characteristic Vector Tests" 82 | :test-fn #'test-char-vector1) 83 | (deftest "Characteristic Vector Test 2 (gcv a \"aab\") -> (t t nil)" 84 | :category "Characteristic Vector Tests" 85 | :test-fn #'test-char-vector2) 86 | (deftest "Characteristic Vector Test 3 (gcv a \"aaa\") -> (t t t)" 87 | :category "Characteristic Vector Tests" 88 | :test-fn #'test-char-vector3) 89 | (deftest "Characteristic Vector Test 4 (gcv a \"caa\") -> (nil t t)" 90 | :category "Characteristic Vector Tests" 91 | :test-fn #'test-char-vector4) 92 | (deftest "Characteristic Vector Test 5 (gcv a \"bca\") -> (nil nil t)" 93 | :category "Characteristic Vector Tests" 94 | :test-fn #'test-char-vector5) 95 | (deftest "Characteristic Vector Test 6 (gcv a \"dbc\") -> (nil nil nil)" 96 | :category "Characteristic Vector Tests" 97 | :test-fn #'test-char-vector6) 98 | 99 | (deftest "Accepting Positions Test 1" 100 | :category "Accepting Positions Tests" 101 | :test-fn #'test-accepting1) 102 | 103 | 104 | (run-all-tests) -------------------------------------------------------------------------------- /finenight/python/utils.py: -------------------------------------------------------------------------------- 1 | from fsa import * 2 | 3 | import copy 4 | 5 | def create(start, finals, edges): 6 | states = {} 7 | for e in edges: 8 | if e[1] is None: 9 | states.setdefault(e[0], ({}, []))[1].append(e[2]) 10 | else: 11 | states.setdefault(e[0], ({}, []))[0][e[1]] = e[2] 12 | states.setdefault(e[2], ({}, [])) 13 | 14 | states = [State(s[0], s[1][0], epsilon = s[1][1]) for s in list(states.items())] 15 | alphabet = [] 16 | for s in states: 17 | for t in s.transitions: 18 | if t not in alphabet: 19 | alphabet.append(t) 20 | 21 | states_map = dict([(s.name, s) for s in states]) 22 | 23 | return Nfa(states, alphabet, states_map[start], [states_map[s] for s in finals]) 24 | 25 | 26 | def union(lhs, rhs): 27 | lhs, rhs = binaryOperationRenaming(lhs, rhs, True, None) 28 | 29 | 30 | new = Nfa(list(lhs.states.values()) + list(rhs.states.values()), 31 | lhs.alphabet + [s for s in rhs.alphabet if s not in lhs.alphabet], 32 | lhs.startState, 33 | rhs.finalStates + lhs.finalStates) 34 | new.states[new.startState].epsilon.append(rhs.startState) 35 | return new 36 | 37 | # this function will produce an fsa that will accept a mininum 38 | # of "start" of that fsa to less than "end" fsa 39 | def repeat(lhs, start, end): 40 | 41 | # add the optional ones. 42 | optional = copy.deepcopy(lhs) 43 | for final in optional.finalStates: 44 | optional.states[optional.startState].epsilon.append(final) 45 | 46 | if start > 0: 47 | new = copy.deepcopy(lhs) 48 | for i in range(1, start): 49 | new = new.concatenate(lhs) 50 | else: 51 | new = optional 52 | end = end - 1 53 | 54 | 55 | for i in range(start, end): 56 | new = new.concatenate(optional) 57 | 58 | return new 59 | 60 | 61 | # This function will remove the deadend states. 62 | # This makes the FSA cleaner to display. 63 | def clean(fsa): 64 | deadends = [] 65 | for label,state in list(fsa.states.items()): 66 | if label not in fsa.finalStates: 67 | destinations = [] 68 | for dest in list(state.transitions.values()): 69 | for d in dest: 70 | if d not in destinations: 71 | destinations.append(d) 72 | if label in destinations: 73 | destinations.remove(label) 74 | if len(destinations) == 0: 75 | deadends.append(label) 76 | 77 | for label,state in list(fsa.states.items()): 78 | for input,dest in list(state.transitions.items()): 79 | for d in dest: 80 | if d in deadends: 81 | dest.remove(d) 82 | if len(dest) == 0: 83 | del state.transitions[input] 84 | 85 | 86 | # This function will dump in a text file the current fsa. 87 | # first line correspond to: 88 | # 89 | # start_state final_state1 final_state2 ... 90 | # 91 | # Then every other line is just a tuple of edges. 92 | def dump(fsa, filename): 93 | file = open(filename, "w") 94 | line = str(fsa.startState) 95 | for fs in fsa.finalStates: 96 | line += " " + str(fs) 97 | lines = [line + "\n"] 98 | for label, state in list(fsa.states.items()): 99 | for input, dest in list(state.transitions.items()): 100 | input, output = input.split("|") 101 | for d in dest: 102 | lines.append("%s %s %s %s\n" % (str(label), input, output, str(d))) 103 | file.writelines(lines) 104 | 105 | # This function takes a list of symbol and will create 106 | # a NFA which will recognize the sequence of those symbols. 107 | # It means that for ["a", "b", "c"], this NFA will recognize 108 | # the string sequence "abc". 109 | def seq(symbols): 110 | edges = [] 111 | for i in range(len(symbols)): 112 | edges.append((i, symbols[i], i + 1)) 113 | return create(0, [len(symbols)], edges) 114 | -------------------------------------------------------------------------------- /finenight/scheme/fsa.scm: -------------------------------------------------------------------------------- 1 | ;(define-extension fsa) 2 | 3 | (require-extension format) 4 | ;(require-extension utils-scm) 5 | (require-extension srfi-1) 6 | 7 | 8 | ;; the node consists of a label and a map a symbol to 9 | ;; a destination object. 10 | (define-record node 11 | label 12 | symbols-map 13 | final) 14 | 15 | ;;(print-struct #t) 16 | 17 | (define-record-printer (node x out) 18 | (fprintf out "(node ~S ~S ~A)" 19 | (node-label x) 20 | (node-edges2 x) 21 | (node-final x))) 22 | 23 | 24 | 25 | (define make-empty-node 26 | (lambda (label) 27 | (make-node label (make-hash-table) #f))) 28 | 29 | 30 | (define node-symbols 31 | (lambda (node) 32 | (hash-table-keys (node-symbols-map node)))) 33 | 34 | (define node-destinations 35 | (lambda (node) 36 | (apply append (hash-table-values (node-symbols-map node))))) 37 | 38 | (define node-arity 39 | (lambda (node) 40 | (hash-table-size (node-symbols-map node)))) 41 | 42 | (define node-edges 43 | (lambda (node) 44 | (letrec ((label (node-label node)) 45 | (S (lambda (symbols) 46 | (if (null? symbols) 47 | '() 48 | (append (map (lambda (dest-node) 49 | (list label 50 | (car symbols) 51 | (node-label dest-node))) 52 | (node-transition node (car symbols))) 53 | (S (cdr symbols))))))) 54 | (S (node-symbols node))))) 55 | 56 | (define node-walk 57 | (lambda (node proc) 58 | (hash-table-walk (node-symbols-map node) proc))) 59 | 60 | (define node-edges2 61 | (lambda (node) 62 | (letrec ((label (node-label node)) 63 | (S (lambda (symbols) 64 | (if (null? symbols) 65 | '() 66 | (append (map (lambda (dest-node) 67 | (cons (car symbols) 68 | (node-label dest-node))) 69 | (node-transition node (car symbols))) 70 | (S (cdr symbols))))))) 71 | (S (node-symbols node))))) 72 | 73 | 74 | (define node-add-edge! 75 | (lambda (node input-symbol dst-node) 76 | (hash-table-update!/default (node-symbols-map node) 77 | input-symbol 78 | (lambda (lst) 79 | (cons dst-node lst)) 80 | '()))) 81 | 82 | 83 | (define node-remove-edge! 84 | (lambda (node input-symbol dst-node) 85 | (let ((symbols-map (node-symbols-map node))) 86 | (if (< 1 87 | (length (hash-table-ref/default symbols-map input-symbol '()))) 88 | (hash-table-update!/default symbols-map 89 | input-symbol 90 | (lambda (lst) 91 | (delete! dst-node lst eq?)) 92 | '()) 93 | (hash-table-delete! symbols-map input-symbol)) 94 | node))) 95 | 96 | (define node-remove-dst! 97 | (lambda (node dst-node) 98 | (let ((symbols-map (node-symbols-map node))) 99 | (map (lambda (symbol) 100 | (hash-table-update!/default symbols-map 101 | symbol 102 | (lambda (lst) 103 | (delete! dst-node lst eq?)) 104 | '())) 105 | (node-symbols node))) 106 | node)) 107 | 108 | (define node-remove-dsts-for-input! 109 | (lambda (node input) 110 | (let ((symbols-map (node-symbols-map node))) 111 | (hash-table-delete! symbols-map input) 112 | node))) 113 | 114 | 115 | ;; will return the list of destination nodes for the 116 | ;; given node. 117 | (define node-transition 118 | (lambda (node symbol) 119 | (hash-table-ref (node-symbols-map node) symbol (lambda () '())))) 120 | 121 | 122 | (define node-is-equivalent 123 | (lambda (lhs rhs) 124 | (if (not (eq? (node-final lhs) (node-final rhs))) 125 | #f 126 | (let ((lhs-map (node-symbols-map lhs)) 127 | (rhs-map (node-symbols-map rhs))) 128 | (map-equal? lhs-map rhs-map))))) 129 | 130 | 131 | 132 | (define-record fsa start-node) 133 | 134 | (define make-empty-fsa 135 | (lambda (start-label) 136 | (make-fsa (make-empty-node start-label)))) 137 | 138 | (define accept? 139 | (lambda (fsa word) 140 | (letrec ((T (lambda (node word) 141 | (if (null? word) 142 | (node-final node) 143 | (let ((nodes (node-transition node (car word)))) 144 | (if (null? nodes) 145 | #f 146 | (T (car nodes) (cdr word)))))))) 147 | (T (fsa-start-node fsa) word)))) 148 | 149 | 150 | ;(define-record-printer (fsa x out) 151 | ; (fprintf out 152 | ; "(fsa ~S ~S ~S)" 153 | ; (fsa-initial-state x) (fsa-finals x) (hash-table->alist (fsa-nodes x)))) 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /finenight/lisp/fsa.lisp: -------------------------------------------------------------------------------- 1 | ;(declaim (optimize (speed 3) (space 3) (debug 0))) 2 | 3 | (in-package :com.rrette.finenight.fsa) 4 | 5 | ;; the node consists of a label and a map a symbol to 6 | ;; a destination object. 7 | (defstruct (hash-node (:copier nil)) 8 | label 9 | (symbols-map (make-hash-table :test 'equal)) 10 | (final nil)) 11 | 12 | 13 | (defun make-empty-node (label) 14 | (make-node :label label)) 15 | 16 | (defun node-reset (node) 17 | (clrhash (node-symbols-map node)) 18 | (setf (node-final node) nil)) 19 | 20 | (defun node-arity (node) 21 | (hash-table-count (node-symbols-map node))) 22 | 23 | (defun node-edges (node) 24 | (let ((label (node-label node))) 25 | (labels ((S (symbols) 26 | (if (null symbols) 27 | '() 28 | (concatenate 'list (mapcar #'(lambda (dest-node) 29 | (list label 30 | (car symbols) 31 | (node-label dest-node))) 32 | (node-transition node (car symbols))) 33 | (S (cdr symbols)))))) 34 | (S (node-symbols node))))) 35 | 36 | (defun node-symbols (node) 37 | (hash-keys (node-symbols-map node))) 38 | 39 | (defun node-destinations (node) 40 | (apply #'concatenate 'list (hash-values (node-symbols-map node)))) 41 | 42 | (defmacro node-walk (node proc) 43 | `(maphash ,proc (node-symbols-map ,node))) 44 | 45 | (defun node-sorted-walk (node func &key (test #'char<)) 46 | (let* ((hash (node-symbols-map node)) 47 | (keys (hash-keys hash))) 48 | (setf keys (sort keys test)) 49 | (dolist (key keys) 50 | (funcall func key (gethash key hash))))) 51 | 52 | (defun node-add-edge! (node input-symbol dst-node) 53 | (hash-table-update! input-symbol 54 | (node-symbols-map node) 55 | lst 56 | (cons dst-node lst))) 57 | 58 | (defun node-remove-edge! (node input-symbol dst-node) 59 | (let ((symbols-map (node-symbols-map node))) 60 | (if (< 1 61 | (length (gethash input-symbol symbols-map))) 62 | (hash-table-update! input-symbol 63 | symbols-map 64 | lst 65 | (delete dst-node lst)) 66 | (remhash input-symbol symbols-map)) 67 | node)) 68 | 69 | ;; (define node-remove-dst! 70 | ;; (lambda (node dst-node) 71 | ;; (let ((symbols-map (node-symbols-map node))) 72 | ;; (map (lambda (symbol) 73 | ;; (hash-table-update!/default symbols-map 74 | ;; symbol 75 | ;; (lambda (lst) 76 | ;; (delete! dst-node lst eq?)) 77 | ;; '())) 78 | ;; (node-symbols node))) 79 | ;; node)) 80 | 81 | (defun node-remove-dsts-for-input! (node input) 82 | (let ((symbols-map (node-symbols-map node))) 83 | (remhash input symbols-map) 84 | node)) 85 | 86 | 87 | ;; will return the list of destination nodes for the 88 | ;; given node. 89 | (defun node-transition (node symbol) 90 | ;(format t "~S~%" (com.rrette.finenight.utils::hash->alist (node-symbols-map node))) 91 | (gethash symbol (node-symbols-map node))) 92 | 93 | 94 | ;; (define node-is-equivalent 95 | ;; (lambda (lhs rhs) 96 | ;; (if (not (eq? (node-final lhs) (node-final rhs))) 97 | ;; #f 98 | ;; (let ((lhs-map (node-symbols-map lhs)) 99 | ;; (rhs-map (node-symbols-map rhs))) 100 | ;; (map-equal? lhs-map rhs-map))))) 101 | 102 | 103 | 104 | (defstruct (fsa (:copier nil)) 105 | start-node) 106 | 107 | (defun make-empty-fsa (start-label) 108 | (make-fsa :start-node (make-empty-node start-label))) 109 | 110 | (defun accept? (fsa word) 111 | (labels ((T (node word) 112 | (if (null word) 113 | (node-final node) 114 | (let ((nodes (node-transition node (car word)))) 115 | (if (null nodes) 116 | nil 117 | (T (car nodes) (cdr word))))))) 118 | (T (fsa-start-node fsa) word))) 119 | 120 | (defun extract-words (fsa) 121 | (let ((words nil) 122 | (states (list (list (fsa-start-node fsa) "")))) 123 | (do () 124 | ((null states)) 125 | (destructuring-bind (src-node word) (pop states) 126 | (when (node-final src-node) 127 | (setf words (nconc words (list word)))) 128 | (node-sorted-walk src-node 129 | #'(lambda (input dst-nodes) 130 | (push (list (car dst-nodes) 131 | (concatenate 'string word (string input))) 132 | states)) 133 | :test #'char>))) 134 | words)) 135 | 136 | 137 | (defun save-fsa (fsa) 138 | (write fsa :circle t)) 139 | 140 | ;(define-record-printer (fsa x out) 141 | ; (fprintf out 142 | ; "(fsa ~S ~S ~S)" 143 | ; (fsa-initial-state x) (fsa-finals x) (hash-table->alist (fsa-nodes x)))) 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /finenight/python/fst.py: -------------------------------------------------------------------------------- 1 | from fsa import Nfa 2 | 3 | import copy 4 | import pdb 5 | 6 | from transition import * 7 | from error import * 8 | from nameGenerator import IndexNameGenerator 9 | from state import State 10 | 11 | 12 | 13 | 14 | class FstState(State): 15 | def __init__(self, name): 16 | State.__init__(self, name) 17 | 18 | def __str__(self): 19 | output = "" 20 | for transitionList in list(self.transitions.values()): 21 | for transition in transitionList: 22 | output += str(transition) + "\n" 23 | return output 24 | 25 | 26 | 27 | def add(self, transition): 28 | """ 29 | With this function you can add a 3-tuple, a 4-tuple 30 | transition or a Transition object. 31 | """ 32 | if transition.__class__ != Transition: 33 | transition = Transition(transition) 34 | 35 | if self.name is None: 36 | self.name = transition.start 37 | elif self.name != transition.start: 38 | raise ConstructionError( "The transition that you are trying to add is not part of this state") 39 | 40 | if transition.input not in self.transitions: 41 | self.transitions[transition.input] = [] 42 | 43 | self.transitions[transition.input].append(transition) 44 | 45 | 46 | 47 | def inverse(self): 48 | other = copy.copy(self) 49 | for key in list(other.transitions.keys()): 50 | other.transitions[key] = [s.inverse() for s in other.transitions[key]] 51 | 52 | return other 53 | 54 | 55 | def __eq__(lhs, rhs): 56 | okay = True 57 | 58 | if lhs.name != rhs.name: 59 | okay = False 60 | 61 | if list(lhs.transitions.keys()) != list(rhs.transitions.keys()): 62 | okay = False 63 | pdb.set_trace() 64 | 65 | if okay is not False: 66 | for key in list(lhs.transitions.keys()): 67 | if lhs.transitions[key] != rhs.transitions[key]: 68 | okay = False 69 | 70 | return okay 71 | 72 | 73 | 74 | def __ne__(lhs, rhs): 75 | return not lhs.__eq__(rhs) 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | class Fst(Nfa): 88 | """This class represent a Finite-State Transducer 89 | Each state(FstState) is a list of transitions (4-tuple or a 90 | Transition) 91 | 92 | (Q, E1*, E2*, Q) , The input state, The input symbol, 93 | The output symbol and the The output state, respectely. 94 | """ 95 | def __init__(self, states, alphabet1, alphabet2, startState, finalStates): 96 | 97 | self.alphabet = alphabet1 98 | self.alphabet2 = alphabet2 99 | 100 | #adding states to the Fst 101 | self.states = {} 102 | for state in states: 103 | self.add(state) 104 | 105 | self.startState = str(startState) 106 | 107 | #we ensure that finalStates is a list 108 | if finalStates.__class__ != list: 109 | finalStates = [finalStates] 110 | 111 | #we ensure that each element of finalStates is a string 112 | self.finalStates = copy.copy(finalStates) 113 | for i in range(len(self.finalStates)): 114 | if hasattr(self.finalStates[i], "name"): 115 | self.finalStates[i] = self.finalStates[i].name 116 | else: 117 | self.finalStates[i] = str(self.finalStates[i]) 118 | 119 | 120 | 121 | def __str__(self): 122 | output = "starting state: " + str(self.startState) + "\n" 123 | output += "final states: " + str(self.finalStates) + "\n" 124 | for state in list(self.states.values()): 125 | output += str(state) 126 | 127 | return output 128 | 129 | 130 | 131 | def add(self, state): 132 | """This function gives you the ability to add a transition 133 | (Transition, or a 4-tuple), or a FstState to this FST. 134 | """ 135 | if state.__class__ == tuple: 136 | state = Transition(state) 137 | 138 | if state.__class__ == Transition: 139 | if state.start not in self.states: 140 | self.states[state.start] = FstState(state.start) 141 | self.states[state.start].add(state) 142 | 143 | if state.__class__ == FstState: 144 | self.states[state.name] = state 145 | 146 | 147 | def inverse(self): 148 | other = copy.copy(self) 149 | other.states = dict([(s.name, s.inverse()) for s in list(other.states.values())]) 150 | 151 | return other 152 | 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /finenight/python/iadfa.py: -------------------------------------------------------------------------------- 1 | from fsa import * 2 | from nameGenerator import * 3 | 4 | 5 | class IncrementalAdfa(Dfa): 6 | """This class is an Acyclic Deterministic Finite State Automaton 7 | constructed by a list of words. 8 | """ 9 | 10 | def __init__(self, words, nameGenerator = None, sorted = False): 11 | 12 | if nameGenerator is None: 13 | nameGenerator = IndexNameGenerator() 14 | 15 | self.nameGenerator = nameGenerator 16 | if sorted: 17 | self.createFromSortedListOfWords(words) 18 | else: 19 | self.createFromArbitraryListOfWords(words) 20 | 21 | 22 | 23 | 24 | def getCommonPrefix(self, word): 25 | stateName = self.startState 26 | 27 | index = 0 28 | nextStateName = stateName 29 | while nextStateName is not None: 30 | symbol = word[index] 31 | stateName = nextStateName 32 | if symbol in self.states[stateName].transitions: 33 | nextStateName = self.states[stateName].transitions[symbol] 34 | index += 1 35 | else: 36 | nextStateName = None 37 | 38 | return (stateName, word[index:]) 39 | 40 | 41 | 42 | 43 | def hasChildren(self, stateName): 44 | okay = False 45 | if [s for s in list(self.states[stateName].transitions.values()) if s]: 46 | okay = True 47 | 48 | return okay 49 | 50 | 51 | 52 | 53 | def addSuffix(self, stateName, currentSuffix): 54 | lastState = stateName 55 | while len(currentSuffix) > 0: 56 | newStateName = self.nameGenerator.generate() 57 | symbol = currentSuffix[0] 58 | currentSuffix = currentSuffix[1:] 59 | self.states[stateName].transitions[symbol] = newStateName 60 | self.states[newStateName] = State(newStateName) 61 | stateName = newStateName 62 | self.finalStates.append(stateName) 63 | 64 | 65 | 66 | def markedAsRegistered(self, stateName): 67 | return stateName in self.register 68 | 69 | 70 | def markAsRegistered(self, stateName): 71 | self.register[stateName] = True 72 | 73 | 74 | 75 | 76 | 77 | def equivalentRegisteredState(self, stateName): 78 | equivatentState = None 79 | 80 | for state in list(self.register.keys()): 81 | if self.areEquivalents(state, stateName): 82 | equivatentState = state 83 | 84 | return equivatentState 85 | 86 | 87 | def lastChild(self, stateName): 88 | input = list(self.states[stateName].transitions.keys()) 89 | input.sort() 90 | return (self.states[stateName].transitions[input[-1]], input[-1]) 91 | 92 | 93 | def replaceOrRegister(self, stateName): 94 | #childName = self.finalStates[-1] 95 | childName, lastSymbol = self.lastChild(stateName) 96 | if not self.markedAsRegistered(childName): 97 | if self.hasChildren(childName): 98 | self.replaceOrRegister(childName) 99 | equivalentState = self.equivalentRegisteredState(childName) 100 | if equivalentState is not None: 101 | self.deleteBranch(childName) 102 | self.states[stateName].transitions[lastSymbol] = equivalentState 103 | else: 104 | self.markAsRegistered(childName) 105 | 106 | 107 | def deleteBranch(self, child): 108 | childs = [child] 109 | while len(childs) > 0: 110 | nextChilds = [] 111 | for child in childs: 112 | nextChilds += [s for s in list(self.states[child].transitions.values()) if not self.markedAsRegistered(s)] 113 | self.states.pop(child) 114 | if child in self.finalStates: 115 | self.finalStates.remove(child) 116 | childs = nextChilds 117 | 118 | 119 | 120 | def createFromSortedListOfWords(self, words): 121 | self.register = {} 122 | 123 | self.finalStates = [] 124 | self.startState = self.nameGenerator.generate() 125 | self.states = {self.startState : State(self.startState)} 126 | 127 | lastWord = None 128 | for word in words: 129 | if word.endswith('\n'): 130 | word = word[:-1] 131 | lastStateName, currentSuffix = self.getCommonPrefix(word) 132 | if self.hasChildren(lastStateName): 133 | self.replaceOrRegister(lastStateName) 134 | self.addSuffix(lastStateName, currentSuffix) 135 | self.replaceOrRegister(self.startState) 136 | 137 | 138 | def createFromArbitraryListOfWords(self, words): 139 | self.register = {} 140 | 141 | self.finalStates = [] 142 | self.startState = self.nameGenerator.generate() 143 | self.states = {self.startState : State(self.startState)} 144 | 145 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Moman 2 | ## Description 3 | This was supposed to be a suite of tools to be used by an orthographic/grammatical checker and the checker itself. However, the project is mainly dead right now. But I encourage you to look through the code and use it as inspiration/reference. The tools are currently coded in Python, but I started a while back to rewrite it in Lisp (which will never be finished). Moman, the suite itself, consist of the following tools: 4 | 5 | * [FineNight](#finenight) is the FSA library. 6 | * A FST library. (Not yet implemented) 7 | * [ZSpell](#zspell) is the orthographic checker. 8 | 9 | Mostly, the only part of the tools suite which is worthwhile mentioning is the "Fast String Correction" which is used by [Lucene's](https://lucene.apache.org/) FuzzyQuery. You can read about the inclusion of this project in Lucene by reading Michael McCandless's [article](http://blog.mikemccandless.com/2011/03/lucenes-fuzzyquery-is-100-times-faster.html). 10 | 11 | ## FineNight 12 | The *FineNight* library contains many algorithms for Finite State Automatons. That includes: 13 | * Union of two FSAs 14 | * Intersection of two FSAs 15 | * Complement of a FSAs 16 | * Difference of two FSAs 17 | * Reversal of a FSA 18 | * Closure of a FSA 19 | * Concatenation of two FSAs 20 | * Determination of a NFA 21 | * Equivalence test 22 | * Minimization algorithm 23 | * Construction of an IADFA from a sorted dictionary 24 | * Graphviz support 25 | * Error-Tolerant IADFA (starred in Michael McCandless's [article](http://blog.mikemccandless.com/2011/03/lucenes-fuzzyquery-is-100-times-faster.html) 26 | 27 | Almost all algorithms were taken from the book [Introduction to Automata Theory, Languages, and Computation](#hopcroft01). The minimization algorithm is an implementation of [Brzozowski's method](#brzozowski). In this method, the (possibly non-deterministic) automaton is reversed, determinized, reversed and determinized. I'll eventually add the [Hopcroft's nlog(n) minimization algorithm](#hopcroft). 28 | 29 | ## ZSpell 30 | ZSpell is meant to be a concurrent of aspell, made by Kevin Atkinson. At this time, ZSpell can suggest words with a Levenshtein-distance of one. Before we were using [Kemal Oflazer's algorithm](#oflazer96errortolerant). This algorithm is very slow, but now we use a faster algorithm ([Schulz's and Mihov's algorithm](#schulz02fast)). However, only substitution, removal and insertion are used for the faster algorithm. It means that transpositions errors, like "ehllo" -> "hello", are considered as two operations. 31 | 32 | TODOs includes: 33 | * Add transposition errors for Levenshtein-distance algorithm. 34 | * Add phonetic errors (spelling by sound). 35 | * Add derivation errors. 36 | 37 | ## References 38 | * [John E. Hopcroft](http://www.cs.cornell.edu/Info/Department/Annual95/Faculty/Hopcroft.html), Rajeev Motwani and Jefferey D. Ullman, Introduction to Automata Theory, Languages and Computation, 2nd edition, Adison-Wesley, 2001. 39 | * J. A. Brzozowski, 40 | Canonical regular expressions and minimal state graphs for definite events, 41 | in Mathematical Theory of Automata, Volume 12 of MRI Symposia Series, 42 | pp. 529-561, Polytechnic Press, Polytechnic Institute of Brooklyn, N.Y., 43 | 1962. 44 | * 45 | John E. Hopcroft 46 | , 47 | 48 | An n log n algorithm for minimizing the states in a finite automaton 49 | , 50 | in The Theory of Machines and Computations, Z. Kohavi (ed.), pp. 189-196, 51 | Academic Press, 1971. 52 | * 53 | Kemal Oflazer, 54 | 55 | Error-tolerant Finite State Recognition with Applications to 56 | Morphological Analysis and Spelling Correction 57 | , 58 | Computational Linguistics, 22(1), pp. 73--89, March, 1996. 59 | * 60 | 61 | Klaus U. Schulz 62 | and 63 | Stoyan Mihov, 64 | 65 | Fast String Correction with Levenshtein-Automata, 66 | 67 | International Journal of Document Analysis and Recognition, 5(1):67--85, 2002. 68 | * 69 | 70 | Zbigniew J. Czech 71 | , 72 | 73 | George Havas 74 | and 75 | Bohdan S. Majewski, 76 | 77 | An Optimal Algorithm for Generating Minimal Perfect Hash Functions 78 | , Information Processing Letters, 43(5):257--264, 1992. 79 | -------------------------------------------------------------------------------- /finenight/python/possibleStates.py: -------------------------------------------------------------------------------- 1 | import fsc 2 | from copy import copy 3 | from pdb import set_trace 4 | from pprint import pprint 5 | 6 | ############################################################## 7 | # 8 | # Definitions 9 | # 10 | ############################################################## 11 | # 12 | # 1. Relevant Subworld for a state: NEED to be done. 13 | # 14 | ############################################################## 15 | 16 | 17 | def determineRelevantSubwordLenghts(n, state): 18 | """ 19 | This function register states to their possible relevant subword 20 | lenghts. 21 | 22 | (See Definition 4.0.22 for "relevant subword") 23 | """ 24 | # Find the right-most position. The index of that position 25 | # is state's minimal lenght for a relevant subword. 26 | # 27 | # For a definition of a "relevant subword for a state" 28 | # see "Definitions" section on the top of the current 29 | # source file. 30 | minProfilLen = 0 31 | if len(state) > 0: 32 | rightMostPosition = max(state, key=lambda s:s.i) 33 | minProfilLen = rightMostPosition.i 34 | 35 | return (minProfilLen, 2 * n + 1) 36 | 37 | 38 | def transition(n, profil, pos, withTransitions): 39 | # This is the transition function as described 40 | # by table 4.1 41 | # 42 | # Note that the current profil is the state's 43 | # characteristic vector (CV), not the current 44 | # position's CV. It means that position's CV 45 | # is in fact: 46 | # 47 | # profil[pos.i:min(n - e + 1, w - i)] 48 | # 49 | # (See Definition 4.0.22) 50 | i = pos.i 51 | e = pos.e 52 | w = len(profil) 53 | 54 | positions = [] 55 | if pos.isTransposition: 56 | # if we are a transposition position, the only 57 | # accepting position if profil is [0, 1] 58 | if profil[i] == 1: 59 | return [fsc.StandardPosition(i + 2, e)] 60 | else: 61 | return positions 62 | 63 | if withTransitions and (w - i) >= 2 and profil[i:i + 2] == [0, 1]: 64 | positions += [fsc.TPosition(i, e + 1)] 65 | 66 | # Return directly if this is a match. 67 | if i < w and profil[i] is 1: 68 | return [fsc.StandardPosition(i + 1, e)] 69 | 70 | # Basic positions: deletion, subsitution 71 | positions += [fsc.StandardPosition(i, e + 1), fsc.StandardPosition(i + 1, e + 1)] 72 | 73 | # Addition operation: 74 | if i < w: 75 | k = fsc.positiveK(profil[i:i + min(n - e + 1, len(profil) - i)]) 76 | if k is not None: 77 | positions.append(fsc.StandardPosition(i + k, e + k - 1)) 78 | 79 | # remove positions that goes beyong profil. 80 | positions = [s for s in positions if s.i <= w] 81 | # remove positions that goes beyong allowed edit mistakes 82 | positions = [s for s in positions if s.e <= n] 83 | return positions 84 | 85 | 86 | def getNextState(n, profil, state, withTransitions): 87 | # For each position, use the elementary transitions (See table 4.1). 88 | # Then, will have set of new positions that we will do the reduced 89 | # union of them. 90 | # 91 | # (See Lemma 4.0.21 following explanation for "reduced union" 92 | # definition, and Definition 4.0.28 point 4 for explanation of 93 | # the current process). 94 | nextState = [] 95 | for pos in state: 96 | nextState += transition(n, profil, pos, withTransitions) 97 | 98 | difference = 0 99 | if len(nextState) > 0: 100 | nextState = fsc.reduce(nextState, n) 101 | difference = nextState[0].i - state[0].i 102 | 103 | if difference > 0: 104 | for pos in nextState: 105 | pos.i = pos.i - difference 106 | 107 | return nextState, difference 108 | 109 | 110 | def genAllProfilPowerSet(n): 111 | set = [[] for i in range(2 * n + 2)] 112 | set[0] = [[]] 113 | for i in range(1,2 * n + 2): 114 | for j in [0,1]: 115 | set[i] += [[j] + s for s in set[i - 1]] 116 | 117 | return set 118 | 119 | 120 | def genTransitions(n, withTransitions = True): 121 | allProfils = genAllProfilPowerSet(n) 122 | transitions = [{} for i in range(2 * n + 2)] 123 | 124 | # Add the initial state 125 | processedStates = [] 126 | unprocessedStates = [[fsc.StandardPosition(0,0)]] 127 | while len(unprocessedStates) > 0: 128 | state = unprocessedStates.pop() 129 | processedStates.append(state) 130 | 131 | profilLenMin, profilLenMax = determineRelevantSubwordLenghts(n, state) 132 | for profilLen in range(profilLenMin, profilLenMax + 1): 133 | for profil in allProfils[profilLen]: 134 | # for the current characteristic vector check what 135 | # is the next state. 136 | nextState, difference = getNextState(n, profil, state, withTransitions) 137 | transitions[profilLen].setdefault(str(profil), {}).setdefault(str(state), (nextState, difference)) 138 | if nextState != [] and not nextState in processedStates and not nextState in unprocessedStates: 139 | unprocessedStates.append(nextState) 140 | 141 | return transitions 142 | 143 | 144 | if __name__ == "__main__": 145 | pprint([genTransitions(2)]) 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /finenight/scheme/sort.scm: -------------------------------------------------------------------------------- 1 | ;;; (sorted? sequence less?) 2 | ;;; is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm) 3 | ;;; such that for all 1 <= i <= m, 4 | ;;; (not (less? (list-ref list i) (list-ref list (- i 1)))). 5 | 6 | (define (sorted? seq less?) 7 | (cond 8 | ((null? seq) 9 | #t) 10 | ((vector? seq) 11 | (let ((n (vector-length seq))) 12 | (if (<= n 1) 13 | #t 14 | (do ((i 1 (+ i 1))) 15 | ((or (= i n) 16 | (less? (vector-ref seq (- i 1)) 17 | (vector-ref seq i))) 18 | (= i n)) )) )) 19 | (else 20 | (let loop ((last (car seq)) (next (cdr seq))) 21 | (or (null? next) 22 | (and (not (less? (car next) last)) 23 | (loop (car next) (cdr next)) )) )) )) 24 | 25 | 26 | ;;; (merge a b less?) 27 | ;;; takes two lists a and b such that (sorted? a less?) and (sorted? b less?) 28 | ;;; and returns a new list in which the elements of a and b have been stably 29 | ;;; interleaved so that (sorted? (merge a b less?) less?). 30 | ;;; Note: this does _not_ accept vectors. See below. 31 | 32 | (define (merge a b less?) 33 | (cond 34 | ((null? a) b) 35 | ((null? b) a) 36 | (else (let loop ((x (car a)) (a (cdr a)) (y (car b)) (b (cdr b))) 37 | ;; The loop handles the merging of non-empty lists. It has 38 | ;; been written this way to save testing and car/cdring. 39 | (if (less? y x) 40 | (if (null? b) 41 | (cons y (cons x a)) 42 | (cons y (loop x a (car b) (cdr b)) )) 43 | ;; x <= y 44 | (if (null? a) 45 | (cons x (cons y b)) 46 | (cons x (loop (car a) (cdr a) y b)) )) )) )) 47 | 48 | 49 | ;;; (merge! a b less?) 50 | ;;; takes two sorted lists a and b and smashes their cdr fields to form a 51 | ;;; single sorted list including the elements of both. 52 | ;;; Note: this does _not_ accept vectors. 53 | 54 | (define (merge! a b less?) 55 | (define (loop r a b) 56 | (if (less? (car b) (car a)) 57 | (begin 58 | (set-cdr! r b) 59 | (if (null? (cdr b)) 60 | (set-cdr! b a) 61 | (loop b a (cdr b)) )) 62 | ;; (car a) <= (car b) 63 | (begin 64 | (set-cdr! r a) 65 | (if (null? (cdr a)) 66 | (set-cdr! a b) 67 | (loop a (cdr a) b)) )) ) 68 | (cond 69 | ((null? a) b) 70 | ((null? b) a) 71 | ((less? (car b) (car a)) 72 | (if (null? (cdr b)) 73 | (set-cdr! b a) 74 | (loop b a (cdr b))) 75 | b) 76 | (else ; (car a) <= (car b) 77 | (if (null? (cdr a)) 78 | (set-cdr! a b) 79 | (loop a (cdr a) b)) 80 | a))) 81 | 82 | 83 | 84 | ;;; (sort! sequence less?) 85 | ;;; sorts the list or vector sequence destructively. It uses a version 86 | ;;; of merge-sort invented, to the best of my knowledge, by David H. D. 87 | ;;; Warren, and first used in the DEC-10 Prolog system. R. A. O'Keefe 88 | ;;; adapted it to work destructively in Scheme. 89 | 90 | (define (sort! seq less?) 91 | (define (step n) 92 | (cond 93 | ((> n 2) 94 | (let* ((j (quotient n 2)) 95 | (a (step j)) 96 | (k (- n j)) 97 | (b (step k))) 98 | (merge! a b less?))) 99 | ((= n 2) 100 | (let ((x (car seq)) 101 | (y (cadr seq)) 102 | (p seq)) 103 | (set! seq (cddr seq)) 104 | (if (less? y x) (begin 105 | (set-car! p y) 106 | (set-car! (cdr p) x))) 107 | (set-cdr! (cdr p) '()) 108 | p)) 109 | ((= n 1) 110 | (let ((p seq)) 111 | (set! seq (cdr seq)) 112 | (set-cdr! p '()) 113 | p)) 114 | (else 115 | '()) )) 116 | (if (vector? seq) 117 | (let ((n (vector-length seq)) 118 | (vector seq)) ; save original vector 119 | (set! seq (vector->list seq)) ; convert to list 120 | (do ((p (step n) (cdr p)) ; sort list destructively 121 | (i 0 (+ i 1))) ; and store elements back 122 | ((null? p) vector) ; in original vector 123 | (vector-set! vector i (car p)) )) 124 | ;; otherwise, assume it is a list 125 | (step (length seq)) )) 126 | 127 | 128 | ;;; (sort sequence less?) 129 | ;;; sorts a vector or list non-destructively. It does this by sorting a 130 | ;;; copy of the sequence 131 | (define (sort seq less?) 132 | (if (vector? seq) 133 | (list->vector (sort! (vector->list seq) less?)) 134 | (sort! (append seq '()) less?))) -------------------------------------------------------------------------------- /finenight/lisp/fsc.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :com.rrette.finenight 2 | (:use "COMMON-LISP") 3 | (:nicknames "finenight") 4 | (:export "REDUCE-STATE" 5 | "GENERATE-CHARACTERISTIC-VECTOR" 6 | "IS-ACCEPTING" 7 | "N-POSITION")) 8 | 9 | (in-package :com.rrette.finenight) 10 | (provide :com.rrette.finenight.fsc) 11 | 12 | ;;; This is a state. It contains many positions. 13 | ;;; This state is not necesseraly reduced. 14 | (defclass fsc-state () 15 | ((positions :accessor state-positions 16 | :initarg :positions 17 | :initform ()))) 18 | 19 | ;;; This is a state. It contains many positions. 20 | ;;; This state is necesseraly reduced. 21 | (defclass reduced-state (state) 22 | ()) 23 | 24 | ;;; This is a standard position. 25 | (defstruct n-position 26 | (i 0) 27 | (e 0)) 28 | 29 | (defmethod pos-i ((pos n-position)) 30 | (n-position-i pos)) 31 | 32 | (defmethod pos-i ((pos cons)) 33 | (car pos)) 34 | 35 | (defmethod pos-e ((pos n-position)) 36 | (n-position-e pos)) 37 | 38 | (defmethod pos-e ((pos cons)) 39 | (nth 1 pos)) 40 | 41 | (defun clone-pos (pos) 42 | (make-n-position :i (pos-i i) 43 | :e (pos-e e))) 44 | 45 | ;;; This will generate a characteristic vector for the 46 | ;;; character given in argument, from the character-sequence 47 | (defun generate-characteristic-vector (character character-sequence) 48 | (map 'cons (lambda (x) 49 | (eql x character)) character-sequence)) 50 | 51 | ;;; This function will return true if the position is 52 | ;;; in an accepting state, relatively to the w (word len) 53 | ;;; and the n (leveinstein distance). 54 | ;;; 55 | ;;; Note: it's an error to have a pos-i greater than w 56 | (defmethod is-accepting (w n (pos n-position)) 57 | (<= (- w (pos-i pos)) 58 | (- n (pos-e pos)))) 59 | 60 | ;;; This function will return true if the subsummer is really 61 | ;;; subsumming the subsummee. 62 | (defmethod is-subsumming ((subsummer n-position) (subsummee n-position)) 63 | (let ((i (pos-i subsummer)) 64 | (e (pos-e subsummer)) 65 | (j (pos-i subsummee)) 66 | (f (pos-e subsummee))) 67 | (and (< e f) 68 | (<= (abs (- j i)) 69 | (- f e))))) 70 | 71 | ;;; This function will return true if the subsummee is really 72 | ;;; been subsummed by one or more of the subsummers. 73 | (defmethod is-subsummed ((subsummee n-position) subsummers) 74 | (some (lambda (subsummer) 75 | (is-subsumming subsummer subsummee)) 76 | subsummers)) 77 | 78 | ;;; This will return a reduced positions list 79 | ;;; made from the positions given in argument. 80 | (defmethod reduce-state (positions) 81 | (if (null positions) 82 | () 83 | (if (is-subsummed (car positions) (cdr positions)) 84 | (reduce-state (cdr positions)) 85 | (cons (car positions) (reduce-state (cdr positions)))))) 86 | 87 | ;;; This will return a reduced state of M 88 | (defmethod reduce-state ((M fsc-state)) 89 | (make-instance 'reduced-state (reduce-state (fsc-state-positions M)))) 90 | 91 | (defmethod reduce-state ((M reduced-state)) 92 | M) 93 | 94 | ;;; This will return a new state that will be the reduced 95 | ;;; set representing the state with the posistion given in 96 | ;;; argument. 97 | (defmethod add-position ((M reduced-state) (pos n-position)) 98 | (make-instance 'reduced-state 99 | :positions (reduce-state (cons pos (fsc-state-positions M))))) 100 | 101 | (defmethod add-position ((M fsc-state) (pos n-position)) 102 | (make-instance 'fsc-state :positions (cons pos 103 | (fsc-state-positions M)))) 104 | 105 | (defmethod reduced-union ((M fsc-state) (N fsc-state)) 106 | (make-instance 'reduced-state 107 | :positions (reduce-state (append (fsc-state-positions M) 108 | (fsc-state-positions N))))) 109 | 110 | (defmethod relevant-subword (input n pos) 111 | (let* ((w (length input)) 112 | (i (pos-i pos)) 113 | (e (pos-e pos)) 114 | (k (min (1+ (- (* 2 n) e)) (- w i)))) ;we need to verify the 1+ 115 | (butlast (nthcdr i input) (- w k)))) 116 | 117 | 118 | 119 | 120 | (defun get-right-non-subsuming-pos (n pos base-pos) 121 | (let ((positions nil)) 122 | (do ((max-distance (+ (pos-i base-pos) n 1)) 123 | (high (- (pos-e pos) 1) (- high 1)) 124 | (j (1+ (pos-i pos)) (+ j 1))) 125 | ((>= j max-distance)) 126 | (do* ((f (abs high) (+ f 1))) 127 | ((> f n)) 128 | (let ((new-pos (make-n-position :i j :e f))) 129 | (if (and (not (is-subsumming new-pos pos)) 130 | (not (is-subsumming pos new-pos)) 131 | (is-subsumming base-pos new-pos)) 132 | (setf positions (cons new-pos positions)))))) 133 | positions)) 134 | 135 | 136 | (defun powerset (n pos base-pos) 137 | (let ((positions (get-right-non-subsuming-pos n pos base-pos)) 138 | (set (list (list pos)))) 139 | (mapcar (lambda (p) 140 | (setf set (append set (mapcar (lambda (s) 141 | (cons pos s)) 142 | (powerset n p base-pos))))) 143 | positions) 144 | set)) 145 | 146 | 147 | (defun state-is-smaller (lhs rhs) 148 | (if (< (pos-i lhs) (pos-i rhs)) 149 | t 150 | (if (equal (pos-i lhs) (pos-i rhs)) 151 | (if (< (pos-e lhs) (pos-e lhs)) 152 | t)))) 153 | 154 | 155 | (defun possible-states (n &key (f 0)) 156 | (if (> f n) 157 | nil 158 | (append (powerset n 159 | (make-n-position :i 0 :e f) 160 | (make-n-position :i f :e 0)) 161 | (possible-states n :f (+ f 1))))) 162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /finenight/scheme/fsa2.scm: -------------------------------------------------------------------------------- 1 | 2 | (define-record fsa) 3 | alphabet 4 | start ;the starting state 5 | finals ;list of final states 6 | nodes) ;the mapping from symbol -> states 7 | 8 | (define final? 9 | (lambda (fsa label) 10 | (if (member label (fsa-finals fsa)) 11 | t))) 12 | 13 | 14 | (define build-fsa 15 | "This function build a fsa. 16 | The 'edges' argument is a list of 3-tuple. 17 | The 'final' argument is a list of vertices." 18 | (lambda (alphabet edges start finals) 19 | (let ((states (make-hash-table))) 20 | (mapcar (lambda (edge) 21 | (add-edge! states edge)))))) 22 | 23 | (define ensure-nodes 24 | (lambda (states nodes) 25 | (if (null? nodes) 26 | '() 27 | (let ((node (hash-table-ref states (car nodes) #f))) 28 | ;; if the node is equal to false, it means it wasn't present before 29 | (if (not node) 30 | (set! node (make-node (car nodes)))) 31 | (cons node (cdr nodes)))))) 32 | 33 | ;;This function adds a node to an FSA. 34 | ;;;This function is the destructive version 35 | ;;;of add-edge. 36 | (define nadd-edge 37 | (lambda (states edge) 38 | (let ((src (edge-source edge)) 39 | (dst (edge-destination edge))) 40 | (nadd-edge edge (gethash src nodes)) 41 | f)) 42 | 43 | (defmethod nremove-edge (edge (fsa fsa)) 44 | (let ((node (fsa-node (edge-source edge) fsa))) 45 | (if node 46 | (progn 47 | (nremove-edge edge node) 48 | (if (is-final node fsa) 49 | (setf (fsa-finals fsa) 50 | (remove (node-name node) (fsa-finals fsa)))))) 51 | fsa)) 52 | 53 | (defmethod is-accessible (label fsa) 54 | (let ((accessors nil)) 55 | (maphash (lambda (key node) 56 | (declare (ignore key)) 57 | (if (node-access label node) 58 | (setf accessors (cons (node-name node) accessors)))) 59 | (fsa-nodes fsa)) 60 | accessors)) 61 | 62 | 63 | (defmethod is-accessible ((node node) fsa) 64 | (is-accessible (node-name node) fsa)) 65 | 66 | (defmethod nremove-node ((node node) fsa) 67 | (nremove-node (node-name node) fsa)) 68 | 69 | (defmethod nremove-node (label fsa) 70 | (progn 71 | (setf (fsa-finals fsa) 72 | (remove label (fsa-finals fsa))) 73 | (remhash label (fsa-nodes fsa)) 74 | fsa)) 75 | 76 | (defun add-node (node fsa) 77 | "This function add a node to the copy of the FSA" 78 | (let ((name (node-name node)) 79 | (nodes (fsa-nodes fsa))) 80 | (if (null (gethash name nodes)) 81 | (setf (gethash name nodes) node)))) 82 | 83 | 84 | (defun are-equivalent (lhs-label rhs-label fsa) 85 | "Returns nil if they are not equivalent, return the rhs-label otherwise" 86 | (let ((lhs (fsa-node lhs-label fsa)) 87 | (rhs (fsa-node rhs-label fsa))) 88 | (if (and (equal (is-final lhs-label fsa) 89 | (is-final rhs-label fsa)) 90 | (edges-are-equivalent (node-edges lhs) 91 | (node-edges rhs))) 92 | rhs-label))) 93 | 94 | 95 | ;;;This function returns the node identified 96 | ;;;by the id specified. 97 | (defmethod fsa-node (id fsa) 98 | "This function returns the node identified by the id specified." 99 | (gethash id (fsa-nodes fsa))) 100 | 101 | (defmethod e-close-nodes (nodes-id fsa) 102 | (uniqueness-set (append nodes-id 103 | (mapcan (lambda (src) 104 | (e-close (fsa-node src fsa))) 105 | nodes-id)))) 106 | 107 | (defmethod transition (input id fsa) 108 | (let ((node (fsa-node id fsa))) 109 | (e-close-nodes 110 | (mapcan (lambda (src) 111 | (node-transition input (fsa-node src fsa))) 112 | (cons id (e-close node))) 113 | fsa))) 114 | 115 | (defmethod transition (input (ids cons) fsa) 116 | (uniqueness-set (mapcan (lambda (id) 117 | (transition input id fsa)) 118 | ids))) 119 | 120 | (defmethod e-transition (word fsa) 121 | "This is the extended transition function for the FSA." 122 | (let ((nodes (cons (fsa-start fsa) nil))) 123 | (reduce (lambda (ids input) 124 | (transition (string input) ids fsa)) 125 | word 126 | :initial-value nodes))) 127 | 128 | (defmethod accepts (word fsa) 129 | "This function returns true if the word is accepted by the FSA." 130 | (some (lambda (node) 131 | (if (member node (fsa-finals fsa)) 132 | t)) 133 | (e-transition word fsa))) 134 | 135 | (defmethod graphviz-export (fsa &key (file nil) (xsize 8) (ysize 11)) 136 | "This function will write the dot description of the FSA in the stream." 137 | (progn 138 | (if (null file) 139 | (graphviz-export-stream fsa :stream t :xsize xsize :ysize ysize) 140 | (with-open-file (stream 141 | file 142 | :direction :output 143 | :if-exists :supersede 144 | :if-does-not-exist :create) 145 | (graphviz-export-stream fsa 146 | :stream stream 147 | :xsize xsize 148 | :ysize ysize))) 149 | fsa)) 150 | 151 | 152 | (defmethod graphviz-export-stream (fsa &key (stream t) (xsize 8) (ysize 11)) 153 | "This function will write the dot description of the FSA in the stream." 154 | (progn 155 | (format stream 156 | "digraph G {~% rankdir = LR;~% size = \"~A, ~A\";~%" 157 | xsize 158 | ysize) 159 | (format stream " rotate = 90;~%") 160 | (if (not (null (fsa-finals fsa))) 161 | (progn 162 | (format stream "~% node [shape = doublecircle];~% ") 163 | (mapcar (lambda (x) 164 | (format stream " \"~A\"" x)) 165 | (fsa-finals fsa)))) 166 | (format stream ";~%~% node [shape = circle];~% ") 167 | (maphash (lambda (key node) 168 | (declare (ignore key)) 169 | (format stream " \"~A\"" (node-name node))) 170 | (fsa-nodes fsa)) 171 | (format stream ";~%~%") 172 | (maphash (lambda (key node) 173 | (declare (ignore key)) 174 | (mapcar (lambda (edge) 175 | (format stream " \"~A\" -> \"~A\" [label = \"~A\"];~%" 176 | (edge-source edge) 177 | (edge-destination edge) 178 | (if (null (edge-symbol edge)) 179 | "epsilon" 180 | (edge-symbol edge)))) 181 | (node-edges node))) 182 | (fsa-nodes fsa)) 183 | (format stream "}~%") 184 | fsa)) 185 | 186 | 187 | -------------------------------------------------------------------------------- /finenight/lisp/iadfa-test.lisp: -------------------------------------------------------------------------------- 1 | ;(require :org.ancar.CLUnit "CLUnit") 2 | ;(require :com.rrette.finenight "finenight") 3 | 4 | (load "CLUnit.lisp") 5 | 6 | (load "finenight") 7 | 8 | ;(in-package :cl-user) 9 | 10 | ;(import 'org.ancar.CLUnit::deftest) 11 | 12 | (in-package :com.rrette.finenight.iadfa) 13 | 14 | (defparameter *my-iadfa* (debug-gen-iadfa '("addendice" 15 | "append" "appendice" 16 | "bappend" "bappendice" 17 | "cappend" "cappendice" 18 | "mormont"))) 19 | 20 | 21 | (defun iadfa-non-branch-suffix () 22 | "This tests that the output of the iadfa isn't screwed up 23 | by the prefix 0--0 of 0--0--0 because the delete branch 24 | don't delete any node" 25 | (test-equivalence '("0-----0" 26 | "0--0" 27 | "0--0--0"))) 28 | 29 | (org.ancar.CLUnit::deftest "IADFA tests" 30 | :category "Destinations" 31 | :test-fn #'iadfa-non-branch-suffix) 32 | 33 | (defun iadfa-test1 () 34 | (test-equivalence '("0-APR-CREDIT-CARD" 35 | "0-APR-CREDIT-CARD-4U" 36 | "0-APR-CREDIT-CARD-APPLICATION" 37 | "0-APR-CREDIT-CARD-OFFERS" 38 | "0-APR-CREDIT-CARD-ONLINE" 39 | "0-APR-CREDIT-CARDS" 40 | "0-APR-CREDITCARD" 41 | "0-APR-CREDITCARDS" 42 | "0-APR-CREDITS-CARD"))) 43 | 44 | (defun iadfa-test2 () 45 | "This situation would cause to have an empty ancestror fsa." 46 | (test-equivalence '("0000" 47 | "0001"))) 48 | 49 | 50 | 51 | (defun iadfa-test3 () 52 | "bad behavior where the common-suffix was going too far. 53 | We had a cycle on the W (1 -W> 2 -O> 3 -O> 1)." 54 | (test-equivalence '("08WL" 55 | "08WOOL" 56 | "08WOOOL" 57 | "08WOOOOL"))) 58 | 59 | (defun iadfa-test4 () 60 | "The stem wasn't long enough." 61 | (test-equivalence '("0-adance" 62 | "0-atransfers" 63 | "0-babobibobu" 64 | "0-balance" 65 | "0-balance-transfers"))) 66 | 67 | (defun iadfa-test5 () 68 | "When we add 0-SUNGKOREA the stem starts within the common suffixes. 69 | So we add the stem within the suffixes which create new words." 70 | (test-equivalence '("0-FORUM" 71 | "0-SSUM" 72 | "0-SSUNGKOREA"))) 73 | 74 | 75 | (defun iadfa-test6 () 76 | "When we add 0-SUNGKOREA the stem starts within the common suffixes. 77 | So we add the stem within the suffixes which create new words. 78 | So we need to make sure that the common-suffix won't go further than 79 | the 0-ASUNGKOREA" 80 | (test-equivalence '("0-ASUNGKOREA" 81 | "0-FORUM" 82 | "0-S" 83 | "0-SUM" 84 | "0-SUNGKOREA"))) 85 | 86 | (defun iadfa-test8 () 87 | "This is an example of a test where we had a bad 88 | update of parents-arities" 89 | (test-equivalence '("0-1" 90 | "0-1-0" 91 | "0-1-1"))) 92 | 93 | (defun iadfa-test9 () 94 | (test-equivalence '("0-1-2" 95 | "0-1-2-0" 96 | "0-1-2-3"))) 97 | 98 | (defun iadfa-test10 () 99 | 100 | (test-equivalence '("0-1" 101 | "0-1-0" 102 | "0-1-1" 103 | "0-1-100" 104 | "0-1-2" 105 | "0-1-2-0" 106 | "0-1-2-3"))) 107 | 108 | (defun iadfa-test11 () 109 | "This situation was causing problems. The 0-2GENION was 110 | disapearing after the 0-2GO addition. 0-2GO was subsuming 111 | the stem of 0-2GENION, so we had GE to add back for the 112 | subsumed, but the calculated stem to add was only G, 113 | since we had the entry 0-0OO. 114 | 115 | The cause was that we were completely consuming the profile, 116 | but we shouldn't eat profile when there's one to consume" 117 | (test-equivalence '("0-0OO" 118 | "0-2-GENION" 119 | "0-2GENION" 120 | "0-2GO"))) 121 | 122 | (org.ancar.CLUnit::deftest "IADFA Test 11" 123 | :category "Subsumed previous stems." 124 | :test-fn #'iadfa-test11) 125 | 126 | (defun iadfa-test12 () 127 | "The 0-7-0 was lost. When we were adding the last entry, 128 | common-prefix was returning a suffix of 7-2 and a previous 129 | stem of 7-0. However, because of 0-462, we were getting a 130 | stem of 7 from common-suffix and a previous stem of 7-0. 131 | 132 | In fact we shouldn't try to get a common prefix that would 133 | produce a stem shorter than the previous stem. 134 | " 135 | (test-equivalence '("0-462" 136 | "0-5-0" 137 | "0-7-0" 138 | "0-7-2"))) 139 | 140 | (org.ancar.CLUnit::deftest "IADFA Test 12" 141 | :category "Subsumed previouss stems." 142 | :test-fn #'iadfa-test12) 143 | 144 | (defun iadfa-test13 () 145 | "The 0-5000 was created. This was caused by the common 146 | prefix node of 0-3000 and 0-300MPH forgot to remove the 147 | ancestror of the node created for the subsubed previous 148 | stem" 149 | (test-equivalence '("0-1000000" 150 | "0-10000000" 151 | "0-300" 152 | "0-3000" 153 | "0-300MPH" 154 | "0-500" 155 | "0-500MPH"))) 156 | 157 | (org.ancar.CLUnit::deftest "IADFA Test 13" 158 | :category "Subsumed previous stems." 159 | :test-fn #'iadfa-test13) 160 | 161 | (defun iadfa-test14 () 162 | "The common suffix was wrongly programmed for previous stem. 163 | The right behavior is not to consume it, and stop when current-suffix 164 | is equal to previous-stem." 165 | (test-equivalence '("0-APR-CREDITS-CARD" 166 | "0-APRCREDIT-CARD" 167 | "0-APRCREDITCARD"))) 168 | 169 | (org.ancar.CLUnit::deftest "IADFA Test 14" 170 | :category "Subsumed previous stems." 171 | :test-fn #'iadfa-test14) 172 | 173 | (org.ancar.CLUnit::deftest "IADFA Test 1" 174 | :category "Destinations" 175 | :test-fn #'iadfa-test1) 176 | 177 | (org.ancar.CLUnit::deftest "IADFA Test 2" 178 | :category "Destinations" 179 | :test-fn #'iadfa-test2) 180 | 181 | (org.ancar.CLUnit::deftest "IADFA Test 3" 182 | :category "Destinations" 183 | :test-fn #'iadfa-test3) 184 | 185 | (org.ancar.CLUnit::deftest "IADFA Test 4" 186 | :category "Destinations" 187 | :test-fn #'iadfa-test4) 188 | 189 | (org.ancar.CLUnit::deftest "IADFA Test 5" 190 | :category "Destinations" 191 | :test-fn #'iadfa-test5) 192 | 193 | (org.ancar.CLUnit::deftest "IADFA Test 6" 194 | :category "Destinations" 195 | :test-fn #'iadfa-test6) 196 | 197 | (org.ancar.CLUnit::deftest "IADFA Test 8" 198 | :category "Destinations" 199 | :test-fn #'iadfa-test8) 200 | 201 | (org.ancar.CLUnit::deftest "IADFA Test 9" 202 | :category "Destinations" 203 | :test-fn #'iadfa-test9) 204 | 205 | (org.ancar.CLUnit::deftest "IADFA Test 10" 206 | :category "Destinations" 207 | :test-fn #'iadfa-test10) 208 | 209 | 210 | 211 | (org.ancar.CLUnit::run-all-tests) 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /finenight/python/recognize: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import getopt 4 | import os 5 | import pickle 6 | import pprint 7 | import sys 8 | 9 | 10 | 11 | def usage(): 12 | print """Usage: recognize [OPTION]... [word] ... 13 | 14 | If a "word" argument is given, it check if its a valid word, 15 | and if not, it gives suggestions. 16 | 17 | If the iadfa file is not given it will generate one with the 18 | data/test.dico. 19 | 20 | Mandatory arguments to long options are mandatory for short options too. 21 | -d, --dico-file=FILE this is the dictionnary file that will 22 | be used to generate the IADFA. 23 | -e, --export=FILE export the IADFA to a graphviz file. 24 | -i, --iadfa=FILE Use this file for the IADFA. If you 25 | specify the '-d' or '--dico' option, 26 | this is where the IADFA will be stored. 27 | -n, --distance=n this is the levenshtein distance. [default=1] 28 | -o, --oflazer use the [oflazer96errortolerant] algorithm, 29 | instead of the [schulz02fast] faster algorithm. 30 | -t, --transitions-file=FILE the file that contain/will contain the OFlazer 31 | transitions. (only works if you specified that 32 | you want to use OFlazer algorithm. 33 | -v, --verbose verbose mode. It causes the program to print 34 | debugging messages about its progress. 35 | """ 36 | 37 | def generateIADFA(dictFilename, iadfaFilename): 38 | debug("loading IADFA code") 39 | from iadfa import IncrementalAdfa 40 | debug("Reading the dico file.") 41 | f = open(dictFilename, "r"); 42 | # words = [] 43 | # for line in f: 44 | # if line[-1] == "\n": 45 | # line = line[0:-1] 46 | # words.append(line) 47 | #words.sort() 48 | debug("Starting the FSA construction.") 49 | a = IncrementalAdfa(f, sorted = True) 50 | pickle.dump(a,open(iadfaFilename, "w")) 51 | debug("FSA saved.") 52 | return a 53 | 54 | 55 | 56 | def exportFileToPS(fsa, filename): 57 | debug("Exporting the fsa in a graphviz format.") 58 | fsa.graphVizExport(filename) 59 | 60 | 61 | def debug(output): 62 | if verbose is True: 63 | print output 64 | 65 | 66 | verbose = False 67 | config = {"transitionsFile" : "levenshtein.dat", 68 | "iadfaFile": "iadfa.dat", 69 | "dicoFile": None} 70 | 71 | 72 | 73 | def getTransitionStates(filename, distance): 74 | 75 | from possibleStates import genTransitions 76 | transitions = {} 77 | try: 78 | debug("Checking for existing transitions file") 79 | transitions = pickle.load(open(filename, "r")) 80 | debug("Transitions file loaded") 81 | except: 82 | pass 83 | 84 | distanceStr = str(distance) 85 | if transitions.has_key(distanceStr) is False: 86 | debug("Creating the dynamic levenshtein transitions") 87 | transitions[distanceStr] = genTransitions(distance) 88 | try: 89 | pickle.dump(transitions,open(filename, "w")) 90 | debug("The transitions are dumped.") 91 | except IOError, e: 92 | debug("Was unable to save the generated transitions: " + str(e)) 93 | 94 | return transitions[distanceStr] 95 | 96 | 97 | 98 | if __name__ == "__main__": 99 | try: 100 | opts, args = getopt.getopt(sys.argv[1:], "d:e:i:n:ot:vh", ["dico-file=", "export=", "iadfa=", "distance=", "oflazer", "transitions-file=", "verbose", "help"]) 101 | except getopt.GetoptError, e: 102 | # print help information and exit: 103 | print e 104 | usage() 105 | sys.exit(2) 106 | 107 | gen = False 108 | export = False 109 | useFastAlgo = True 110 | distance = None 111 | transitionsFile = config["transitionsFile"] 112 | iadfaFile = config["iadfaFile"] 113 | dicoFile = config["dicoFile"] 114 | for o, a in opts: 115 | if o in ("-d", "--dico-file"): 116 | dicoFile = a 117 | 118 | if o in ("-e", "--export"): 119 | export = True 120 | exportFilename = a 121 | 122 | if o in ("-i", "--iadfa"): 123 | iadfaFile = a 124 | 125 | if o in ("-n", "--distance"): 126 | try: 127 | distance = int(a) 128 | except: 129 | print "The distance given in argument is not a number" 130 | usage() 131 | sys.exit(2) 132 | 133 | if o in ("-o", "--oflazer"): 134 | useFastAlgo = False 135 | 136 | if o in ("-h", "--help"): 137 | usage() 138 | sys.exit(2) 139 | 140 | if o in ("-t", "--transitions-file"): 141 | transitionsFile = a 142 | 143 | if o in ("-v", "--verbose"): 144 | verbose = True 145 | 146 | if os.path.exists(iadfaFile): 147 | try: 148 | debug("Loading the FSA from IADFA file") 149 | a = pickle.load(open(iadfaFile, "r")) 150 | except: 151 | print "There's no valid IADFA file. You should Specify one, or " + \ 152 | "you should generate config's 'iadfaFile' file." 153 | sys.exit(2) 154 | else: 155 | if dicoFile is None: 156 | print "No dico file specified!" 157 | usage() 158 | sys.exit(2) 159 | 160 | a = generateIADFA(dicoFile, iadfaFile) 161 | 162 | if distance is not None: 163 | if distance <= 0: 164 | print "The distance cannot be negative." 165 | usage() 166 | sys.exit(2) 167 | 168 | if distance > 3: 169 | print "You cannot specify a distance greater than 3." 170 | usage() 171 | sys.exit(2) 172 | else: 173 | distance = 1 174 | 175 | if export is True: 176 | exportFileToPS(a, exportFilename) 177 | 178 | transitionStates = None 179 | if len(args) > 0: 180 | if useFastAlgo is True: 181 | debug("Using Schulz's algoritm") 182 | from fsc import ErrorTolerantRecognizer 183 | transitionStates = getTransitionStates(transitionsFile, distance) 184 | else: 185 | debug("Using Flazer's algorithm") 186 | from et import ErrorTolerantRecognizer 187 | 188 | etr = ErrorTolerantRecognizer(distance, transitionStates) 189 | for word in args: 190 | print "Starting the recognizer for " + word 191 | suggestions = etr.recognize(word,a) 192 | print "Suggestions for " + word 193 | for suggestion in suggestions: 194 | print " " + suggestion 195 | else: 196 | debug("No words to be checked.") 197 | -------------------------------------------------------------------------------- /finenight/python/fsc.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import types 3 | import pdb 4 | from functools import reduce 5 | 6 | 7 | class Position: 8 | def __init__(self, i, e, isTransposition): 9 | self.isTransposition = isTransposition 10 | self.i = i 11 | self.e = e 12 | 13 | def __str__(self): 14 | val = str((self.i, self.e)) 15 | if self.isTransposition: 16 | val = 't' + val 17 | return val 18 | 19 | def __repr__(self): 20 | return str(self) 21 | 22 | def __eq__(lhs, rhs): 23 | return lhs.i == rhs.i and \ 24 | lhs.e == rhs.e and \ 25 | lhs.isTransposition == rhs.isTransposition 26 | 27 | def __lt__(lhs, rhs): 28 | if lhs.i < rhs.i: 29 | return True 30 | 31 | if lhs.i >= rhs.i: 32 | return False 33 | 34 | # consider a standard position as lower than a transposition position 35 | if not lhs.isTransposition and rhs.isTransposition: 36 | return True 37 | 38 | if lhs.isTransposition != rhs.isTransposition: 39 | return False 40 | 41 | if lhs.e < rhs.e: 42 | return True 43 | 44 | if lhs.e > rhs.e: 45 | return False 46 | 47 | 48 | return False 49 | 50 | 51 | def StandardPosition(i, e): 52 | return Position(i, e, False) 53 | 54 | def TPosition(i, e): 55 | return Position(i, e, True) 56 | 57 | 58 | def isSubsumming(subsumming, subsummee, n): 59 | i, e = subsumming.i, subsumming.e 60 | j, f = subsummee.i, subsummee.e 61 | 62 | # true if i and j are t-positions 63 | it = subsumming.isTransposition 64 | jt = subsummee.isTransposition 65 | 66 | # see 7.1.3 67 | if it: 68 | if jt: 69 | # 4. position it^e subsumes jt^f iff f > e and i = j 70 | if it and jt and f > e and i == j: 71 | return True 72 | else: 73 | # 3. position it^e subsumes j^f iff n = f > e and i = j 74 | if n == f and f > e and i == j: 75 | return True 76 | 77 | else: 78 | if jt: 79 | # 2. position i^e subsumes jt^f iff f > e and |j-(i-1)| <= f - e 80 | if f > e and (abs(j - (i - 1)) <= (f - e)): 81 | return True 82 | else: 83 | # 1. position i^e subsumes j^f iff e < f and |j-i| <= f - e 84 | if e < f and (abs(j - i) <= (f - e )): 85 | return True 86 | 87 | return False 88 | 89 | 90 | 91 | def reduce(M, n): 92 | # Process entries by number of errors. 93 | # This is because a entry can only be subsumed 94 | # by entries with less errors. 95 | items = {} 96 | for entry in M: 97 | l = items.setdefault(entry.e, []) 98 | item = [entry, False] 99 | if item not in l: 100 | l.append(item) 101 | 102 | keys = list(items.keys()) 103 | keys.sort() 104 | for eIndex in range(len(keys)): 105 | e = keys[eIndex] 106 | for item in items[e]: 107 | if item[1] is False: 108 | pos = item[0] 109 | for f in keys[eIndex + 1:]: 110 | for jIndex in range(len(items[f])): 111 | otherItem = items[f][jIndex] 112 | if otherItem[1] is False: 113 | otherPos = otherItem[0] 114 | if isSubsumming(pos, otherPos, n): 115 | otherItem[1] = True 116 | items[e] = [j for j in items[e] if not j[1]] 117 | union = [] 118 | for key in items: 119 | for item in items[key]: 120 | union.append(item[0]) 121 | union.sort() 122 | 123 | return union 124 | 125 | 126 | def subword(input, n, i, e): 127 | w = len(input) 128 | k = min(2*n - e + 1, w - i) 129 | return input[i:i + k] 130 | 131 | 132 | def positiveK(cString): 133 | i = 0 134 | while i < len(cString) and cString[i] != 1: 135 | i = i + 1 136 | if i == len(cString): 137 | return None 138 | else: 139 | return i + 1 140 | 141 | 142 | def union(M, N, n): 143 | if type(M) is not list: 144 | M = [M] 145 | if type(N) is not list: 146 | N = [N] 147 | 148 | return reduce(M + N, n) 149 | 150 | 151 | def profil( inputWord ): 152 | characters = {} 153 | cVector = [] 154 | currentSymbol = 1 155 | for c in inputWord: 156 | if c not in characters: 157 | characters[c] = currentSymbol 158 | currentSymbol += 1 159 | cVector.append(characters[c]) 160 | return cVector 161 | 162 | 163 | def characterizedVector( character, inputWord ): 164 | cVector = [] 165 | for c in inputWord: 166 | if c == character: 167 | cVector.append(1) 168 | else: 169 | cVector.append(0) 170 | return cVector 171 | 172 | 173 | def genCharVectors(l): 174 | vectors = [[0] * l] 175 | for i in range(pow(2, l) - 1): 176 | vectors.append(addone(vectors[-1])) 177 | return vectors 178 | 179 | 180 | def addone(vec): 181 | if len(vec) == 0: 182 | return [] 183 | if vec[0] == 0: 184 | return [1] + vec[1:] 185 | 186 | if vec[0] == 1: 187 | return [0] + addone(vec[1:]) 188 | 189 | 190 | 191 | 192 | def isLikeStates(state, lowerStates): 193 | 194 | isLike = False 195 | i = 0 196 | while i < len(lowerStates) and isLike == False: 197 | lowerState = lowerStates[i] 198 | state = copy.copy(state) 199 | state.sort() 200 | 201 | difference = state[0][0] - lowerState[0][0] 202 | for index in range(len(state)): 203 | state[index] = state[index][0] - difference, state[index][1] 204 | 205 | if state == lowerState: 206 | isLike = True 207 | 208 | i += 1 209 | 210 | return isLike 211 | 212 | 213 | def delta( n, stateTypeAndIndex, character, input, states ): 214 | (stateType, index) = stateTypeAndIndex 215 | cv = characterizedVector( character, input )[:(2 * n + 1)] 216 | l = len(cv) 217 | w = states[l] 218 | cv = str(cv) 219 | 220 | state = None 221 | if cv in w and str(stateType) in w[cv]: 222 | state = w[cv][str(stateType)] 223 | state = (state[0], state[1] + index) 224 | return state 225 | 226 | 227 | def final(n, state, index, wordLen): 228 | isFinal = False 229 | 230 | j = 0 231 | while j < len(state) and isFinal == False: 232 | i = state[j].i + index 233 | e = state[j].e 234 | if wordLen - i + e <= n: 235 | isFinal = True 236 | j += 1 237 | return isFinal 238 | 239 | 240 | class ErrorTolerantRecognizer: 241 | def __init__(self, n, transitionsStates = None): 242 | if transitionsStates is None: 243 | transitionsStates = handCraftedStates 244 | self.transitionsStates = transitionsStates 245 | self.n = n 246 | 247 | def recognize( self, word, fsa): 248 | words = [] 249 | wordLen = len(word) 250 | 251 | states = [("", fsa.startState, (str([(0,0)]), 0))] 252 | while len(states): 253 | (V, q, M) = states.pop() 254 | for (x, q1) in list(fsa.states[q].transitions.items()): 255 | mPrime = delta( self.n, M, x, word[M[1]:], self.transitionsStates ) 256 | if mPrime[0] != []: 257 | V1 = V + x 258 | states.append((V1, q1, mPrime)) 259 | if q in fsa.finalStates and final(self.n, M[0], M[1], wordLen): 260 | words.append(V) 261 | return words 262 | 263 | 264 | -------------------------------------------------------------------------------- /finenight/lisp/fsa-builder.lisp: -------------------------------------------------------------------------------- 1 | (in-package :com.rrette.finenight.fsa-builder) 2 | 3 | ;; initial-state speak of itself. 4 | ;; final-states is a list of nodes considered as final 5 | ;; transitions is a list of 3-tuple. (source-node input-symbol destination-node) 6 | (defstruct (fsa-builder (:copier nil)) 7 | (initial-state 0) 8 | (nodes (make-hash-table)) 9 | (finals '())) 10 | 11 | 12 | (defun fsa-edges (fsa) 13 | (labels ((E (nodes) 14 | (if (null nodes) 15 | '() 16 | (append (node-edges (car nodes)) 17 | (E (cdr nodes)))))) 18 | (E (hash-values (fsa-builder-nodes fsa))))) 19 | 20 | 21 | 22 | ;; (define fsa-node-ancestrors 23 | ;; (lambda (fsa label) 24 | ;; (hash-table-ref/default (fsa-ancestrors-nodes fsa) 25 | ;; label 26 | ;; '()))) 27 | 28 | ;; (define fsa-remove-ancestror! 29 | ;; (lambda (fsa node) 30 | ;; (map (lambda (child) 31 | ;; (hash-table-update!/default (fsa-ancestrors-nodes fsa) 32 | ;; (node-label child) 33 | ;; (lambda (lst) 34 | ;; (delete! node lst)) 35 | ;; '())) 36 | ;; (node-destinations node)))) 37 | 38 | (defun fsa-add-edge! (fsa src-label input-symbol dst-label) 39 | (let ((src-node (hash-table-update!/default (lambda (x) x) src-label (fsa-builder-nodes fsa) (make-empty-node src-label))) 40 | (dst-node (hash-table-update!/default (lambda (x) x) dst-label (fsa-builder-nodes fsa) (make-empty-node dst-label)))) 41 | (node-add-edge! src-node input-symbol dst-node) 42 | fsa)) 43 | 44 | (defun fsa-remove-node! (fsa node) 45 | (let* ((label (node-label node))) 46 | (remhash label (fsa-builder-nodes fsa)) 47 | (setf (fsa-builder-finals fsa) (delete node (fsa-builder-finals fsa))) 48 | fsa)) 49 | 50 | (defun fsa-remove-edge! (fsa src-label input-symbol dst-label) 51 | (let ((src-node (hash-table-ref/default (fsa-builder-nodes fsa) src-label nil)) 52 | (dst-node (hash-table-ref/default (fsa-builder-nodes fsa) dst-label nil))) 53 | (if (and src-node dst-node) 54 | (node-remove-edge! src-node input-symbol dst-node)) 55 | fsa)) 56 | 57 | (defun build-fsa (initial-label edges finals) 58 | (let ((fsa (reduce #'(lambda (edge fsa) 59 | (fsa-add-edge! fsa (car edge) (cadr edge) (caddr edge))) 60 | edges 61 | :initial-value (make-fsa-builder :initial-state initial-label)))) 62 | (reduce #'(lambda (final fsa) 63 | (fsa-add-final! fsa final)) 64 | finals 65 | :initial-value fsa))) 66 | 67 | (defun make-empty-fsa-builder (initial-label) 68 | (let ((fsa (make-fsa-builder :initial-state initial-label))) 69 | (hash-table-update!/default (lambda (x) x) initial-label (fsa-builder-nodes fsa) (make-empty-node initial-label)) 70 | fsa)) 71 | 72 | (defun get-node (fsa node-label) 73 | (gethash node-label (fsa-builder-nodes fsa))) 74 | 75 | ;(define get-node 76 | ; (lambda (fsa node-label) 77 | ; (my-hash-table-get! (fsa-nodes fsa) node-label (lambda () (make-empty-node node-label))))) 78 | 79 | 80 | (defun get-state (fsa label) 81 | (node-label (get-node fsa label))) 82 | 83 | ;; (define build-fsa 84 | ;; (lambda (alphabet initial-states final-states edges) 85 | ;; (let* ((node-map (make-hash-table)) 86 | ;; (get-node 87 | ;; (lambda (node) 88 | ;; (hash-table-ref node-map 89 | ;; node 90 | ;; (make-node node (make-hash-table) #f))))) 91 | ;; (letrec ((update-final-nodes 92 | ;; (lambda (nodes) 93 | ;; (if (null? nodes) 94 | ;; #f 95 | ;; (set! (node-final (get-node (car nodes))) #t) 96 | ;; (update-final-nodes (cdr nodes))))) 97 | ;; (B (lambda (edges) 98 | ;; (if (null? edges) 99 | ;; ;; 100 | ;; (let* ((edge (car edges)) 101 | ;; (src-node (get-node (source-node edge))) 102 | ;; (dst-node (get-node (destination-node edge)))) 103 | ;; (node-add-edge! src-node 104 | ;; (input-symbol edge) 105 | ;; dst-node)))) 106 | ;; (B (cdr edges)))) 107 | ;; (B edges) 108 | ;; (make-fsa alphabet 109 | ;; (get-node initial-state) 110 | 111 | 112 | ;; this function returns a list of destination nodes 113 | ;; for a given source node and an input symbol 114 | ;; (define transition 115 | ;; (lambda (fsa node input) 116 | ;; (letrec 117 | ;; ((T (lambda (edges) 118 | ;; (if (null? edges) 119 | ;; '() 120 | ;; (let ((edge (car edges))) 121 | ;; (if (and (eq? (source-node edge) node) 122 | ;; (eq? (input-symbol edge) input)) 123 | ;; (cons (destination-node edge) 124 | ;; (T (cdr edges))) 125 | ;; (T (cdr edges)))))))) 126 | ;; (T (edges fsa))))) 127 | 128 | ;; this function returns true if the node is 129 | ;; part of the final states. 130 | (defun final? (node) 131 | (node-final node)) 132 | 133 | 134 | (defun fsa-add-final! (fsa node-label) 135 | (fsa-add-final-node! fsa (get-node fsa node-label))) 136 | 137 | (defun fsa-add-final-node! (fsa node) 138 | (setf (fsa-builder-finals fsa) (append (fsa-builder-finals fsa) (list node))) 139 | (setf (node-final node) t) 140 | fsa) 141 | 142 | (defun fsa-add-node! (fsa node) 143 | (if (node-final node) 144 | (fsa-add-final-node! fsa node)) 145 | (hash-table-update! (node-label node) 146 | (fsa-builder-nodes fsa) 147 | n 148 | (declare (ignore n)) 149 | node)) 150 | 151 | (defun graphviz-export (fsa) 152 | (graphviz-export-to-file fsa "test.dot")) 153 | 154 | (defun graphviz-export-to-file (fsa file) 155 | "This function will write the dot description of the FSA in the stream." 156 | (let ((p (open file :direction :output :if-exists :supersede))) 157 | (format p "digraph G {~% rankdir = LR;~% size = \"8, 10\";~%") 158 | (if (not (null (fsa-builder-finals fsa))) 159 | (progn 160 | (format p "~% node [shape = doublecircle];~% ") 161 | (dolist (x (fsa-builder-finals fsa)) 162 | (format p " \"~A\"" (node-label x))) 163 | (format p ";"))) 164 | (format p "~%~% node [shape = circle];~% ") 165 | (dolist (label (hash-keys (fsa-builder-nodes fsa))) 166 | (format p " \"~A\"" label)) 167 | (format p ";~%~%") 168 | (dolist (node (hash-values (fsa-builder-nodes fsa))) 169 | (dolist (edge (node-edges node)) 170 | (format p 171 | " \"~A\" -> \"~A\" [label = \"~A\"];~%" 172 | (car edge) 173 | (caddr edge) 174 | (if (null (cadr edge)) 175 | "epsilon" 176 | (cadr edge))))) 177 | (format p "}~%") 178 | (close p) 179 | fsa)) 180 | 181 | 182 | 183 | 184 | (defun fsa-initial-node (fsa) 185 | (get-node fsa (fsa-builder-initial-state fsa))) 186 | 187 | 188 | (defun make-fsa-builder-from-fsa (fsa) 189 | (let ((fsa-builder (make-empty-fsa-builder (node-label (fsa-start-node fsa)))) 190 | (nodes (list (fsa-start-node fsa)))) 191 | (labels ((retreive-nodes (n) 192 | (if (null n) 193 | (build-fsa-builder-with-nodes) 194 | (progn 195 | (setf nodes (cons (car n) nodes)) 196 | (retreive-nodes (append (cdr n) (set-difference (node-destinations (car n)) 197 | nodes 198 | :test #'eq)))))) 199 | (build-fsa-builder-with-nodes () 200 | (dolist (node nodes) 201 | (fsa-add-node! fsa-builder node)))) 202 | (retreive-nodes nodes)) 203 | fsa-builder)) 204 | 205 | 206 | (defun fsa-builder-accept? (fsa-builder word) 207 | (labels ((T (node word) 208 | (if (null word) 209 | (node-final node) 210 | (let ((nodes (node-transition node (car word)))) 211 | (if (null nodes) 212 | nil 213 | (T (car nodes) (cdr word))))))) 214 | (T (fsa-initial-node fsa-builder) word))) 215 | 216 | -------------------------------------------------------------------------------- /finenight/scheme/fsa-builder.scm: -------------------------------------------------------------------------------- 1 | ;(define-extension fsa-builder) 2 | 3 | ;(require-extension fsa) 4 | 5 | ;; initial-state speak of itself. 6 | ;; final-states is a list of nodes considered as final 7 | ;; transitions is a list of 3-tuple. (source-node input-symbol destination-node) 8 | (define-record fsa-builder initial-state nodes finals) 9 | 10 | (define-record-printer (fsa-builder x out) 11 | (fprintf out "(fsa ~S ~S ~S)" 12 | (fsa-builder-initial-state x) (fsa-builder-finals x) (hash-table->alist (fsa-builder-nodes x)))) 13 | 14 | (define make-fsa-builder-from-fsa 15 | (lambda (fsa) 16 | (let ([fsa-builder (make-empty-fsa-builder (node-label (fsa-start-node fsa)))] 17 | [nodes (list (fsa-start-node fsa))]) 18 | (letrec ([retreive-nodes (lambda (n) 19 | (if (null? n) 20 | (build-fsa-builder-with-nodes) 21 | (begin 22 | (set! nodes (cons (car n) nodes)) 23 | (retreive-nodes (append (cdr n) (lset-difference eq? 24 | (node-destinations (car n)) 25 | nodes))))))] 26 | [build-fsa-builder-with-nodes 27 | (lambda () 28 | (for-each (lambda (node) 29 | (fsa-add-node! fsa-builder node)) 30 | nodes))]) 31 | (retreive-nodes nodes)) 32 | fsa-builder))) 33 | 34 | 35 | 36 | (define fsa-initial-node 37 | (lambda (fsa) 38 | (get-node fsa (fsa-builder-initial-state fsa)))) 39 | 40 | 41 | (define fsa-edges 42 | (lambda (fsa) 43 | (letrec ((E (lambda (nodes) 44 | (if (null? nodes) 45 | '() 46 | (append (node-edges (car nodes)) 47 | (E (cdr nodes))))))) 48 | (E (hash-table-values (fsa-builder-nodes fsa)))))) 49 | 50 | ;; (define node-is-equivalent 51 | ;; (lambda (lhs rhs) 52 | ;; (letrec ((edges-are-equivalent 53 | ;; (lambda (lhs-edges rhs-edges) 54 | ;; (cond ((null? lhs-edges) #t) 55 | ;; ((not (member (car lhs-edges) rhs-edges)) #f) 56 | ;; (else (edges-are-equivalent (cdr lhs-edges) rhs-edges)))))) 57 | ;; (if (not (equal? (node-final lhs) (node-final rhs))) 58 | ;; #f 59 | ;; (let ((lhs-edges (node-edges2 lhs)) 60 | ;; (rhs-edges (node-edges2 rhs))) 61 | ;; (cond ((not (equal? (length lhs-edges) (length rhs-edges))) #f) 62 | ;; (else (edges-are-equivalent lhs-edges rhs-edges)))))))) 63 | 64 | 65 | (define map-equal? 66 | (lambda (lhs rhs) 67 | (and (eq? (hash-table-size lhs) (hash-table-size rhs)) 68 | (equal? lhs rhs)))) 69 | 70 | 71 | 72 | ;; (define fsa-node-ancestrors 73 | ;; (lambda (fsa label) 74 | ;; (hash-table-ref/default (fsa-ancestrors-nodes fsa) 75 | ;; label 76 | ;; '()))) 77 | 78 | ;; (define fsa-remove-ancestror! 79 | ;; (lambda (fsa node) 80 | ;; (map (lambda (child) 81 | ;; (hash-table-update!/default (fsa-ancestrors-nodes fsa) 82 | ;; (node-label child) 83 | ;; (lambda (lst) 84 | ;; (delete! node lst)) 85 | ;; '())) 86 | ;; (node-destinations node)))) 87 | 88 | (define fsa-add-edge! 89 | (lambda (fsa src-label input-symbol dst-label) 90 | (let ((src-node (hash-table-update!/default (fsa-builder-nodes fsa) src-label (lambda (x) x) (make-empty-node src-label))) 91 | (dst-node (hash-table-update!/default (fsa-builder-nodes fsa) dst-label (lambda (x) x) (make-empty-node dst-label)))) 92 | (node-add-edge! src-node input-symbol dst-node) 93 | fsa))) 94 | 95 | (define fsa-remove-node! 96 | (lambda (fsa node) 97 | (let* ((label (node-label node))) 98 | (hash-table-delete! (fsa-builder-nodes fsa) label) 99 | (fsa-builder-finals-set! fsa (delete! node (fsa-builder-finals fsa))) 100 | fsa))) 101 | 102 | (define fsa-remove-edge! 103 | (lambda (fsa src-label input-symbol dst-label) 104 | (let ((src-node (hash-table-ref/default (fsa-builder-nodes fsa) src-label #f)) 105 | (dst-node (hash-table-ref/default (fsa-builder-nodes fsa) dst-label #f))) 106 | (if (and src-node dst-node) 107 | (node-remove-edge! src-node input-symbol dst-node)) 108 | fsa))) 109 | 110 | (define build-fsa 111 | (lambda (initial-label edges finals) 112 | (let ((fsa (fold (lambda (edge fsa) 113 | (fsa-add-edge! fsa (car edge) (cadr edge) (caddr edge))) 114 | (make-empty-fsa-builder initial-label) 115 | edges))) 116 | (fold (lambda (final fsa) 117 | (fsa-add-final! fsa final)) 118 | fsa 119 | finals)))) 120 | 121 | (define make-empty-fsa-builder 122 | (lambda (initial-label) 123 | (let ((fsa (make-fsa-builder initial-label (make-hash-table) (list)))) 124 | (hash-table-update!/default (fsa-builder-nodes fsa) initial-label (lambda (x) x) (make-empty-node initial-label)) 125 | fsa))) 126 | 127 | (define get-node 128 | (lambda (fsa node-label) 129 | (hash-table-ref/default (fsa-builder-nodes fsa) node-label #f))) 130 | 131 | ;(define get-node 132 | ; (lambda (fsa node-label) 133 | ; (my-hash-table-get! (fsa-nodes fsa) node-label (lambda () (make-empty-node node-label))))) 134 | 135 | 136 | (define get-state 137 | (lambda (fsa label) 138 | (node-label (get-node fsa label)))) 139 | 140 | ;; (define build-fsa 141 | ;; (lambda (alphabet initial-states final-states edges) 142 | ;; (let* ((node-map (make-hash-table)) 143 | ;; (get-node 144 | ;; (lambda (node) 145 | ;; (hash-table-ref node-map 146 | ;; node 147 | ;; (make-node node (make-hash-table) #f))))) 148 | ;; (letrec ((update-final-nodes 149 | ;; (lambda (nodes) 150 | ;; (if (null? nodes) 151 | ;; #f 152 | ;; (set! (node-final (get-node (car nodes))) #t) 153 | ;; (update-final-nodes (cdr nodes))))) 154 | ;; (B (lambda (edges) 155 | ;; (if (null? edges) 156 | ;; ;; 157 | ;; (let* ((edge (car edges)) 158 | ;; (src-node (get-node (source-node edge))) 159 | ;; (dst-node (get-node (destination-node edge)))) 160 | ;; (node-add-edge! src-node 161 | ;; (input-symbol edge) 162 | ;; dst-node)))) 163 | ;; (B (cdr edges)))) 164 | ;; (B edges) 165 | ;; (make-fsa alphabet 166 | ;; (get-node initial-state) 167 | 168 | 169 | ;; this function returns a list of destination nodes 170 | ;; for a given source node and an input symbol 171 | ;; (define transition 172 | ;; (lambda (fsa node input) 173 | ;; (letrec 174 | ;; ((T (lambda (edges) 175 | ;; (if (null? edges) 176 | ;; '() 177 | ;; (let ((edge (car edges))) 178 | ;; (if (and (eq? (source-node edge) node) 179 | ;; (eq? (input-symbol edge) input)) 180 | ;; (cons (destination-node edge) 181 | ;; (T (cdr edges))) 182 | ;; (T (cdr edges)))))))) 183 | ;; (T (edges fsa))))) 184 | 185 | ;; this function returns true if the node is 186 | ;; part of the final states. 187 | (define final? 188 | (lambda (node) 189 | (node-final node))) 190 | 191 | 192 | (define fsa-add-final! 193 | (lambda (fsa node-label) 194 | (fsa-add-final-node! fsa (get-node fsa node-label)))) 195 | 196 | (define fsa-add-final-node! 197 | (lambda (fsa node) 198 | (fsa-builder-finals-set! fsa (append (fsa-builder-finals fsa) (list node))) 199 | (node-final-set! node #t) 200 | fsa)) 201 | 202 | (define fsa-add-node! 203 | (lambda (fsa node) 204 | (if (node-final node) 205 | (fsa-add-final-node! fsa node)) 206 | (hash-table-update!/default (fsa-builder-nodes fsa) (node-label node) (lambda (n) node) node))) 207 | 208 | (define graphviz-export 209 | (lambda (fsa) 210 | (graphviz-export-to-file fsa "test.dot"))) 211 | 212 | (define graphviz-export-to-file 213 | (lambda (fsa file) 214 | "This function will write the dot description of the FSA in the stream." 215 | (let ((p (open-output-file file))) 216 | (display (format "digraph G {~% rankdir = LR;~% size = \"8, 10\";~%") 217 | p) 218 | ;(display (format " rotate = 90;~%") 219 | ; p) 220 | (if (not (null? (fsa-builder-finals fsa))) 221 | (begin 222 | (display (format "~% node [shape = doublecircle];~% ") 223 | p) 224 | (map (lambda (x) 225 | (display (format " \"~A\"" (node-label x)) 226 | p)) 227 | (fsa-builder-finals fsa)) 228 | (display ";"))) 229 | (display (format "~%~% node [shape = circle];~% ") 230 | p) 231 | (map (lambda (label) 232 | (display (format " \"~A\"" label) 233 | p)) 234 | (hash-table-keys (fsa-builder-nodes fsa))) 235 | (display (format ";~%~%") 236 | p) 237 | (map (lambda (node) 238 | (map (lambda (edge) 239 | (display (format " \"~A\" -> \"~A\" [label = \"~A\"];~%" 240 | (car edge) 241 | (caddr edge) 242 | (if (null? (cadr edge)) 243 | "epsilon" 244 | (cadr edge))) 245 | p)) 246 | (node-edges node))) 247 | (hash-table-values (fsa-builder-nodes fsa))) 248 | (display (format "}~%") 249 | p) 250 | (close-output-port p) 251 | fsa))) 252 | 253 | 254 | (define fsa-builder-accept? 255 | (lambda (fsa-builder word) 256 | (letrec ((T (lambda (node word) 257 | (if (null? word) 258 | (node-final node) 259 | (let ((nodes (node-transition node (car word)))) 260 | (if (null? nodes) 261 | #f 262 | (T (car nodes) (cdr word)))))))) 263 | (T (fsa-initial-node fsa-builder) word)))) 264 | -------------------------------------------------------------------------------- /finenight/scheme/iadfa.scm: -------------------------------------------------------------------------------- 1 | ;(define-extension iadfa) 2 | 3 | (require-extension srfi-1) 4 | (include "utils-scm.scm") 5 | (include "fsa.scm") 6 | 7 | (define-record iadfa 8 | register 9 | index ;; this is used for automatic node name generation 10 | fsa 11 | final) 12 | 13 | ;; This will return the node's last child added. 14 | (define last-child 15 | (lambda (node) 16 | (let ((lst-node (node-transition node (last-input node)))) 17 | (car lst-node)))) 18 | 19 | (define last-child-for-input 20 | (lambda (node input) 21 | (let ((lst-node (node-transition node input))) 22 | (car lst-node)))) 23 | 24 | ;; This returns the last node's symbol (alphabetical order) 25 | ;; 26 | ;; We rely on the fact that hash-table-keys returns a reversed 27 | ;; sorted list of the keys. 28 | (define last-input 29 | (lambda (node) 30 | (car (hash-table-keys (node-symbols-map node))))) 31 | 32 | (define has-children? 33 | (lambda (node) 34 | (> (hash-table-size (node-symbols-map node)) 35 | 0))) 36 | 37 | (define common-prefix 38 | (lambda (word node prefix) 39 | (if (eq? 0 (length word)) 40 | (cons node prefix) 41 | (let ((next-node (node-transition node (car word)))) 42 | (if (null? next-node) 43 | (cons node prefix) 44 | (common-prefix (cdr word) 45 | (car next-node) 46 | (append prefix (list (car word))))))))) 47 | 48 | (define marked-as-registered 49 | (lambda (iadfa state) 50 | (hash-table-exists? (iadfa-register iadfa) state))) 51 | 52 | (define iadfa-state-ancestrors 53 | (lambda (iadfa label) 54 | (let ((register (iadfa-register iadfa))) 55 | (if (hash-table-exists? register label) 56 | (hash-table-fold (hash-table-ref register label) 57 | (lambda (key nodes ancestrors) 58 | (append (map 59 | (lambda (node) 60 | (node-label node)) nodes) 61 | ancestrors)) 62 | '()) 63 | '())))) 64 | 65 | (define iadfa-state-ancestrors-for-input 66 | (lambda (iadfa label input) 67 | (let ((register (iadfa-register iadfa))) 68 | (if (hash-table-exists? register label) 69 | (map (lambda (node) 70 | (node-label node)) 71 | (hash-table-ref/default (hash-table-ref register label) 72 | input 73 | '())) 74 | '())))) 75 | 76 | (define iadfa-node-ancestrors 77 | (lambda (iadfa label input) 78 | (let ((register (iadfa-register iadfa))) 79 | (if (hash-table-exists? register label) 80 | (hash-table-ref/default (hash-table-ref register label) 81 | input 82 | '()) 83 | '())))) 84 | 85 | 86 | (define append-parent-to-registered 87 | (lambda (iadfa parent input child) 88 | (if (eq? 1 (hash-table-size (node-symbols-map parent))) 89 | (hash-table-update!/default (iadfa-register iadfa) 90 | (node-label child) 91 | (lambda (hash) 92 | (hash-table-update!/default hash 93 | input 94 | (lambda (lst) 95 | (cons parent lst)) 96 | '()) 97 | hash) 98 | (make-hash-table))))) 99 | 100 | 101 | (define delete-parent-to-registered 102 | (lambda (iadfa parent input child) 103 | (let ((register (iadfa-register iadfa))) 104 | (if (hash-table-exists? register (node-label child)) 105 | (hash-table-update! register 106 | (node-label child) 107 | (lambda (hash) 108 | (hash-table-update!/default hash 109 | input 110 | (lambda (lst) 111 | (delete! parent lst eq?)) 112 | '()) 113 | hash)))))) 114 | 115 | 116 | (define delete-parent-to-registered-childs 117 | (lambda (iadfa node) 118 | (let ((symbols-map (node-symbols-map node))) 119 | ; (if (eq? 1 (hash-table-size symbols-map)) 120 | (hash-table-walk 121 | symbols-map 122 | (lambda (symbol destinations) 123 | (fold (lambda (dst iadfa) 124 | (delete-parent-to-registered iadfa 125 | node 126 | symbol 127 | dst) 128 | iadfa) 129 | iadfa 130 | destinations)))))) 131 | 132 | (define mark-as-registered 133 | (lambda (iadfa parent child) 134 | (if (eq? (iadfa-final iadfa) #f) 135 | (iadfa-final-set! iadfa child)) 136 | (append-parent-to-registered iadfa 137 | parent 138 | (last-input parent) 139 | child))) 140 | 141 | 142 | (define generate-state 143 | (lambda (iadfa) 144 | (let ((name (iadfa-index iadfa))) 145 | (iadfa-index-set! iadfa (+ 1 (iadfa-index iadfa))) 146 | name))) 147 | 148 | 149 | (define build-iadfa 150 | (lambda () 151 | (make-iadfa (make-hash-table) 152 | 1 153 | (make-fsa (make-empty-node 0)) 154 | #f))) 155 | 156 | (define gen-iadfa 157 | (lambda (words) 158 | (let ((iadfa (fold (lambda (word iadfa) 159 | (handle-word iadfa (string->list word))) 160 | (build-iadfa) 161 | words))) 162 | (replace-or-register iadfa (fsa-start-node (iadfa-fsa iadfa)))))) 163 | 164 | (define gen-iadfa-from-file 165 | (lambda (file) 166 | (let ((iadfa (build-iadfa))) 167 | (for-each-line-in-file 168 | file 169 | (lambda (line) 170 | (display (format "~A ~%" line)) 171 | (handle-word iadfa 172 | (string->list line)))) 173 | (iadfa-fsa (replace-or-register iadfa (fsa-start-node (iadfa-fsa iadfa))))))) 174 | 175 | 176 | (define handle-word 177 | (lambda (iadfa word) 178 | (let* ((fsa (iadfa-fsa iadfa)) 179 | (common (common-prefix word (fsa-start-node fsa) '())) 180 | (common-prefix (cdr common)) 181 | (last-node (car common)) 182 | (current-suffix (list-tail word (length common-prefix)))) 183 | (if (has-children? last-node) 184 | (replace-or-register iadfa last-node)) 185 | (add-suffix last-node current-suffix iadfa) 186 | (delete-parent-to-registered-childs iadfa last-node) 187 | iadfa))) 188 | 189 | (define replace-or-register 190 | (lambda (iadfa node) 191 | (let* ((fsa (iadfa-fsa iadfa)) 192 | (child (last-child node))) 193 | (if (marked-as-registered iadfa child ) 194 | iadfa 195 | (let () 196 | (if (has-children? child) 197 | (replace-or-register iadfa child)) 198 | (handle-equivalent-states iadfa node child) 199 | iadfa))))) 200 | 201 | 202 | (define equivalent-registered-states 203 | (lambda (iadfa node) 204 | (if (has-children? node) 205 | (find-equivalent-states iadfa node) 206 | (find-equivalent-final-states iadfa node)))) 207 | 208 | (define find-equivalent-final-states 209 | (lambda (iadfa node) 210 | (iadfa-final iadfa))) 211 | 212 | 213 | (define find-equivalent-states 214 | (lambda (iadfa node) 215 | (let ((fsa (iadfa-fsa iadfa)) 216 | (input (last-input node))) 217 | (any (lambda (other) 218 | (if (and (not (eq? other node)) 219 | (eq? (node-final node) (node-final other))) 220 | other 221 | #f)) 222 | (iadfa-node-ancestrors iadfa 223 | (node-label (last-child-for-input node input)) 224 | input))))) 225 | 226 | 227 | (define handle-equivalent-states 228 | (lambda (iadfa node child) 229 | (let* ((fsa (iadfa-fsa iadfa)) 230 | (equivalent (equivalent-registered-states iadfa child))) 231 | (if equivalent 232 | (begin 233 | (replace-last-child node equivalent iadfa) 234 | (delete-branch iadfa child)) 235 | (mark-as-registered iadfa node child)) 236 | iadfa))) 237 | 238 | 239 | ;; (define replace-last-child 240 | ;; (lambda (node new-child iadfa) 241 | ;; (let* ((fsa (iadfa-fsa iadfa)) 242 | ;; (input (last-input node))) 243 | ;; (current-child (last-child node))) 244 | ;; (fsa-remove-edge! fsa (node-label node) input (node-label current-child)) 245 | ;; (fsa-add-edge! fsa (node-label node) input (node-label new-child)) 246 | ;; (append-parent-to-registered iadfa (node-label node) (node-label new-child))))) 247 | 248 | (define replace-last-child 249 | (lambda (node new-child iadfa) 250 | (let* ((fsa (iadfa-fsa iadfa)) 251 | (input (last-input node)) 252 | (current-child (last-child-for-input node input))) 253 | (node-remove-edge! node input current-child) 254 | (node-add-edge! node input new-child) 255 | (append-parent-to-registered iadfa node input new-child) 256 | node))) 257 | 258 | 259 | (define delete-branch 260 | (lambda (iadfa child) 261 | (let ((fsa (iadfa-fsa iadfa))) 262 | (if (has-children? child) 263 | (let ((input (last-input child))) 264 | (delete-parent-to-registered iadfa 265 | child 266 | input 267 | (last-child-for-input child input)))) 268 | iadfa))) 269 | 270 | (define add-suffix 271 | (lambda (node current-suffix iadfa) 272 | (let ((fsa (iadfa-fsa iadfa)) 273 | (last-node node)) 274 | (fold (lambda (input fsa) 275 | (let ((new-node (make-empty-node (generate-state iadfa)))) 276 | (node-add-edge! last-node input new-node) 277 | (set! last-node new-node) 278 | fsa)) 279 | (iadfa-fsa iadfa) 280 | current-suffix) 281 | (node-final-set! last-node #t) 282 | iadfa))) 283 | 284 | 285 | -------------------------------------------------------------------------------- /finenight/scheme/chicken.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (C) 2005 José Pablo Ezequiel "Pupeno" Fernández Silva 3 | # 4 | # This file is part of scons-chicken. 5 | # 6 | # scons-chicken is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 7 | # scons-chicken is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 8 | # You should have received a copy of the GNU General Public License along with scons-chicken; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA 9 | 10 | import SCons.Tool 11 | import os 12 | import os.path 13 | from SCons.Builder import Builder 14 | from SCons.Node.FS import File 15 | from string import strip, split 16 | 17 | def generate(env): 18 | env["CHICKEN"] = env.Detect("chicken") or "chicken" 19 | env["CHICKENPROFLAGS"] = "" 20 | env["CHICKENEXTFLAGS"] = "-dynamic -feature chicken-compile-shared -feature compiling-extension" 21 | env["CHICKENREPOSITORY"] = strip(os.popen("chicken-setup -repository").read()) + "/" 22 | 23 | def chickenProGenerator(source, target, env, for_signature): 24 | """ Generate the actions to compile a Chicken program. """ 25 | actions = [] 26 | for s, t in zip(source, target): 27 | actions.append("$CHICKEN %s -output-file %s $CHICKENPROFLAGS" % (s, t)) 28 | return actions 29 | 30 | env.ChickenPro = Builder(generator = chickenProGenerator, 31 | sufix = ".c", 32 | src_sufix = ".scm") 33 | 34 | def chickenExtGenerator(source, target, env, for_signature): 35 | """ Generate the actions to compile a Chicken extension. """ 36 | actions = [] 37 | for s, t in zip(source, target): 38 | actions.append("$CHICKEN %s -output-file %s $CHICKENEXTFLAGS" % (s, t)) 39 | return actions 40 | 41 | env.ChickenExt = Builder(generator = chickenExtGenerator, 42 | sufix = ".c", 43 | src_sufix = ".scm") 44 | 45 | def ChickenProgram(env, target, source = None, *args, **kw): 46 | """Pseudo builder to make a Chicken program.""" 47 | 48 | # Check if we have Chiken instaled. 49 | conf = env.Configure() 50 | if not conf.CheckLibWithHeader("chicken", 51 | "chicken.h", 52 | "c", 53 | "C_alloc(C_SIZEOF_LIST(3));"): 54 | print "It seems you don't have Chicken installed or it is not" 55 | print "installed correctly. For more information:" 56 | print "http://www.call-with-current-continuation.org/" 57 | exit(1) 58 | env = conf.Finish() 59 | 60 | # If no source provided, source is what is on target and target should be generated. 61 | if not source: 62 | source = target 63 | if isinstance(source, list): 64 | target = split(source[0], ".")[0] 65 | else: 66 | target = split(source, ".")[0] 67 | 68 | # Separate Scheme sources from the rest 69 | schemeSources, schemeAsCSources, otherSources = groupSources(source) 70 | 71 | # Compile Scheme sources into C using Chicken (for programs). 72 | env.ChickenPro(env, schemeAsCSources, schemeSources) 73 | 74 | # Add the needed compilation flags. They are added in this way because ParseConfig adds it to the environment and the same environment might be used for both, programs and extensions, and their cflags are conflicting. 75 | ccflags = strip(os.popen("csc -cflags").read()) 76 | if kw.has_key("CCFLAGS"): 77 | kw["CCFLAGS"] += ccflags 78 | else: 79 | kw["CCFLAGS"] = ccflags 80 | 81 | # Add the needed libraries. 82 | env.ParseConfig("csc -libs") 83 | 84 | return apply(env.Program, (target, schemeAsCSources + otherSources) + args, kw) 85 | 86 | def ChickenExtension(env, target, source = None, *args, **kw): 87 | """Pseudo builder to make a Chicken extension.""" 88 | 89 | # If no source provided, source is what is on target and target should be generated. 90 | if not source: 91 | source = target 92 | if isinstance(source, list): 93 | target = split(source[0], ".")[0] 94 | else: 95 | target = split(source, ".")[0] 96 | target = [target, target + ".setup"] 97 | 98 | # Separate Scheme sources from the rest 99 | schemeSources, schemeAsCSources, otherSources = groupSources(source) 100 | 101 | # Compile Scheme sources into C using Chicken (for programs). 102 | env.ChickenExt(env, schemeAsCSources, schemeSources) 103 | 104 | # Add the needed compilation flags. They are added in this way because ParseConfig adds it to the environment and the same environment might be used for both, programs and extensions, and their cflags are conflicting. 105 | ccflags = strip(os.popen("chicken-config -cflags").read()) 106 | if kw.has_key("CCFLAGS"): 107 | kw["CCFLAGS"] += ccflags 108 | else: 109 | kw["CCFLAGS"] = ccflags 110 | 111 | # Add the needed libraries. 112 | env.ParseConfig("chicken-config -libs") 113 | 114 | kw["SHLIBPREFIX"] = "" 115 | lib = apply(env.SharedLibrary, (target, schemeAsCSources + otherSources) + args, kw) 116 | 117 | if kw.has_key("DOCUMENTATION"): 118 | documentation = kw["DOCUMENTATION"] 119 | else: 120 | documentation = "" 121 | 122 | if kw.has_key("SYNTAX"): 123 | syntax = kw["SYNTAX"] 124 | else: 125 | syntax = False 126 | 127 | if kw.has_key("REQUIRES"): 128 | requires = kw["REQUIRES"] 129 | else: 130 | requires = [] 131 | 132 | # Generate the .setup file. 133 | setup = chickenSetup(os.path.splitext(str(lib[0]))[0] + ".setup", lib[0], documentation, syntax, requires) 134 | 135 | # Clean the .setup file when cleaning the library. 136 | env.Clean(lib, setup) 137 | 138 | return lib, setup 139 | 140 | # Attach the pseudo-Builders to the Environment so they can be called like a real Builder. 141 | env.ChickenProgram = ChickenProgram 142 | env.ChickenExtension = ChickenExtension 143 | 144 | def groupSources(sources): 145 | """ Separate the Scheme sources from the rest and generate the file names that the compiled-to-c sources are going to have. """ 146 | 147 | if not isinstance(sources, list): 148 | sources = [sources] 149 | 150 | # Lists for the names of the scheme sources and others respectively. 151 | schemeSources = [] 152 | schemeAsCSources = [] 153 | otherSources = [] 154 | 155 | # Separate sources into scheme, generated and other sources 156 | for s in sources: 157 | if os.path.splitext(s)[1] == ".scm": 158 | schemeSources.append(s) 159 | schemeAsCSources.append(os.path.splitext(s)[0]+".c") 160 | else: 161 | otherSources.append(s) 162 | 163 | return schemeSources, schemeAsCSources, otherSources 164 | 165 | def chickenSetup(setup, files, documentation = None, syntax = False, requires = None): 166 | """ This procedure works like a builder and it builds the .setup files. 167 | Parameters: 168 | 1. env (any way to fix this ?) 169 | 2. Name of the .setup file to generate. 170 | 3. Name or list of names of the .so files that will be linked from the setup file. 171 | Optional parameters: 172 | documentation = Where is the HTML documentation. 173 | syntax = Whether (true or false) this contain syntax extensions. 174 | requires = other or list of other required extensions.""" 175 | 176 | def makeLispList(head, items, prefix = ""): 177 | """ This procedure builds a string that resembles a Lisp list of strings. 178 | The first parameter is the header of the Lisp-like list. 179 | The second parameter is either a string or a list of strings that 180 | will form the Lisp-like list. 181 | Prefix is an optional parameter that will be prepended to each item 182 | on the list.""" 183 | 184 | def buildPath(item): 185 | """ Procedure that builds a path using the prefix and a string or 186 | File object.""" 187 | if isinstance(item, str): 188 | return prefix + item 189 | elif isinstance(item, list): 190 | return prefix + str(item[0]) 191 | elif isinstance(item, File): 192 | return prefix + item.name 193 | else: 194 | print "Type not recognized to build .setup file." 195 | return "" 196 | 197 | 198 | l = "(" + head 199 | 200 | if isinstance(items, list): 201 | for i in items: 202 | l += " \"" + buildPath(i) + "\" " 203 | else: 204 | l += " \"" + buildPath(items) + "\"" 205 | 206 | l += ")" 207 | return l 208 | 209 | # Open the list (a .setup is a list). 210 | content = "(" 211 | 212 | # Make a list of the sources, the .so files. All located on CHICKENREPOSITOR. 213 | content += makeLispList("files", files, env["CHICKENREPOSITORY"]) 214 | 215 | # Add the documentation. 216 | if documentation: 217 | content += "\n(documentation \"" + documentation + "\")" 218 | 219 | # Is this a syntax extension ? 220 | if syntax == True: 221 | content += "\n(syntax)" 222 | 223 | # What other extensions are necesary by this one ? 224 | if requires: 225 | # Make a list of extensions. 226 | content += "\n" + makeLispList("requires", requires) 227 | 228 | # Close the list. 229 | content += ")\n" 230 | 231 | # Write the list (being hold as a string on setup) to the file. 232 | setupFile = open(setup, "w") 233 | setupFile.write(content) 234 | setupFile.close() 235 | 236 | # Return an object representing the file for further handling. 237 | return env.File(setup) 238 | 239 | def findLibs(output, initialFlags = None): 240 | """ Parse the output of a config command, like chicken-config, and finds the libs and libpaths. """ 241 | flags = {"LIBPATH":[], 242 | "LIBS":[]} 243 | 244 | print output 245 | 246 | if initialFlags: 247 | flags.update(initialFlags) 248 | 249 | output = split(output) 250 | for item in output: 251 | if item[:2] == "-L": 252 | flags["LIBPATH"].append(item[2:]) 253 | elif item[:2] == "-l": 254 | flags["LIBS"].append(item[2:]) 255 | 256 | return flags 257 | 258 | def exists(env): 259 | return env.Detect(["chicken"]) 260 | -------------------------------------------------------------------------------- /finenight/lisp/CLUnit.lisp: -------------------------------------------------------------------------------- 1 | ;;;; -*- Mode: LISP; Syntax: Common-Lisp; Package: USER; Base:10 -*- 2 | ;;;; 3 | ;;;; Author: Frank A. Adrian 4 | ;;;; 5 | ;;;; Release history: 6 | ;;;; 20021126 - Release 1.3 7 | ;;;; 20021125 - Release 1.2a 8 | ;;;; 20021124 - Release 1.2 9 | ;;;; 20010605 - Release 1.1 10 | ;;;; 20010527 - Release 1.0 11 | ;;;; 12 | ;;;; Modification history: 13 | ;;;; 20021126 - Fixed compilation issues 14 | ;;;; 20021125 - Fixed :nconc-name issue for Corman Lisp 15 | ;;;; 20021124 - Fixed "AND error", switched from test object to structure 16 | ;;;; 20010605 - Added licensing text, compare-fn keyword. 17 | ;;;; 20010604 - Added :input-form and :output-form options, 18 | ;;;; failed-tests function 19 | ;;;; 20010524 - Code readied for public distribution. 20 | ;;;; 20010219 - Added list-* functions. 21 | ;;;; 20000614 - Added input-fn, output-fn. 22 | ;;;; 20000520 - Added categories. 23 | ;;;; 20000502 - Added deftest. 24 | ;;;; 20000428 - Initial Revision. 25 | ;;;; 26 | ;;;; Copyright (c) 2000-2002. Frank A. Adrian. All rights reserved. 27 | ;;;; 28 | ;;;; This library is free software; you can redistribute it and/or 29 | ;;;; modify it under the terms of the GNU Lesser General Public 30 | ;;;; License as published by the Free Software Foundation; either 31 | ;;;; version 2.1 of the License, or (at your option) any later version. 32 | ;;;; 33 | ;;;; This library is distributed in the hope that it will be useful, 34 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 35 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 36 | ;;;; Lesser General Public License for more details. 37 | ;;;; 38 | ;;;; You should have received a copy of the GNU Lesser General Public 39 | ;;;; License along with this library; if not, write to the Free Software 40 | ;;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 41 | ;;;; 42 | ;;;; The author also requests that any changes and/or improvents to the 43 | ;;;; code be shared with the author for use in subsequent releases. Author's 44 | ;;;; E-mail: fadrian@ancar.org. 45 | ;;;; 46 | ;;;; 47 | 48 | (defpackage :org.ancar.CLUnit 49 | (:use "COMMON-LISP") 50 | ;Kill the next form in Corman and Franz Lisps because their defpackage :documentation 51 | ;option is not present. 52 | #-(or :cormanlisp excl) 53 | (:documentation 54 | "This package contains a unit testing environment for Common Lisp. 55 | All tests are held in the system image. Each test has a name and 56 | a category. All tests in the system can be run, as can all tests 57 | in a given category. 58 | 59 | The tests are specified by a test function that is normally written 60 | so as to take no input and to return T if the test passes. Optionally, 61 | an input function and/or an output function can also be specified. 62 | If an input function is specified, the test function is applied to 63 | the return value(s) of the input function. If the output function 64 | is specified, then the return value(s) of the test function is 65 | compared (via #'eql) to the return value(s) of the output function 66 | to check if the test succeeded. 67 | 68 | The package provides several functions and a deftest macro that makes 69 | specifying a test simple: 70 | clear-tests: Remove all tests from the system. 71 | remove-test: Remove a test from the system by name. 72 | run-category: Run all tests from a given category. 73 | run-all-tests: Run all the tests in the system. 74 | list-categories: List the categories of tests in the system. 75 | list-tests: List all of the tests in the system. 76 | run-named-test: Run the test of the given name (mainly for 77 | debugging use after a given test has not 78 | passed). 79 | failed-tests: Return a list of all tests that failed during the 80 | last run-all-tests or run-category call. 81 | deftest: Define a test for the system.")) 82 | 83 | (in-package :org.ancar.CLUnit) 84 | (provide :org.ancar.CLUnit) 85 | 86 | (defconstant *not-categorized* "*UNCATEGORIZED*") 87 | (defun t-func () t) 88 | (defun nil-func () nil)` 89 | (defun equal-func (x y) (funcall (symbol-function 'equal) x y)) 90 | 91 | (defun print-test (test str depth) 92 | (declare (ignore depth)) 93 | (print-unreadable-object (test str :type t :identity t) 94 | (format str "~A/~A" (descr test) (category test)))) 95 | 96 | (defstruct (test (:conc-name nil) (:print-function print-test)) 97 | 98 | "Test holds information that enables test to be located and run. 99 | Slots: 100 | descr: Test name. 101 | category: Category test belongs to. 102 | test-fn: Function run for test - by default, a zero-input, 103 | boolean output function. T means the test succeeded. 104 | compare-fn: Function that compares test function output to the 105 | expected output. Takes 2 lists of values. 106 | input-fn: Function that provides input to the test. When this 107 | item is used, test-fn is applied to the values returned 108 | by this function. 109 | output-fn: Function that provides data that the output of test-fn 110 | is compared against." 111 | descr (category *not-categorized*) test-fn compare-fn input-fn output-fn) 112 | 113 | 114 | (defvar *all-tests* nil 115 | "Currently, this is a simple list of tests. If the number of tests 116 | starts becoming too large, this should probably turn into a hash-table 117 | of tests hashed on category name.") 118 | 119 | (defun clear-tests () 120 | "Remove all tests from the system." 121 | (setf *all-tests* nil)) 122 | 123 | (defun remove-test (test-name) 124 | "Remove the test with the given name." 125 | ;(format t "In remove-test~%") 126 | (setf *all-tests* 127 | (delete-if #'(lambda (i) (string-equal (descr i) test-name)) *all-tests*))) 128 | 129 | (defun run-unprotected (test) 130 | "Run a test. No protection against errors." 131 | (let* ((input-fn (input-fn test)) 132 | (output-fn (output-fn test)) 133 | (test-fn (test-fn test)) 134 | (has-specified-input-fn input-fn)) 135 | 136 | (unless input-fn (setf input-fn #'nil-func)) 137 | (unless output-fn (setf output-fn #'t-func)) 138 | (let ((test-input (multiple-value-list (funcall input-fn)))) 139 | ;(format t "~&Input: ~A~%" test-input) 140 | (let ((vals (multiple-value-list 141 | (if has-specified-input-fn 142 | (apply test-fn test-input) 143 | (funcall test-fn)))) 144 | (tvals (multiple-value-list (funcall output-fn)))) 145 | ;(format t "~&Test output: ~A~%Expected output: ~A~%" 146 | ; vals tvals) 147 | (funcall (compare-fn test) vals tvals))))) 148 | 149 | (defun run-protected (test) 150 | "Protect the test while running with ignore-errors." 151 | (let ((vals (multiple-value-list (ignore-errors (run-unprotected test))))) 152 | ;(format t "~&vals: ~A~%" vals) 153 | (unless (eq (car vals) t) 154 | (if (cadr vals) 155 | (format t "~&~A occurred in test ~S~%" 156 | (cadr vals) (descr test)) 157 | (format t "~&Output did not match expected output in test ~S~%" 158 | (descr test)))) 159 | vals)) 160 | 161 | (defun test-or-tests (count) 162 | "This is for Corman Lisp which does not handle ~[ quite correctly." 163 | (if (eq count 1) "test" "tests")) 164 | 165 | (defvar *failed-tests* nil 166 | "Holds the set of failed tests from last test run.") 167 | 168 | (defun failed-tests () 169 | "Return the set of tests that failed during the last test run" 170 | *failed-tests*) 171 | 172 | (defun run-tests (tests &optional (protected t)) 173 | "Run the set of tests passed in." 174 | (let ((passed-tests nil) 175 | (failed-tests nil)) 176 | (loop for test in tests do 177 | (let ((test-result (if protected 178 | (car (run-protected test)) 179 | (run-unprotected test)))) 180 | (if (eq test-result t) 181 | (push test passed-tests) 182 | (push test failed-tests)))) 183 | (setf *failed-tests* failed-tests) 184 | (let ((passed-count (length passed-tests)) 185 | (failed-count (length failed-tests))) 186 | (format t "~&~A ~A run; ~A ~A passed; ~A ~A failed.~%" 187 | (+ passed-count failed-count) (test-or-tests (+ passed-count failed-count)) 188 | passed-count (test-or-tests passed-count) 189 | failed-count (test-or-tests failed-count)) 190 | (values (null failed-tests) failed-count passed-count)))) 191 | 192 | (defun filter-tests (category) 193 | "Filter tests by category." 194 | (remove-if #'(lambda (test) ;(format t "~&~A~A~%" category (category test)) 195 | (not (string-equal category (category test)))) 196 | *all-tests*)) 197 | 198 | (defun run-category (category) 199 | "Run all the tests in a given category." 200 | (run-tests (filter-tests category))) 201 | 202 | (defun run-all-tests (&optional (protected t)) 203 | "Run all tests in the system." 204 | (run-tests *all-tests* protected)) 205 | 206 | (defmacro form-to-fn (form) 207 | "Return a function that will return the form when evaluated. 208 | Will be used when we add input-form and output-form parameters to 209 | deftest." 210 | `#'(lambda () ,form)) 211 | 212 | (defmacro deftest (description &key category 213 | test-fn 214 | (input-fn nil input-fn-present) 215 | (output-fn nil output-fn-present) 216 | (input-form nil input-form-present) 217 | (output-form nil output-form-present) 218 | compare-fn) 219 | 220 | "Use of :input-fn and :output-fn keywords override use of :input-form and 221 | :output-form keywords respectively." 222 | 223 | (let ((mia-args-gen (gensym)) 224 | (cat-gen (gensym)) 225 | (inst-gen (gensym)) 226 | (ifmfn `#'(lambda () ,input-form)) 227 | (ofmfn `#'(lambda () ,output-form)) 228 | (cf-gen (gensym)) 229 | (tf-gen (gensym))) 230 | `(let (,mia-args-gen 231 | (,cat-gen ,category) 232 | (,cf-gen ,compare-fn) 233 | (,tf-gen ,test-fn)) 234 | (push :descr ,mia-args-gen) (push ,description ,mia-args-gen) 235 | (when ,cat-gen 236 | (push :category ,mia-args-gen) (push ,cat-gen ,mia-args-gen)) 237 | (push :compare-fn ,mia-args-gen) (push (if ,cf-gen ,cf-gen #'equal) ,mia-args-gen) 238 | (push :test-fn ,mia-args-gen) (push (if ,tf-gen ,tf-gen #'t-func) ,mia-args-gen) 239 | (when (and ,output-form-present (not ,output-fn-present)) 240 | (push :output-fn ,mia-args-gen) (push ,ofmfn ,mia-args-gen)) 241 | (when ,output-fn-present 242 | (push :output-fn ,mia-args-gen) (push ,output-fn ,mia-args-gen)) 243 | (when (and ,input-form-present (not ,input-fn-present)) 244 | (push :input-fn ,mia-args-gen) (push ,ifmfn ,mia-args-gen)) 245 | (when ,input-fn-present 246 | (push :input-fn ,mia-args-gen) (push ,input-fn ,mia-args-gen)) 247 | (let ((,inst-gen (apply #'make-test (nreverse ,mia-args-gen)))) 248 | (remove-test (descr ,inst-gen)) 249 | (push ,inst-gen *all-tests*))))) 250 | 251 | (defun list-categories () 252 | "List all of the categories in the system." 253 | (let (cats) 254 | (loop for test in *all-tests* doing 255 | (setf cats (adjoin (category test) cats :test #'string-equal))) 256 | cats)) 257 | 258 | (defun list-tests (&optional category) 259 | "List the tets in the system / category." 260 | (let ((tests (if category (filter-tests category) *all-tests*))) 261 | (loop for test in tests collecting 262 | (concatenate 'string (descr test) "/" (category test))))) 263 | 264 | (defun run-named-test (name &optional protected) 265 | "Run the given test in either protected or unprotected mode." 266 | (let ((test (find name *all-tests* :key #'descr :test #'string-equal))) 267 | (when test 268 | (if protected 269 | (run-protected test) 270 | (run-unprotected test))))) 271 | 272 | (export '(run-category 273 | run-all-tests 274 | clear-tests 275 | remove-test 276 | deftest 277 | list-categories 278 | list-tests 279 | run-named-test 280 | failed-tests )) 281 | 282 | (in-package "COMMON-LISP-USER") 283 | (use-package :org.ancar.CLUnit) 284 | 285 | ;;; 286 | ;;; Self test... 287 | ;;; 288 | 289 | ;; tests basic test definition 290 | (load-time-value (progn 291 | 292 | (deftest "test1" :category "CLUnit-pass1" 293 | :test-fn #'(lambda () (eq (car '(a)) 'a))) 294 | 295 | ;; tests input-fn 296 | (deftest "test-2" :category "CLUnit-pass1" 297 | :input-fn #'(lambda () '(a)) 298 | :test-fn #'(lambda (x) (eq (car x) 'a))) 299 | 300 | ;; tests output-fn 301 | (deftest "test-3" :category "CLUnit-pass1" 302 | :input-fn #'(lambda () '(a)) 303 | :output-fn #'(lambda () 'a) 304 | :test-fn #'(lambda (x) (car x))) 305 | 306 | ;; tests remove-test, run-category, and multiple-values in test-fn and 307 | ;; output-fn 308 | (deftest "meta" :category "CLUnit-meta" 309 | :input-fn #'(lambda () (remove-test "test1")) 310 | :test-fn #'(lambda (x) (declare (ignore x)) (run-category "CLUnit-pass1")) 311 | :output-fn #'(lambda () (values t 0 2))) 312 | 313 | ;; tests multiple values from input-fn to test-fn 314 | (deftest "test1" :category "CLUnit-pass2" 315 | :input-fn #'(lambda () (values 'a '(b))) 316 | :test-fn #'cons 317 | :output-fn #'(lambda () '(a b))) 318 | 319 | ;;check error trapping 320 | (deftest "meta2" :category "CLUnit-meta" 321 | :input-fn 322 | #'(lambda () (deftest "Error test" :category "CLUnit-pass3" 323 | :test-fn #'(lambda () 324 | (remove-test "Error test") (error "Dummy error")))) 325 | :test-fn #'(lambda (x) (declare (ignore x)) (run-category "CLUnit-pass3")) 326 | :output-fn #'(lambda () (values nil 1 0))) 327 | 328 | ;;check input-form 329 | (deftest "testx" :category "CLUnit" 330 | :input-form '(a b c) 331 | :test-fn #'car 332 | :output-fn #'(lambda () 'a)) 333 | 334 | ;;check output form 335 | (deftest "testx2" :category "CLUnit" 336 | :input-form '(a b c) 337 | :test-fn #'car 338 | :output-form 'a) 339 | 340 | ;;check multiple input-forms 341 | (deftest "testx3" :category "CLUnit" 342 | :input-form (values '(1 2 3) '(10 20 30)) 343 | :test-fn #'(lambda (&rest lists) (car lists)) 344 | :output-fn #'(lambda () '(1 2 3))) 345 | 346 | ;;check multiple output-forms 347 | (deftest "testx4" :category "CLUnit" 348 | :input-form (values '(1 2 3) '(10 20 30)) 349 | :test-fn #'(lambda (&rest lists) (apply #'values lists)) 350 | :output-fn #'(lambda () (values '(1 2 3) '(10 20 30)))) 351 | 352 | ;;check failed-tests 353 | (deftest "meta5" :category "CLUnit-meta" 354 | :input-fn 355 | #'(lambda () (deftest "Error test" :category "CLUnit-pass4" 356 | :test-fn #'(lambda () 357 | (remove-test "Error test") (error "Dummy error")))) 358 | :test-fn #'(lambda (x) (declare (ignore x)) 359 | (run-category "CLUnit-pass4") 360 | (values (length (failed-tests)) (org.ancar.CLUnit::descr (car (failed-tests))))) 361 | :output-fn #'(lambda () (values 1 "Error test"))) 362 | 363 | (deftest "Test compare-fn" 364 | :test-fn #'(lambda () "abc") 365 | :output-form "abc" 366 | :compare-fn #'(lambda (rlist1 rlist2) 367 | (not (null (reduce #'(lambda (x y) (and x y)) 368 | (mapcar #'string-equal rlist1 rlist2) :initial-value t))))) 369 | 370 | ;;; run self test 371 | (when (run-all-tests) 372 | (format t "~&CLUnit self-test passed.~%") 373 | (clear-tests) 374 | (values)))) 375 | -------------------------------------------------------------------------------- /finenight/lisp/iadfa.lisp: -------------------------------------------------------------------------------- 1 | ;(declaim (optimize (speed 3) (space 3) (debug 0))) 2 | ;(declaim (optimize (speed 0) (space 0) (debug 3))) 3 | 4 | (in-package :com.rrette.finenight.iadfa) 5 | 6 | (defstruct iadfa 7 | (ancestrors (make-array 1000000 :initial-element nil :fill-pointer 0)) 8 | (parent-arities (make-array 1000000 :fill-pointer 0)) 9 | (index 0) ;; this is used for automatic node name generation 10 | (unused-nodes nil) 11 | (fsa (make-fsa :start-node (make-empty-node 0))) 12 | final) 13 | 14 | (defun ancestror-transition (iadfa node input final) 15 | (declare (iadfa iadfa)) 16 | (let ((ancestrors (aref (iadfa-ancestrors iadfa) (node-label node)))) 17 | (if (not ancestrors) 18 | nil 19 | (reduce #'(lambda (result node) 20 | (if (eq (node-final node) final) 21 | node 22 | result)) 23 | (gethash input ancestrors '()) 24 | :initial-value nil)))) 25 | 26 | (defun node-add-ancestror! (iadfa dst-node input src-node) 27 | (let ((ancestrors (aref (iadfa-ancestrors iadfa) (node-label dst-node)))) 28 | (if (not ancestrors) 29 | (progn 30 | (setf ancestrors (make-hash-table)) 31 | (setf (aref (iadfa-ancestrors iadfa) (node-label dst-node)) ancestrors))) 32 | (incf (aref (iadfa-parent-arities iadfa) (node-label dst-node))) 33 | (hash-table-update! input ancestrors nodes 34 | (cons src-node nodes)))) 35 | 36 | (defun node-remove-ancestror! (iadfa dst-node input src-node) 37 | (let ((ancestrors (aref (iadfa-ancestrors iadfa) (node-label dst-node)))) 38 | (if ancestrors 39 | (hash-table-update! input ancestrors nodes 40 | (remove src-node nodes))))) 41 | 42 | (defun get-fresh-node (iadfa) 43 | (if (null (iadfa-unused-nodes iadfa)) 44 | (progn 45 | (let ((new-label (generate-state iadfa))) 46 | (when (>= new-label (length (iadfa-ancestrors iadfa))) 47 | (vector-push 0 (iadfa-parent-arities iadfa)) 48 | (vector-push (make-hash-table) (iadfa-ancestrors iadfa))) 49 | (make-empty-node new-label))) 50 | (let* ((unused-nodes (iadfa-unused-nodes iadfa)) 51 | (new-node (car unused-nodes))) 52 | (setf (iadfa-unused-nodes iadfa) (cdr unused-nodes)) 53 | new-node))) 54 | 55 | (defun remove-ancestror-to-childs (iadfa node) 56 | (node-walk node #'(lambda (input destination-nodes) 57 | (dolist (dst-node destination-nodes iadfa) 58 | (node-remove-ancestror! iadfa dst-node input node))))) 59 | 60 | (defun reclaim-branch (iadfa node node-end) 61 | (declare (ignore node-end)) 62 | ;(node-reset node-end) 63 | (do ((i (node-label node) (+ i 1))) 64 | ((>= i (iadfa-index iadfa))) 65 | (setf (aref (iadfa-parent-arities iadfa) i) 0) 66 | (clrhash (aref (iadfa-ancestrors iadfa) i)))) 67 | 68 | ;; (do () 69 | ;; ((null nodes-to-clean)) 70 | ;; (let ((node-to-clean (pop nodes-to-clean))) 71 | ;; (setf (aref (iadfa-ancestrors iadfa) (node-label node-to-clean)) nil) 72 | ;; (setf nodes-to-clean (append nodes-to-clean 73 | ;; (node-destinations node-to-clean))) 74 | ;; (node-reset node-to-clean) 75 | ;; (setf (iadfa-unused-nodes iadfa) 76 | ;; (append (iadfa-unused-nodes iadfa) (list node-to-clean))))))) 77 | 78 | 79 | (defun remove-last-nodes (iadfa node node-end) 80 | (reclaim-branch iadfa node node-end) 81 | (setf (iadfa-index iadfa) (node-label node))) 82 | 83 | (defun delete-branch (iadfa stem-start-node stem-start-input stem-end-node) 84 | (decf (aref (iadfa-parent-arities iadfa) (node-label (car (node-destinations stem-end-node))))) 85 | (remove-ancestror-to-childs iadfa stem-end-node) 86 | (when (not (eq stem-start-node stem-end-node)) 87 | (let ((old-node (car (node-transition stem-start-node stem-start-input)))) 88 | (remove-last-nodes iadfa old-node stem-end-node))) 89 | (node-remove-dsts-for-input! stem-start-node stem-start-input)) 90 | 91 | 92 | (defun build-fsa-from-ancestrors (iadfa) 93 | (let ((fsa (make-fsa-builder))) 94 | (vector-walk (label node-ancestrors (iadfa-ancestrors iadfa)) 95 | (if node-ancestrors 96 | (maphash #'(lambda (input nodes) 97 | (dolist (node nodes nil) 98 | (fsa-add-edge! fsa label input (node-label node)))) 99 | node-ancestrors))) 100 | fsa)) 101 | 102 | (defun iadfa-state-ancestrors-for-input (iadfa dst-label input) 103 | (let ((ancestrors (aref (iadfa-ancestrors iadfa) dst-label))) 104 | (if ancestrors 105 | (mapcar #'(lambda (node) 106 | (node-label node)) 107 | (gethash input ancestrors)) 108 | '()))) 109 | 110 | (defun node-ancestrors (iadfa node) 111 | (aref (iadfa-parent-arities iadfa) (node-label node))) 112 | 113 | (defun node-ancestrors-for-input (iadfa dst-node input) 114 | (iadfa-state-ancestrors-for-input iadfa (node-label dst-node) input)) 115 | 116 | (defun generate-state (iadfa) 117 | (let ((name (iadfa-index iadfa))) 118 | (setf (iadfa-index iadfa) (+ 1 (iadfa-index iadfa))) 119 | name)) 120 | 121 | (defun build-iadfa () 122 | (let ((iadfa (make-iadfa))) 123 | (setf (iadfa-fsa iadfa) (make-fsa :start-node (get-fresh-node iadfa))) 124 | (setf (iadfa-final iadfa) (get-fresh-node iadfa)) 125 | (setf (node-final (iadfa-final iadfa)) t) 126 | iadfa)) 127 | 128 | (defun common-prefix (iadfa word node) 129 | (let ((stem '()) 130 | (prefix-stem '()) 131 | (stem-start-node node) 132 | (stem-start-input (car word)) 133 | (stem-end-node nil) 134 | (profile '()) 135 | (found-stem '())) 136 | (labels ((c-prefix (word node prefix) 137 | (if (not found-stem) 138 | (if (< 1 (node-arity node)) 139 | (progn 140 | (setf stem-start-node node) 141 | (setf stem-start-input (car word)) 142 | (setf stem '()) 143 | (setf prefix-stem '()) 144 | (setf profile '())))) 145 | (if (eq (iadfa-final iadfa) node) 146 | (progn 147 | (delete-branch iadfa stem-start-node stem-start-input stem-end-node) 148 | (values stem-start-node (append stem word) (append profile (make-list (- (length word) 1) :initial-element nil)))) 149 | (let ((next-node (node-transition node (car word)))) 150 | (if (null next-node) 151 | (progn 152 | (if found-stem 153 | (let ((prefix-symbol (car (node-symbols node))) 154 | (prefix-node (car (node-destinations node)))) 155 | ;; we are in a suffix of a subsumed stem 156 | ;; the node should have only one destination. 157 | (delete-branch iadfa stem-start-node stem-start-input stem-end-node) 158 | (values stem-start-node 159 | (append stem word) 160 | (append profile (make-list (- (length word) 1) :initial-element nil)) 161 | (append stem (list prefix-symbol)) 162 | prefix-node)) 163 | (values node word (make-list (length word) :initial-element nil)))) 164 | (progn (setf next-node (car next-node)) 165 | (setf stem (append stem (list (car word)))) 166 | (setf profile (append profile (list (node-final next-node)))) 167 | (when (not found-stem) 168 | (setf prefix-stem (append prefix-stem (list (car word)))) 169 | (setf stem-end-node node) 170 | (when (< 1 (node-ancestrors iadfa next-node)) 171 | (setf found-stem t))) 172 | (c-prefix (cdr word) 173 | next-node 174 | (append prefix 175 | (list (car word)))))))))) 176 | (c-prefix word node '())))) 177 | 178 | 179 | 180 | 181 | (defun c-suffix (iadfa current-suffix node prefix-node profile sub-stem) 182 | (if (or (= 1 (length current-suffix)) 183 | (= 1 (length profile)) 184 | ;(= 1 (length sub-stem)) 185 | (= (length sub-stem) (length current-suffix))) 186 | (values node (reverse current-suffix) (reverse profile)) 187 | (let ((next-node (ancestror-transition iadfa node (car current-suffix) (car profile)))) 188 | ;; (if (equal '(#\0 #\- #\7) current-suffix) 189 | ;; (break)) 190 | (if (or (not next-node) 191 | (eq next-node prefix-node) 192 | (eq next-node (fsa-start-node (iadfa-fsa iadfa)))) 193 | (values node (reverse current-suffix) (reverse profile)) 194 | (c-suffix iadfa 195 | (cdr current-suffix) 196 | next-node 197 | prefix-node 198 | (cdr profile) 199 | sub-stem))))) 200 | 201 | 202 | (defun common-suffix (iadfa current-suffix node prefix-node profile sub-stem) 203 | ;; this function takes a suffix to be consumed 204 | ;; and a node to start from and the current stem 205 | (c-suffix iadfa (reverse current-suffix) node prefix-node (reverse profile) sub-stem)) 206 | 207 | (defun iadfa-add-edge! (iadfa src-node input dst-node) 208 | (node-add-edge! src-node input dst-node) 209 | (node-add-ancestror! iadfa dst-node input src-node)) 210 | 211 | (defun add-stem (iadfa prefix-node suffix-node current-stem profile sub-prefix sub-node) 212 | (let ((last-node prefix-node) 213 | (last-input (car (last current-stem))) 214 | (processing-stem (butlast current-stem)) 215 | (sub-prefix sub-prefix)) 216 | (reduce #'(lambda (iadfa input) 217 | (let ((new-node (get-fresh-node iadfa))) 218 | (setf (node-final new-node) (car profile)) 219 | (setf profile (cdr profile)) 220 | (iadfa-add-edge! iadfa last-node input new-node) 221 | (when sub-prefix 222 | (when (= 1 (length sub-prefix)) 223 | (iadfa-add-edge! iadfa last-node (car sub-prefix) sub-node) 224 | (remove-ancestror-to-childs iadfa last-node)) 225 | (setf sub-prefix (cdr sub-prefix))) 226 | (setf last-node new-node) 227 | iadfa)) 228 | processing-stem 229 | :initial-value iadfa) 230 | (iadfa-add-edge! iadfa last-node last-input suffix-node) 231 | (when (= 1 (length sub-prefix)) 232 | (iadfa-add-edge! iadfa last-node (car sub-prefix) sub-node) 233 | (remove-ancestror-to-childs iadfa last-node)) 234 | iadfa)) 235 | 236 | 237 | (defun handle-word (iadfa word) 238 | (let* ((fsa (iadfa-fsa iadfa))) 239 | (multiple-value-bind (prefix-node current-suffix profile sub-prefix sub-node) (common-prefix iadfa word (fsa-start-node fsa)) 240 | (multiple-value-bind (suffix-node current-stem current-profile) 241 | (common-suffix iadfa current-suffix (iadfa-final iadfa) prefix-node profile sub-prefix) 242 | (add-stem iadfa prefix-node suffix-node current-stem current-profile sub-prefix sub-node) 243 | (if (> (node-arity prefix-node) 1) 244 | (remove-ancestror-to-childs iadfa prefix-node))) 245 | iadfa))) 246 | 247 | (defun gen-iadfa (words) 248 | (reduce #'(lambda (iadfa word) 249 | (handle-word iadfa (concatenate 'list word))) 250 | words 251 | :initial-value (build-iadfa))) 252 | 253 | (defun debug-gen-iadfa (words) 254 | (let ((index 0)) 255 | (reduce #'(lambda (iadfa word) 256 | (handle-word iadfa (concatenate 'list word)) 257 | (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) 258 | (concatenate 'string "output/iadfa" 259 | (format nil "~A" index) ".dot")) 260 | (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) 261 | (concatenate 'string "output/iadfa-ances" 262 | (format nil "~A" index) ".dot")) 263 | (setf index (+ index 1)) 264 | iadfa) 265 | words 266 | :initial-value (build-iadfa)))) 267 | 268 | (defun gen-iadfa-from-file (file &key dump) 269 | (let ((iadfa (build-iadfa)) 270 | (index 0) 271 | (last-time (get-internal-real-time)) 272 | (nb-per-hours 0) 273 | (nb-hours-for-all 0)) 274 | (for-each-line-in-file (line file) 275 | (handle-word iadfa (concatenate 'list line)) 276 | (when (member index dump) 277 | (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A" index) ".dot")) 278 | (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot"))) 279 | (incf index) 280 | (if (zerop (mod index 1000)) 281 | (let ((current-time (get-internal-real-time))) 282 | (setf nb-per-hours (float (* 1000 (/ 1 (/ (- current-time last-time) 283 | internal-time-units-per-second)) 60 60))) 284 | (setf nb-hours-for-all (float (/ (* 65000 (/ (- current-time last-time) 285 | internal-time-units-per-second)) 60 60))) 286 | (setf last-time current-time) 287 | (format t "~2,12$ w/h ~2,2$ Hours ~A ~A ~%" nb-per-hours nb-hours-for-all index line))) 288 | iadfa) 289 | iadfa)) 290 | 291 | 292 | (defun debug-gen-iadfa-from-file (file) 293 | (let ((iadfa (build-iadfa)) 294 | (index 0) 295 | (last-time (get-internal-real-time)) 296 | (nb-per-hours 0) 297 | (nb-hours-for-all 0)) 298 | (for-each-line-in-file (line file) 299 | (format t "~,2F w/h ~,2F Hours ~A ~A ~%" nb-per-hours nb-hours-for-all index line) 300 | (handle-word iadfa (concatenate 'list line)) 301 | (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A-~A" index line) ".dot")) 302 | (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot")) 303 | (incf index) 304 | (if (zerop (mod index 1000)) 305 | (let ((current-time (get-internal-real-time))) 306 | (setf nb-per-hours (float (* 1000 (/ 1 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) 307 | (setf nb-hours-for-all (float (/ (* 65000 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) 308 | (setf last-time current-time))) 309 | iadfa) 310 | iadfa)) 311 | 312 | 313 | (defmacro test-equivalence (words) 314 | (with-syms (w iadfa output) 315 | `(let* ((,w ,words) 316 | (,iadfa (debug-gen-iadfa ,w)) 317 | (,output nil)) 318 | (setf ,output (extract-words (iadfa-fsa ,iadfa))) 319 | (format t "input:~%~S~%output:~%~S~%" ,w ,output) 320 | (equal ,w ,output)))) 321 | 322 | 323 | (defun detect-problems (words) 324 | (let ((iadfa (build-iadfa)) 325 | (words-to-be-checked nil)) 326 | (dolist (word words) 327 | (setf words-to-be-checked (nconc words-to-be-checked (list word))) 328 | (handle-word iadfa (concatenate 'list word)) 329 | (when (not (equal words-to-be-checked 330 | (extract-words (iadfa-fsa iadfa)))) 331 | (return))) 332 | ;; We got the first entry that trigger the problem. 333 | ;; we need now to see which entry is needed to start 334 | ;; the problem 335 | words-to-be-checked)) 336 | 337 | 338 | (defun detect-first-starting-problematic-word (words-to-be-checked) 339 | (let ((wtbc (cdr words-to-be-checked)) 340 | (last-word (car words-to-be-checked))) 341 | (do ((iadfa (gen-iadfa wtbc) (gen-iadfa wtbc))) 342 | ((null wtbc)) 343 | (if (equal wtbc 344 | (extract-words (iadfa-fsa iadfa))) 345 | (return (cons last-word wtbc))) 346 | (setf last-word (car wtbc)) 347 | (setf wtbc (cdr wtbc))))) 348 | 349 | (defun filter-non-problematic-words (words-to-be-checked) 350 | (let ((problematics-words (list (car words-to-be-checked))) 351 | (last-word (cadr words-to-be-checked)) 352 | (words-to-be-checked (cddr words-to-be-checked))) 353 | (do ((iadfa (gen-iadfa (append problematics-words words-to-be-checked)) 354 | (gen-iadfa (append problematics-words words-to-be-checked)))) 355 | ((null words-to-be-checked)) 356 | (if (equal (append problematics-words words-to-be-checked) 357 | (extract-words (iadfa-fsa iadfa))) 358 | (setf problematics-words (nconc problematics-words (list last-word)))) 359 | (setf last-word (car words-to-be-checked)) 360 | (setf words-to-be-checked (cdr words-to-be-checked))) 361 | (setf problematics-words (nconc problematics-words (list last-word))) 362 | problematics-words)) 363 | 364 | 365 | (defun detect-problems-from-file (filename) 366 | (let ((words-to-be-checked nil)) 367 | (let ((iadfa (build-iadfa))) 368 | (for-each-line-in-file (word filename) 369 | (setf words-to-be-checked (nconc words-to-be-checked (list word))) 370 | (format t "Processing word [~A].~%" word) 371 | (handle-word iadfa (concatenate 'list word)) 372 | (when (not (equal words-to-be-checked 373 | (extract-words (iadfa-fsa iadfa)))) 374 | (format t "Word [~A] triggered a problem.~%" word) 375 | (return)) 376 | nil)) 377 | ;; We got the first entry that trigger the problem. 378 | ;; we need now to see which entry is needed to start 379 | ;; the problem 380 | (setf words-to-be-checked 381 | (detect-first-starting-problematic-word words-to-be-checked)) 382 | (setf words-to-be-checked 383 | (filter-non-problematic-words words-to-be-checked)) 384 | words-to-be-checked)) 385 | 386 | 387 | (defun print-stats (iadfa) 388 | (format t 389 | "ancestrors length: ~A~%parent-arities length: ~A~%" 390 | (length (iadfa-ancestrors iadfa)) 391 | (length (iadfa-parent-arities iadfa)))) 392 | 393 | 394 | 395 | 396 | ;; (defun dump-words (iadfa) 397 | ;; (let ((fsa (iadfa-fsa)) 398 | ;; (states (list (cons "" (fsa-start-node start))))) 399 | ;; states)) 400 | 401 | --------------------------------------------------------------------------------