;************************************************************************ ; Non-linear matching and efficient (cyclic) unification ; on ordered trees (terms) ; ; One (in case of matching) or both (for unification) trees may ; contain variables, whose names begin with an underscore (see the ; pattern-var? predicate). A variable whose name is a lone underscore ; _ is an anonymous, wildcard variable, like the one in Prolog. ; ; The result of matching or unification is a set of bindings of ; pattern variables to terms. We call this set 'environment' (env). ; In the term re-writing literature such a set of bindings is usually ; called 'substitutions'. Procedures 'match-tree' and 'unify-trees' ; accept the initial environment as an argument and return the ; resulting environment -- the substitutions to make both trees ; identical. The initial environment argument lets the user pass the ; existing substitutions so the user can match or unify a set of terms ; incrementally. Often though the initial environment is '(). If the ; match or unification fail, they return #f. ; ; Besides match-tree and unify-trees procedures, this file defines ; procedures that apply substitutions to terms and ancillary functions. ; ; IMPORT ; A prelude appropriate for your Scheme system ; (myenv-bigloo.scm, myenv-mit.scm, etc.) ; ; $Id: tree-unif.scm,v 3.0 2003/07/04 23:03:18 oleg Exp oleg $ (define (pattern-var? x) (and (symbol? x) (char=? #\_ (string-ref (symbol->string x) 0)))) ; The following macro runs built-in test cases -- or does not run, ; depending on which of the two lines below you commented out (define-macro (run-test . body) `(begin (display "\n-->Test\n") ,@body)) ;(define-macro (run-test . body) '(begin #f)) (cond-expand (gambit ; (time ...) is present in Gambit #f) (else (define-macro (time form) `(pp ,form))) ) ; A non-linear match of two ordered trees, one of which may contain variables ; match-tree TREE1 TREE2 ENV -> ENV ; ; TREE1 may contain variables (as decided by the pattern-var? ; predicate above). TREE1 may contain several occurrences of the same ; variable. All these occurrences must match the same value. A ; variable match is entered into the environment. A variable _ is an ; exception: its match is never entered into the environment. The ; function returns the resulting environment or #f if the match fails. (define (match-tree tree1 tree2 env) (cond ((eq? tree1 tree2) env) ; terms are identical ((pair? tree1) ; Recursively match pairs (and-let* ( ((pair? tree2)) (env-new (match-tree (car tree1) (car tree2) env))) (match-tree (cdr tree1) (cdr tree2) env-new))) ((null? tree1) #f) ; if tree2 had been '(), see 1st cond ((eq? '_ tree1) env) ; _ matches everything ((pattern-var? tree1) (cond ((assq tree1 env) => (lambda (prev-binding) ; variable occurred before (if (eq? (cdr prev-binding) tree2) env ; try eq? first, it's faster (and (equal? (cdr prev-binding) tree2) env)))) (else (cons (cons tree1 tree2) env)) ; new variable, enter fresh binding )) (else (and (equal? tree1 tree2) env)))) (run-test (let ((test (lambda (tree1 tree2 expected) (assert (equal? expected (match-tree tree1 tree2 '())))))) (test '_x '(seq 1 (seq-empty)) '((_x seq 1 (seq-empty)))) (test '(seq-empty) '(seq 1 (seq-empty)) #f) (test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) '()) (test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) '((_x . 1))) (test '(seq _x _y) '(seq 1 (seq-empty)) '((_y seq-empty) (_x . 1))) (test '(seq _x _y _z) '(seq 1 (seq-empty)) #f) (test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty))) '((_z seq-empty) (_y . 2) (_x . 1))) (test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f) (test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) '((_z seq-empty) (_x . 1))) )) ; A match/unification of two ordered trees, both of which may contain ; variables. The unifier is capable of finding cyclic substitutions. ; unify-trees TREE1 TREE2 ENV -> ENV ; ; The trees may contain several occurrences of the same variable. All ; these occurrences must match the same value. A variable match is ; entered into the environment. A variable _ is an exception: its ; match is never entered into the environment. ; The function unify-trees returns the resulting environment or #f if the ; unification fails. ; ; The semantics of unification: ; if A and B are two terms and (unify-trees A B init-env) succeeds and ; yields E, which is _acyclic_, then ; (match-tree (substitute-vars-recursively A E) ; (substitute-vars-recursively B E) ; '()) ; succeeds. ; ; The code isn't too dumb: it does not suffer from the exponential ; blow-up in time and in space, as is the case with the conventional ; unification algorithm (recursive descent with the composition of ; substitutions) due to Robinson, 1965. In fact, the algorithm below ; is linear in space: the size of the 'env' (the substitutions) is ; proportional to the size of the input trees. ; ; Often unify-trees function should be followed by normalize-subst ; below. Splitting the unification into two phases is for efficiency ; reasons: If we are interested only in the fact if two terms unify, ; we don't need to run normalize-subst. (define (genvar) (string->symbol (string-append "_" (symbol->string (gensym))))) (define (unify-trees tree1 tree2 env) (cond ((eq? tree1 tree2) env) ; especially helps if both tree1 & tree2 bound ((eq? '_ tree1) env) ; _ matches anything ((eq? '_ tree2) env) ; _ matches anything ((pattern-var? tree1) (cond ((assq tree1 env) => ; tree1 is a bound var (lambda (prev-binding) ;(cout "prev binding: " tree1 "," tree2 "," env nl) (unify-trees (cdr prev-binding) tree2 env))) ; tree1 is a free variable ((and (pattern-var? tree2) (assq tree2 env)) => ; tree2 is a bound var ; binding a free variable to a bound. Search for a substantial binding ; or a loop. If we find a loop tree1->tree2->...->tree1 ; then we do not enter the binding to tree1, because tree1 is not ; actually constrained. ; The other benefit of the loop below is that it prevents ; chains of bindings a->b, b->c, c->d ... d->nonvar ; We straighten out such chains. cf. union-find algorithm. (lambda (tree2-binding) (let loop ((binding tree2-binding)) (cond ((eq? tree1 (cdr binding)) env) ; loop: no binding needed ((and (pattern-var? (cdr binding)) (assq (cdr binding) env)) => loop) (else (cons (cons tree1 (cdr binding)) env)))))) ((pattern-var? tree2) ; tree2 is a free variable, tree2 /= tree1 (cons (cons tree1 tree2) env)) ((pair? tree2) ; tree1 is free, tree2 is a tree (and-let* ((car-var (genvar)) (cdr-var (if (null? (cdr tree2)) '() (genvar))) (car-env (unify-trees car-var (car tree2) (cons (cons tree1 (cons car-var cdr-var)) env)))) (unify-trees cdr-var (cdr tree2) car-env))) (else ; tree1 is free, tree2 is atomic (cons (cons tree1 tree2) env)))) ((pattern-var? tree2) (unify-trees tree2 tree1 env)) ((null? tree1) #f) ; tree2 is not a var, is not eq? tree1 ((pair? tree1) (and-let* (((pair? tree2)) (car-env (unify-trees (car tree1) (car tree2) env))) (unify-trees (cdr tree1) (cdr tree2) car-env))) (else (and (equal? tree1 tree2) env)))) (run-test (display "testing tree unification") (newline) (let ((test (lambda (tree1 tree2 expected) (display "unifying ") (display tree1) (display " and ") (display tree2) (let ((result (unify-trees tree1 tree2 '()))) (display " =>") (pp result) (newline) (assert (equal? expected result)))))) (test '(M abc) '(M abc) '()) (test '(M abc) '(M abcd) #f) (test '(M abc) '(M _ _) #f) (test '(M abc) '(M _) '()) (test '(M _ a _ ) '(M a _ b) '()) (test '(M _a _b) '(M _b _a) '((_a . _b))) (test '(M _a abc) '(M _b _a) '((_b . abc) (_a . _b))) (pp (unify-trees '_b '(seq _a) '())) (pp (unify-trees '(M (seq _a)) '(M _a) '())) (pp (unify-trees '(M (seq _a)) '(M _b) '())) (pp (unify-trees '(M (seq _a) _a) '(M _b _c) '())) (pp (unify-trees '(M (seq _a) _a) '(M _b (seq _b)) '())) (test '(M (seq _a) _a) '(M (seq _b) _b) '((_a . _b))) ;(test '_x '(seq 1 (seq-empty)) '((_x seq 1 (seq-empty)))) ;(unify-trees '_x '(seq 1 (seq-empty)) '()) (test '(seq-empty) '(seq 1 (seq-empty)) #f) (test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) '()) (test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) '((_x . 1))) ;(test '(seq _x _y) '(seq 1 (seq-empty)) '((_y seq-empty) (_x . 1))) (test '(seq _x _y _z) '(seq 1 (seq-empty)) #f) ;(test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty))) ; '((_z seq-empty) (_y . 2) (_x . 1))) (test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f) ;(test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) ; '((_z seq-empty) (_x . 1))) (pp (unify-trees '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) '())) ; A few tests from Dan Friedman's "Poor-man's Logic system tutorial" (test '(_x _x) '(3 4) #f) (test '(_x 4) '(3 _x) #f) (test '(_x _y) '(3 4) '((_y . 4) (_x . 3))) (test '(_x 4) '(3 _y) '((_y . 4) (_x . 3))) (test '(_x 4) '(_y _y) '((_y . 4) (_x . _y))) (test '(_x 4 3) '(_y _y _x) #f) (test '(_x 4 3 _w) '(3 _y _x _z) '((_w . _z) (_y . 4) (_x . 3))) ;(test '(p _x _x) '(p _y (f _y)) #f) ; it unifies! (pp (unify-trees '(p (f a) (g _x)) '(p _x _y) '())) (pp (unify-trees '(p (g _x) (f a)) '(p _y _x) '())) ; Tests for a bad unification from Franz Baader, Wayne Snyder, ; "Unification theory" (1999). ; The naive algorithm exponentially explodes on the following (display "Testing pathological, exponential cases") (newline) (time (pp (unify-trees '(h _x1 (f _y0 _y0) _y1) '(h (f _x0 _x0) _y1 _x1) '()))) (time (pp (unify-trees '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2) '(h (f _x0 _x0)(f _x1 _x1) _y1 _y2 _x2) '()))) (time (pp (unify-trees '(h _x1 _x2 _x3 _x4 (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4) '(h (f _x0 _x0)(f _x1 _x1)(f _x2 _x2)(f _x3 _x3) _y1 _y2 _y3 _y4 _x4) '()))) )) ; Determine if 'pred' holds for every element of the list (define (every? pred lst) (or (null? lst) (and (pred (car lst)) (every? pred (cdr lst))))) ; apply a leaf-transformer to every leaf of the tree and return the result. ; If the leaf-transformer returns #f, the corresponding leaf is ; removed from the resulting tree ; See term-variables function below (define (tree-traverse leaf-transformer tree) (cond ((null? tree) tree) ((not (pair? tree)) (leaf-transformer tree)) (else (let ((new-car (tree-traverse leaf-transformer (car tree))) (new-cdr (or (tree-traverse leaf-transformer (cdr tree)) '()))) (if (and new-car (not (null? new-car))) (cons new-car new-cdr) new-cdr))))) ; Find all the vars in term and substitute them with values given ; the env. It's an error if a term contains an unbound var (define (substitute-vars term env) (tree-traverse (lambda (leaf) (cond ((not (pattern-var? leaf)) leaf) ((assq leaf env) => cdr) (else (error "undefined pattern var " term " in env " env)))) term)) ; The same as above but accept an open term (that is, a term ; with variables that are not bound in env). Leave the free variables ; as they are. (define (substitute-vars-open term env) (tree-traverse (lambda (leaf) (cond ((not (pattern-var? leaf)) leaf) ((assq leaf env) => cdr) (else leaf))) term)) (run-test (assert (equal? 1 (substitute-vars '_x '((_x . 1))))) (assert (equal? '(a 1 (b (1 2) 1)) (substitute-vars '(a _x (b _y _x)) '((_x . 1) (_y 1 2))))) (assert (equal? 1 (substitute-vars-open '_x '((_x . 1))))) (assert (equal? '(a 1 (b (1 2) 1)) (substitute-vars-open '(a _x (b _y _x)) '((_x . 1) (_y 1 2))))) (assert (equal? '(a 1 (b (1 2) 1 _z)) (substitute-vars-open '(a _x (b _y _x _z)) '((_x . 1) (_y 1 2))))) ) ; The following is a naive version of substitute-vars-recursively. ; That version is commented out because it has bad space and time complexity. ; For example, ; (display "normalization of substitution:") (newline) ; (let ; ((normalize-subst ; (lambda (subst) ; (map (lambda (binding) ; (cons (car binding) ; (substitute-vars-recursively (car binding) subst))) ; subst)))) ; ; Gives (using Gambit-C 3.0 on Pentium IV 2GHz): ; (time (pp (normalize-subst ; (unify-trees '(h _x1 (f _y0 _y0) _y1) ; '(h (f _x0 _x0) _y1 _x1) '())))) ; 6 ms real time ; 2 ms cpu time (2 user, 0 system) ; no collections ; 55120 bytes allocated ; (time (pp (normalize-subst ; (unify-trees '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2) ; '(h (f _x0 _x0) (f _x1 _x1) _y1 _y2 _x2) '())))) ; 7 ms real time ; 7 ms cpu time (7 user, 0 system) ; 1 collection accounting for 1 ms cpu time (1 user, 0 system) ; 227816 bytes allocated ; (time (pp (normalize-subst ; (unify-trees ; '(h _x1 _x2 _x3 _x4 (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4) ; '(h (f _x0 _x0) (f _x1 _x1) (f _x2 _x2) (f _x3 _x3) _y1 _y2 _y3 _y4 _x4) ; '())))) ; 79 ms real time ; 60 ms cpu time (60 user, 0 system) ; 5 collections accounting for 5 ms cpu time (5 user, 0 system) ; 2045824 bytes allocated ; ; ; Substitute vars in term recursively, while avoiding loops ; ; If the variable is not found in the current env, leave it alone ; (define (substitute-vars-recursively term env) ; (define (remq el lst) ; (cond ; ((null? lst) lst) ; ((eq? el (car lst)) (cdr lst)) ; (else (cons (car lst) (remq el (cdr lst)))))) ; (define (resolve-var var) ; (cond ; ((not (pattern-var? var)) var) ; ((assq var env) => ; (lambda (binding) ; (substitute-vars-recursively (cdr binding) (remq binding env)))) ; (else var))) ; ;(cerr "binding " term ", env " env nl) ; (tree-traverse resolve-var term)) ; Normalize substitutions: ; normalize-subst ENVI ACCEPT-CYCLIC? -> (ENVO | #f) ; ; ENVI is a set of bindings as returned by unify-trees. Each binding in ; ENVI should have the form ; var->var ; var->atom ; var->(cons var-or-atom var-or-atom) ; ; ENVO is, informally speaking, a transitive closure of ENVI. If ; ENVI is acyclic, then ENVO is the fixpoint of composing ENVI with ; itself. To be more precise, ENVO is a set of bindings. It contains ; as many bindings as ENVI. Each binding of ENVO has the form ; var -> term ; If ENVI is acyclic, then term is either variable-free or contains ; only the variables that were not bound in ENVI. If ENVI is cyclic, ; then term may contain variables that are bound in ENVI (and ENVO). ; If ENVI is cyclic, it may contain bindings that are: ; - simply cyclic: the result of unifying (f _y) and _y ; - simply mutually recursive: unifying (p _y _a) and (p (f _a) (g _y)) ; - complex mutually recursive: unifying ; (p _y _a) and (p (f _y _a) (g _y _a)) ; I don't know how deeply I should unroll mutually-recursive ; bindings. At present, normalize-subst does not unroll them at all. ; SWI Prolog seems to unroll cyclic substitutions a few steps when ; printing them out. Sometimes this approach works out, and sometimes ; it doesn't: ; [user]. ; eq(X,X). ; ; ?- eq(Y,f(Y)). ; Y = f(f(f(f(f(f(f(f(f(f(...)))))))))) % Looks good ; ; ?- eq(p(Y,A),p(f(A),g(Y))). ; Y = f(g(f(g(f(g(f(g(f(g(...)))))))))) % Still looks good ; A = g(f(g(f(g(f(g(f(g(f(...)))))))))) ; ; ?- eq(p(Y,A),p(f(Y,A),g(Y,A))). ; ...long-and-unwieldy output... ; ; If the flag ACCEPT-CYCLIC? is #f and ENVI turns out to be cyclic, we ; return #f. If ACCEPT-CYCLIC? is #t, we always return the ; environment, which may contain recursive and mutually recursive ; bindings. Setting the flag ACCEPT-CYCLIC? to #f essentially enables ; the occurs-check (see below). ; ; A careful reader might note that our unification algorithm does not ; have an explicit occurs-check. Many Prolog systems in fact omit the ; occurs check altogether for performance reasons: see SICTus Prolog ; Manual, p. 50. [http://citeseer.nj.nec.com/laboratory01sicstus.html] ; The manual states that unifying without the occurs-check is wrong ; but practical. In our algorithm however, cyclic and acyclic ; unifications have exactly the same time and space complexities. The ; occurs-check (passing #f as ACCEPT-CYCLIC?) does _not_ impose any ; penalty at all. ; ; Algorithm: breadth-first iterative composition of ENVI with itself ; until the fixpoint is reached or a set of mutually-recursive ; bindings is established. The tests below show that the algorithm has ; a very nice time and space complexities, even in pathological cases. ; The nice complexities stem from the fact that we avoid repeated ; traversals and rebuilding of terms searching for variables to ; substitute, as we did in the naive case. ; ; Because we store bindings in an associative list and use assq to ; locate a binding, we cannot generally expect a linear time ; complexity (although we do achieve it in many, including the ; pathological, cases). Storing bindings in a hash table will decrease ; the time complexity. ; ; It seems that our algorithm is somewhat similar to Gerard Huet's ; linear unification algorithm. The latter has quite different intuition: ; re-writing of equations into a canonical form. In a sense, ; our unify-trees generates equations and normalize-subst strongly ; normalizes them, in O(n) iterations. (define (normalize-subst env accept-cyclic?) ; Partition the env into ground bindings and composite bindings. ; init-ground-terms ENVI -> GROUND-ENV NONGR-ENV ; where each binding in GROUND-ENV is either ; var->atom or ; var->free_var ; where free_var is a variable that is not bound by ENVI ; Each binding in NONGR-ENV has the form ; var-> (cons var-or-atom var-or-atom) (define (init-ground-terms env) (let loop ((env env) (ground '()) (non-ground '())) (if (null? env) (values ground non-ground) (let* ((binding (car env)) (env (cdr env)) (var (car binding)) (term (cdr binding))) (cond ((pair? term) ; composite binding (loop env ground (cons binding non-ground))) ((pattern-var? term) (cond ((assq term ground) => (lambda (ground-binding) (loop env (cons (cons var (cdr ground-binding)) ground) non-ground))) ; This case isn't common as unify-trees tries to avoid ; bindings of the form var->bound_var ((assq term non-ground) => (lambda (non-ground-binding) (loop env ground (cons (cons var (cdr non-ground-binding)) non-ground)))) ; Reduce the chain var->bound_var. Again, it's uncommon ((assq term env) => (lambda (unk-binding) (loop (cons (cons var (cdr unk-binding)) env) ground non-ground))) ; term is a free pattern-var. It's considered ground (else (loop env (cons binding ground) non-ground)) )) (else ; term is an atom (loop env (cons binding ground) non-ground))))))) ; Perform one step of the environment composition ; keep-resolving GROUND-ENV NONGR-ENV -> GROUND-ENV' NONGR-ENV' CHANGED? ; We compose NONGR-ENV with GROUND-ENV, add the composed bindings to the ; output GROUND-ENV' and return the left-over bindings in NONGR-ENV'. ; A boolean flag CHANGED? tells the success of the composition. ; Each binding of NONGR-ENV has the form ; var-> (cons var-or-atom1 var-or-atom2) ; That binding can be composed with GROUND-ENV iff both var-or-atom1 and ; var-or-atom2 are (i) atoms, (ii) one is an atom and the other is a variable ; bound in GROUND-ENV, (iii) both are variables bound in GROUND-ENV. ; The composed binding becomes a ground binding and moved to GROUND-ENV'. (define (keep-resolving ground non-ground) (let loop ((env non-ground) (ground ground) (non-ground '()) (changed? #f)) (if (null? env) (values ground non-ground changed?) (let* ((binding (car env)) (env (cdr env)) (var (car binding)) (term1 (cadr binding)) (term2 (cddr binding)) (term1-var? (pattern-var? term1)) (term2-var? (pattern-var? term2)) (term1-gb (and term1-var? (assq term1 ground))) (term2-gb (and term2-var? (assq term2 ground)))) (cond ((and term1-gb term2-gb) ; successful composition (loop env (cons (cons var (cons (cdr term1-gb) (cdr term2-gb))) ground) non-ground #t)) ((or (eq? term1-var? (not term1-gb)) ; Could not find one (eq? term2-var? (not term2-gb))) ; ground binding (loop env ground (cons binding non-ground) changed?)) (term1-gb ; term2 is atom, term1 resolves to ground (loop env (cons (cons var (cons (cdr term1-gb) term2)) ground) non-ground #t)) (term2-gb ; term1 is atom, term2 resolves to ground (loop env (cons (cons var (cons term1 (cdr term2-gb))) ground) non-ground #t)) (else ; both term1 and term2 are atoms (loop env (cons binding ground) non-ground #t))))))) ; The fixpoint algorithm (define (iter ground non-ground changed?) (cond ((null? non-ground) ground) (changed? (call-with-values (lambda () (keep-resolving ground non-ground)) iter)) (accept-cyclic? ; potentially we may wish to unroll (append non-ground ground)) ; non-ground for a few steps (else #f))) ; Get the ball rolling... (call-with-values (lambda () (call-with-values (lambda () (init-ground-terms env)) keep-resolving)) iter) ) (run-test (display "Testing the normalization of substitutions:") (newline) (pp (normalize-subst (unify-trees '(p (g _x) (f a)) '(p _y _x) '()) #t)) (assert (equal? '((_a . _b)) (normalize-subst (unify-trees '(M _a _b) '(M _b _a) '()) #t))) (pp (normalize-subst (unify-trees '(p _x _x) '(p _y (f _y)) '()) #t)) (pp (normalize-subst (unify-trees '(p _x _x) '(p _y (f (g _y))) '()) #t)) ; This example occurs in the termination prover ; It requires a cyclic substitution (pp (normalize-subst (unify-trees '(M foo _v) '(M foo (succ _v)) '()) #t)) (display "Testing cyclic: simple, simple/complex mutually-recursive") (newline) (pp (normalize-subst (unify-trees '(f _y) '_y '()) #t)) (pp (normalize-subst (unify-trees '(p _y _a) '(p (f _a) (g _y)) '()) #t)) (pp (normalize-subst (unify-trees '(p _y _a) '(p (f _y _a) (g _y _a)) '()) #t)) (assert (equal? '((_a . _d) (_b . _d) (_c . _d)) (normalize-subst (unify-trees '(f _a _b _c _d) '(f _b _c _d _a) '()) #f))) (assert (equal? '((_a . _d) (_b . _d) (_c . _d)) (normalize-subst (unify-trees '(f _a _b _c _d) '(f _d _a _b _c) '()) #f))) ; Tests for a bad unification from Franz Baader, Wayne Snyder, ; "Unification theory" (1999). ; The naive algorithm exponentially explodes on the following (display "Testing pathological, exponential cases") (newline) (time (pp (normalize-subst (unify-trees '(h _x1 (f _y0 _y0) _y1) '(h (f _x0 _x0) _y1 _x1) '()) #t))) (time (pp (normalize-subst (unify-trees '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2) '(h (f _x0 _x0)(f _x1 _x1) _y1 _y2 _x2) '()) #t))) (time (pp (normalize-subst (unify-trees '(h _x1 _x2 _x3 _x4 (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4) '(h (f _x0 _x0)(f _x1 _x1)(f _x2 _x2)(f _x3 _x3) _y1 _y2 _y3 _y4 _x4) '()) #t))) ) ; Timings for the above test (Gambit-C 3.0 on Pentium IV 2GHz): ; Notice the linear increase in time and space requirement as we ; double the length of the terms to unify ; (time (pp (normalize-subst ; (unify-trees '(h _x1 (f _y0 _y0) _y1) ; '(h (f _x0 _x0) _y1 _x1) '()) #t))) ; 1 ms real time ; 1 ms cpu time (1 user, 0 system) ; no collections ; 37456 bytes allocated ; ; (time (pp (normalize-subst ; (unify-trees '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2) ; '(h (f _x0 _x0) (f _x1 _x1) _y1 _y2 _x2) '()) #t))) ; 14 ms real time ; 4 ms cpu time (4 user, 0 system) ; 1 collection accounting for 1 ms cpu time (1 user, 0 system) ; 96456 bytes allocated ; ; (time (pp (normalize-subst ; (unify-trees ; '(h _x1 _x2 _x3 _x4 (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4) ; '(h (f _x0 _x0) (f _x1 _x1) (f _x2 _x2) (f _x3 _x3) _y1 _y2 _y3 _y4 _x4) ; '()) #t))) ; 19 ms real time ; 9 ms cpu time (9 user, 0 system) ; no collections ; 294480 bytes allocated ; Substitute vars in term recursively, while avoiding loops ; If the variable is not found in the current env, leave it alone (define (substitute-vars-recursively term env) (substitute-vars-open term (normalize-subst env #t))) (run-test (assert (equal? 1 (substitute-vars-recursively '_x '((_x . 1))))) (assert (equal? '(a 1 (b (1 2) 1)) (substitute-vars-recursively '(a _x (b _y _x)) '((_x . 1) (_y 1 2))))) (assert (equal? '(a _y _y _y) (substitute-vars-recursively '(a _x _x _y) '((_x . _y))))) (assert (equal? '(a 1 1 1) (substitute-vars-recursively '(a _x _x _y) '((_x . _y) (_y . 1))))) (assert (equal? '(a _x _x _x) (substitute-vars-recursively '(a _x _x _y) '((_x . _y) (_y . _x))))) ) (run-test (display "Testing unify and substitute-vars-recursively together") (newline) (let ((test ; unify tree1 and tree2, use the substitution on term ; to produce 'expected' (lambda (tree1 tree2 term expected) (display "unifying ") (display tree1) (display " and ") (display tree2) (let* ((env (unify-trees tree1 tree2 '())) (_ (begin (display " =>") (pp env) (newline))) (subst-term (substitute-vars-recursively term env))) (display " subst term =>") (pp subst-term) (newline) (assert (equal? expected subst-term)))))) (test '(_x 4) '(_y _y) '(_x _y) '(4 4)) (test '(_r (_v (_w _x) 8)) '(_r (_u (abc _u) _x)) '(_u _x _w _v) '(8 8 abc 8)) (test '(p (f a) (g _x)) '(p _x _y) '(_y _x) '((g (f a)) (f a))) (test '(p (g _x) (f a)) '(p _y _x) '(_y _x) '((g (f a)) (f a))) (test '(p a _x (h (g _z))) '(p _z (h _y) (h _y)) '(_y _x _z) '((g a) (h (g a)) a)) ; both _x and _y receive (f (f . _g124)) (let ((lst (substitute-vars-recursively '(_x _y) (unify-trees '(p _x _x) '(p _y (f _y)) '())))) (assert (equal? (car lst) (cadr lst)))) )) ; Give the list of all the vars in the term, with no duplicates (define (term-variables term) (define (flatten-unique lst dest) (cond ((null? lst) dest) ((pair? lst) (flatten-unique (cdr lst) (flatten-unique (car lst) dest))) ((memq lst dest) dest) (else (cons lst dest)))) (flatten-unique (tree-traverse (lambda (leaf) (and (pattern-var? leaf) leaf)) term) '())) (run-test (assert (equal? '() (term-variables '(a)))) (assert (equal? '(_a) (term-variables '(_a)))) (assert (equal? '() (term-variables '(add (imm 0) x)))) (assert (equal? '(_z _x) (term-variables '(seq _x (seq _x _z))))) (assert (equal? '(_z _x _) (term-variables '(seq _ (seq _x _z))))) (assert (equal? '(_r _x _c) (term-variables '(M add _c (M mult _x (M polynomial _x _r)))))) (assert (equal? '(_y _x) (term-variables '(((M _x) => _x) (M a _x _y) => (M b _y _x))))) ) ; Return a term where all the variable names are unique (define (alfa-convert term) (let* ((vars (term-variables term)) (new-vars (map (lambda (_) (genvar)) vars))) (substitute-vars term (map cons vars new-vars)))) (run-test (cout "Testing alfa-convert..." nl) (pp (alfa-convert '( ( (M even (seq _c _r)) => (M odd _r) ) ( (M even (seq _ _r)) => (M odd _r) ) ( (M odd (seq-empty)) => (no) ) ( (M odd (seq _c _r)) => (M even _r) )) )) ) ; Check if two trees can be unified without creating cyclic substitutions (define (unify-simple? tree1 tree2) (and-let* ((env (unify-trees tree1 tree2 '())) ((normalize-subst env #f))) #t)) (run-test (display "testing unify-simple?") (newline) (let ((test (lambda (tree1 tree2 expected) (display "unifying ") (display tree1) (display " and ") (display tree2) (let ((result (unify-simple? tree1 tree2))) (display " =>") (display result) (newline) (assert (equal? expected result)))))) (test '(M abc) '(M abc) #t) (test '(M abc) '(M abcd) #f) (test '(M abc) '(M _ _) #f) (test '(M abc) '(M _) #t) (test '(M _ a _ ) '(M a _ b) #t) (test '(M _a _b) '(M _b _a) #t) (test '(M _a abc) '(M _b _a) #t) (test '_b '(seq _a) #t) (test '(M (seq _a)) '(M _a) #f) ; cyclic unification! (test '(M (seq _a)) '(M _b) #t) (test '(M (seq _a) _a) '(M _b _c) #t) (test '(M (seq _a) _a) '(M _b (seq _b)) #f) ; cyclic unification! (test '(M foo _v) '(M foo (succ _v)) #f) ; cyclic! (test '(M (seq _a) _a) '(M (seq _b) _b) #t) (test '_x '(seq 1 (seq-empty)) #t) (test '_ '(seq 1 (seq-empty)) #t) (test '(seq-empty) '(seq 1 (seq-empty)) #f) (test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) #t) (test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) #t) (test '(seq _x _y) '(seq 1 (seq-empty)) #t) (test '(seq _x _y _z) '(seq 1 (seq-empty)) #f) (test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty))) #t) (test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f) (test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) #t) (test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) #t) ; A few tests from Dan Friedman's "Poor-man's Logic system tutorial" (test '(_x _x) '(3 4) #f) (test '(_x 4) '(3 _x) #f) (test '(_x _y) '(3 4) #t) (test '(_x 4) '(3 _y) #t) (test '(_x 4) '(_y _y) #t) (test '(_x 4 3) '(_y _y _x) #f) (test '(_x 4 3 _w) '(3 _y _x _z) #t) (test '(p _x _x) '(p _y (f _y)) #f) ; cyclic (test '(p (f a) (g _x)) '(p _x _y) #t) (test '(p (g _x) (f a)) '(p _y _x) #t) ))