;************************************************************************
;	    Non-linear matching and efficient (cyclic) unification
;			on ordered trees (terms)
;		
; One (in case of matching) or both (for unification) trees may
; contain variables, whose names begin with an underscore (see the
; pattern-var? predicate). A variable whose name is a lone underscore
; _ is an anonymous, wildcard variable, like the one in Prolog.
;
; The result of matching or unification is a set of bindings of
; pattern variables to terms. We call this set 'environment' (env).
; In the term re-writing literature such a set of bindings is usually
; called 'substitutions'.  Procedures 'match-tree' and 'unify-trees'
; accept the initial environment as an argument and return the
; resulting environment -- the substitutions to make both trees
; identical. The initial environment argument lets the user pass the
; existing substitutions so the user can match or unify a set of terms
; incrementally. Often though the initial environment is '(). If the
; match or unification fail, they return #f.
;
; Besides match-tree and unify-trees procedures, this file defines
; procedures that apply substitutions to terms and ancillary functions.
;
; IMPORT
; A prelude appropriate for your Scheme system
;	(myenv-bigloo.scm, myenv-mit.scm, etc.)
;
; $Id: tree-unif.scm,v 3.0 2003/07/04 23:03:18 oleg Exp oleg $

(define (pattern-var? x)
  (and (symbol? x) (char=? #\_ (string-ref (symbol->string x) 0))))

; The following macro runs built-in test cases -- or does not run,
; depending on which of the two lines below you commented out
(define-macro (run-test . body) `(begin (display "\n-->Test\n") ,@body))
;(define-macro (run-test . body) '(begin #f))

(cond-expand
  (gambit
    ; (time ...) is present in Gambit
    #f)
  (else
    (define-macro (time form) `(pp ,form)))
)

; A non-linear match of two ordered trees, one of which may contain variables
;	match-tree TREE1 TREE2 ENV -> ENV
;
; TREE1 may contain variables (as decided by the pattern-var?
; predicate above).  TREE1 may contain several occurrences of the same
; variable.  All these occurrences must match the same value.  A
; variable match is entered into the environment. A variable _ is an
; exception: its match is never entered into the environment.  The
; function returns the resulting environment or #f if the match fails.

(define (match-tree tree1 tree2 env)
  (cond
   ((eq? tree1 tree2) env)		; terms are identical
   ((pair? tree1)			; Recursively match pairs
    (and-let* (
	    ((pair? tree2))
	    (env-new (match-tree (car tree1) (car tree2) env)))
	   (match-tree (cdr tree1) (cdr tree2) env-new)))
   ((null? tree1) #f)			; if tree2 had been '(), see 1st cond
   ((eq? '_ tree1) env)			; _ matches everything
   ((pattern-var? tree1)
    (cond 
     ((assq tree1 env) => 
      (lambda (prev-binding)		; variable occurred before
	(if (eq? (cdr prev-binding) tree2) env ; try eq? first, it's faster
	  (and (equal? (cdr prev-binding) tree2) env))))
     (else
      (cons (cons tree1 tree2) env))	; new variable, enter fresh binding
      ))
   (else
    (and (equal? tree1 tree2) env))))

(run-test
 (let ((test 
	(lambda (tree1 tree2 expected)
	(assert (equal? expected (match-tree tree1 tree2 '()))))))
   (test '_x '(seq 1 (seq-empty)) '((_x seq 1 (seq-empty))))
   (test '(seq-empty) '(seq 1 (seq-empty)) #f)
   (test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) '())
   (test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) '((_x . 1)))
   (test '(seq _x _y) '(seq 1 (seq-empty)) '((_y seq-empty) (_x . 1)))
   (test '(seq _x _y _z) '(seq 1 (seq-empty)) #f)
   (test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty)))
	 '((_z seq-empty) (_y . 2) (_x . 1)))
   (test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f)
   (test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty)))
	 '((_z seq-empty) (_x . 1)))
))

; A match/unification of two ordered trees, both of which may contain
; variables. The unifier is capable of finding cyclic substitutions.
;	unify-trees TREE1 TREE2 ENV -> ENV
;
; The trees may contain several occurrences of the same variable.  All
; these occurrences must match the same value.  A variable match is
; entered into the environment. A variable _ is an exception: its
; match is never entered into the environment.  
; The function unify-trees returns the resulting environment or #f if the
; unification fails.
;
; The semantics of unification: 
; if A and B are two terms and (unify-trees A B init-env) succeeds and
; yields E, which is _acyclic_, then
; (match-tree (substitute-vars-recursively A E)
;             (substitute-vars-recursively B E)
;             '())
; succeeds.
;
; The code isn't too dumb: it does not suffer from the exponential
; blow-up in time and in space, as is the case with the conventional
; unification algorithm (recursive descent with the composition of
; substitutions) due to Robinson, 1965. In fact, the algorithm below
; is linear in space: the size of the 'env' (the substitutions) is
; proportional to the size of the input trees.  
;
; Often unify-trees function should be followed by normalize-subst
; below.  Splitting the unification into two phases is for efficiency
; reasons: If we are interested only in the fact if two terms unify,
; we don't need to run normalize-subst.

(define (genvar)
  (string->symbol (string-append "_" (symbol->string (gensym)))))

(define (unify-trees tree1 tree2 env)
 (cond
   ((eq? tree1 tree2) env)	; especially helps if both tree1 & tree2 bound
   ((eq? '_ tree1) env)	        ; _ matches anything
   ((eq? '_ tree2) env)	        ; _ matches anything
   ((pattern-var? tree1)
    (cond
     ((assq tree1 env) =>	; tree1 is a bound var
      (lambda (prev-binding)
	;(cout "prev binding: " tree1 "," tree2 "," env nl)
	(unify-trees (cdr prev-binding) tree2 env)))
				; tree1 is a free variable
     ((and (pattern-var? tree2) (assq tree2 env)) => ; tree2 is a bound var
       ; binding a free variable to a bound. Search for a substantial binding
       ; or a loop. If we find a loop tree1->tree2->...->tree1
       ; then we do not enter the binding to tree1, because tree1 is not
       ; actually constrained.
       ; The other benefit of the loop below is that it prevents
       ; chains of bindings a->b, b->c, c->d ... d->nonvar
       ; We straighten out such chains. cf. union-find algorithm.
      (lambda (tree2-binding)
	(let loop ((binding tree2-binding))
	  (cond
	    ((eq? tree1 (cdr binding)) env)  ; loop: no binding needed
	    ((and (pattern-var? (cdr binding)) (assq (cdr binding) env))
	      => loop)
	    (else (cons (cons tree1 (cdr binding)) env))))))
     ((pattern-var? tree2)	; tree2 is a free variable, tree2 /= tree1
       (cons (cons tree1 tree2) env))
     ((pair? tree2)		; tree1 is free, tree2 is a tree
      (and-let*
	((car-var (genvar)) 
	 (cdr-var (if (null? (cdr tree2)) '() (genvar)))
	 (car-env (unify-trees car-var (car tree2)
		    (cons (cons tree1 (cons car-var cdr-var)) env))))
	(unify-trees cdr-var (cdr tree2) car-env)))
     (else			; tree1 is free, tree2 is atomic
      (cons (cons tree1 tree2) env))))
   ((pattern-var? tree2)
    (unify-trees tree2 tree1 env))
   ((null? tree1) #f)			; tree2 is not a var, is not eq? tree1
   ((pair? tree1)
    (and-let*
      (((pair? tree2))
       (car-env (unify-trees (car tree1) (car tree2) env)))
      (unify-trees (cdr tree1) (cdr tree2) car-env)))
   (else
    (and (equal? tree1 tree2) env))))

(run-test
 (display "testing tree unification") (newline)
 (let ((test 
	(lambda (tree1 tree2 expected)
	  (display "unifying ") (display tree1)
	  (display " and ") (display tree2)
	  (let ((result (unify-trees tree1 tree2 '())))
	    (display " =>") (pp result) (newline)
	    (assert (equal? expected result))))))
   (test '(M abc) '(M abc) '())
   (test '(M abc) '(M abcd) #f)
   (test '(M abc) '(M _ _) #f)
   (test '(M abc) '(M _) '())
   (test '(M _ a _ ) '(M a _ b) '())
   (test '(M _a _b) '(M _b _a) '((_a . _b)))
   (test '(M _a abc) '(M _b _a) '((_b . abc) (_a . _b)))
   (pp (unify-trees '_b '(seq _a) '()))
   (pp (unify-trees '(M (seq _a)) '(M _a) '()))
   (pp (unify-trees '(M (seq _a)) '(M _b) '()))
   (pp (unify-trees '(M (seq _a) _a) '(M _b _c) '()))
   (pp (unify-trees '(M (seq _a) _a) '(M _b (seq _b)) '()))
   (test '(M (seq _a) _a) '(M (seq _b) _b) '((_a . _b)))

   ;(test '_x '(seq 1 (seq-empty)) '((_x seq 1 (seq-empty))))
   ;(unify-trees  '_x '(seq 1 (seq-empty)) '())
   (test '(seq-empty) '(seq 1 (seq-empty)) #f)
   (test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) '())
   (test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) '((_x . 1)))
   ;(test '(seq _x _y) '(seq 1 (seq-empty)) '((_y seq-empty) (_x . 1)))
   (test '(seq _x _y _z) '(seq 1 (seq-empty)) #f)
   ;(test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty)))
	; '((_z seq-empty) (_y . 2) (_x . 1)))
   (test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f)
   ;(test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty)))
	; '((_z seq-empty) (_x . 1)))
   (pp (unify-trees '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) '()))

   ; A few tests from Dan Friedman's "Poor-man's Logic system tutorial"
   (test '(_x _x) '(3 4) #f)
   (test '(_x 4) '(3 _x) #f)
   (test '(_x _y) '(3 4) '((_y . 4) (_x . 3)))
   (test '(_x 4) '(3 _y) '((_y . 4) (_x . 3)))
   (test '(_x 4) '(_y _y) '((_y . 4) (_x . _y)))
   (test '(_x 4 3) '(_y _y _x) #f)
   (test '(_x 4 3 _w) '(3 _y _x _z) '((_w . _z) (_y . 4) (_x . 3)))
   ;(test '(p _x _x) '(p _y (f _y)) #f) ; it unifies!
   (pp (unify-trees '(p (f a) (g _x)) '(p _x _y) '()))
   (pp (unify-trees '(p (g _x) (f a)) '(p _y _x) '()))

   ; Tests for a bad unification from Franz Baader, Wayne Snyder,
   ; "Unification theory" (1999).
   ; The naive algorithm exponentially explodes on the following
   (display "Testing pathological, exponential cases") (newline)
    (time (pp (unify-trees
 	 '(h _x1 (f _y0 _y0) _y1)
 	 '(h (f _x0 _x0) _y1 _x1)
 	 '())))

   (time (pp (unify-trees
	 '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2)
	 '(h (f _x0 _x0)(f _x1 _x1) _y1 _y2  _x2)
	 '())))

   (time (pp (unify-trees
	 '(h _x1 _x2 _x3 _x4 
	     (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4)
	 '(h (f _x0 _x0)(f _x1 _x1)(f _x2 _x2)(f _x3 _x3)
	     _y1 _y2 _y3 _y4 _x4)
	 '())))
))

; Determine if 'pred' holds for every element of the list
(define (every? pred lst)
  (or (null? lst)
      (and (pred (car lst))
	   (every? pred (cdr lst)))))

; apply a leaf-transformer to every leaf of the tree and return the result.
; If the leaf-transformer returns #f, the corresponding leaf is
; removed from the resulting tree
; See term-variables function below

(define (tree-traverse leaf-transformer tree)
  (cond
   ((null? tree) tree)
   ((not (pair? tree))
    (leaf-transformer tree))
   (else 
    (let ((new-car (tree-traverse leaf-transformer (car tree)))
	  (new-cdr (or (tree-traverse leaf-transformer (cdr tree)) '())))
      (if (and new-car (not (null? new-car)))
	  (cons new-car new-cdr) new-cdr)))))

; Find all the vars in term and substitute them with values given
; the env. It's an error if a term contains an unbound var

(define (substitute-vars term env)
  (tree-traverse 
   (lambda (leaf)
     (cond
      ((not (pattern-var? leaf)) leaf)
      ((assq leaf env) => cdr)
      (else
       (error "undefined pattern var " term " in env " env))))
   term))

; The same as above but accept an open term (that is, a term
; with variables that are not bound in env). Leave the free variables
; as they are.
(define (substitute-vars-open term env)
  (tree-traverse 
   (lambda (leaf)
     (cond
      ((not (pattern-var? leaf)) leaf)
      ((assq leaf env) => cdr)
      (else leaf)))
   term))

(run-test
 (assert (equal? 1 (substitute-vars '_x '((_x . 1)))))
 (assert (equal? '(a 1 (b (1 2) 1))
		 (substitute-vars '(a _x (b _y _x)) '((_x . 1) (_y 1 2)))))
 (assert (equal? 1 (substitute-vars-open '_x '((_x . 1)))))
 (assert (equal? '(a 1 (b (1 2) 1))
		 (substitute-vars-open 
		   '(a _x (b _y _x)) '((_x . 1) (_y 1 2)))))
 (assert (equal? '(a 1 (b (1 2) 1 _z))
		 (substitute-vars-open 
		   '(a _x (b _y _x _z)) '((_x . 1) (_y 1 2)))))
)


; The following is a naive version of substitute-vars-recursively.
; That version is commented out because it has bad space and time complexity.
; For example,
;     (display "normalization of substitution:") (newline)
;     (let
;       ((normalize-subst
; 	 (lambda (subst)
; 	   (map (lambda (binding)
; 		  (cons (car binding)
; 		    (substitute-vars-recursively (car binding) subst)))
; 	     subst))))
;
; Gives (using Gambit-C 3.0 on Pentium IV 2GHz):
; (time (pp (normalize-subst 
;         (unify-trees '(h _x1 (f _y0 _y0) _y1) 
;                      '(h (f _x0 _x0) _y1 _x1) '()))))
;     6 ms real time
;     2 ms cpu time (2 user, 0 system)
;     no collections
;     55120 bytes allocated
; (time (pp (normalize-subst 
;         (unify-trees '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2) 
;                      '(h (f _x0 _x0) (f _x1 _x1) _y1 _y2 _x2) '()))))
;     7 ms real time
;     7 ms cpu time (7 user, 0 system)
;     1 collection accounting for 1 ms cpu time (1 user, 0 system)
;     227816 bytes allocated
; (time (pp (normalize-subst 
;         (unify-trees 
;     '(h _x1 _x2 _x3 _x4 (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4) 
;     '(h (f _x0 _x0) (f _x1 _x1) (f _x2 _x2) (f _x3 _x3) _y1 _y2 _y3 _y4 _x4)
;     '()))))
;     79 ms real time
;     60 ms cpu time (60 user, 0 system)
;     5 collections accounting for 5 ms cpu time (5 user, 0 system)
;     2045824 bytes allocated
;
; ; Substitute vars in term recursively, while avoiding loops
; ; If the variable is not found in the current env, leave it alone
; (define (substitute-vars-recursively term env)
;   (define (remq el lst)
;     (cond
;       ((null? lst) lst)
;       ((eq? el (car lst)) (cdr lst))
;       (else (cons (car lst) (remq el (cdr lst))))))
;   (define (resolve-var var)
;     (cond
;       ((not (pattern-var? var)) var)
;       ((assq var env) => 
; 	(lambda (binding)
; 	  (substitute-vars-recursively (cdr binding) (remq binding env))))
;       (else var)))
;   ;(cerr "binding " term ", env " env nl)
;   (tree-traverse resolve-var term))


; Normalize substitutions:
;	normalize-subst ENVI ACCEPT-CYCLIC? -> (ENVO | #f)
;
; ENVI is a set of bindings as returned by unify-trees. Each binding in
; ENVI should have the form
;         var->var
;         var->atom
;         var->(cons var-or-atom var-or-atom)
;
; ENVO is, informally speaking, a transitive closure of ENVI. If
; ENVI is acyclic, then ENVO is the fixpoint of composing ENVI with
; itself. To be more precise, ENVO is a set of bindings. It contains
; as many bindings as ENVI. Each binding of ENVO has the form
;	  var -> term
; If ENVI is acyclic, then term is either variable-free or contains
; only the variables that were not bound in ENVI. If ENVI is cyclic,
; then term may contain variables that are bound in ENVI (and ENVO).
; If ENVI is cyclic, it may contain bindings that are:
;    - simply cyclic: the result of unifying (f _y) and _y
;    - simply mutually recursive: unifying (p _y _a) and (p (f _a) (g _y))
;    - complex mutually recursive: unifying
;          (p _y _a) and (p (f _y _a) (g _y _a))
; I don't know how deeply I should unroll mutually-recursive
; bindings. At present, normalize-subst does not unroll them at all.
; SWI Prolog seems to unroll cyclic substitutions a few steps when
; printing them out. Sometimes this approach works out, and sometimes
; it doesn't:
;      [user].
;      eq(X,X).
;
;      ?- eq(Y,f(Y)).
;      Y = f(f(f(f(f(f(f(f(f(f(...)))))))))) % Looks good
;
;      ?- eq(p(Y,A),p(f(A),g(Y))).
;      Y = f(g(f(g(f(g(f(g(f(g(...)))))))))) % Still looks good
;      A = g(f(g(f(g(f(g(f(g(f(...)))))))))) 
;
;      ?- eq(p(Y,A),p(f(Y,A),g(Y,A))).
;      ...long-and-unwieldy output...
;
; If the flag ACCEPT-CYCLIC? is #f and ENVI turns out to be cyclic, we
; return #f. If ACCEPT-CYCLIC? is #t, we always return the
; environment, which may contain recursive and mutually recursive
; bindings. Setting the flag ACCEPT-CYCLIC? to #f essentially enables
; the occurs-check (see below).
;
; A careful reader might note that our unification algorithm does not
; have an explicit occurs-check. Many Prolog systems in fact omit the
; occurs check altogether for performance reasons: see SICTus Prolog
; Manual, p. 50. [http://citeseer.nj.nec.com/laboratory01sicstus.html]
; The manual states that unifying without the occurs-check is wrong
; but practical. In our algorithm however, cyclic and acyclic
; unifications have exactly the same time and space complexities. The
; occurs-check (passing #f as ACCEPT-CYCLIC?) does _not_ impose any
; penalty at all.
;
; Algorithm: breadth-first iterative composition of ENVI with itself
; until the fixpoint is reached or a set of mutually-recursive
; bindings is established. The tests below show that the algorithm has
; a very nice time and space complexities, even in pathological cases.
; The nice complexities stem from the fact that we avoid repeated
; traversals and rebuilding of terms searching for variables to
; substitute, as we did in the naive case.
;
; Because we store bindings in an associative list and use assq to
; locate a binding, we cannot generally expect a linear time
; complexity (although we do achieve it in many, including the
; pathological, cases). Storing bindings in a hash table will decrease
; the time complexity.
;
; It seems that our algorithm is somewhat similar to Gerard Huet's
; linear unification algorithm. The latter has quite different intuition:
; re-writing of equations into a canonical form. In a sense,
; our unify-trees generates equations and normalize-subst strongly
; normalizes them, in O(n) iterations.


(define (normalize-subst env accept-cyclic?)
  ; Partition the env into ground bindings and composite bindings.
  ;    init-ground-terms ENVI -> GROUND-ENV NONGR-ENV
  ; where each binding in GROUND-ENV is either
  ;	var->atom  or
  ;     var->free_var  ; where free_var is a variable that is not bound by ENVI
  ; Each binding in NONGR-ENV has the form
  ;     var-> (cons var-or-atom var-or-atom)
  (define (init-ground-terms env)
    (let loop ((env env) (ground '()) (non-ground '()))
      (if (null? env) (values ground non-ground)
	(let*
	  ((binding (car env))
	   (env (cdr env))
	   (var (car binding))
	   (term (cdr binding)))
	  (cond
	    ((pair? term)		; composite binding
	     (loop env ground (cons binding non-ground)))
	    ((pattern-var? term)
	     (cond
	       ((assq term ground) =>
		 (lambda (ground-binding)
		   (loop env (cons (cons var (cdr ground-binding)) ground)
		     non-ground)))
	       ; This case isn't common as unify-trees tries to avoid
	       ; bindings of the form var->bound_var
	       ((assq term non-ground) =>
		 (lambda (non-ground-binding)
		   (loop env ground
		     (cons (cons var (cdr non-ground-binding)) non-ground))))
	       ; Reduce the chain var->bound_var. Again, it's uncommon
	       ((assq term env) =>
		 (lambda (unk-binding)
		   (loop (cons (cons var (cdr unk-binding)) env)
		     ground non-ground)))
	       ; term is a free pattern-var. It's considered ground
	       (else (loop env (cons binding ground) non-ground))
	       ))
	    (else ; term is an atom
	      (loop env (cons binding ground) non-ground)))))))

  ; Perform one step of the environment composition
  ;	keep-resolving GROUND-ENV NONGR-ENV -> GROUND-ENV' NONGR-ENV' CHANGED?
  ; We compose NONGR-ENV with GROUND-ENV, add the composed bindings to the
  ; output GROUND-ENV' and return the left-over bindings in NONGR-ENV'.
  ; A boolean flag CHANGED? tells the success of the composition.
  ; Each binding of NONGR-ENV has the form
  ;     var-> (cons var-or-atom1 var-or-atom2)
  ; That binding can be composed with GROUND-ENV iff both var-or-atom1 and
  ; var-or-atom2 are (i) atoms, (ii) one is an atom and the other is a variable
  ; bound in GROUND-ENV, (iii) both are variables bound in GROUND-ENV.
  ; The composed binding becomes a ground binding and moved to GROUND-ENV'.
  (define (keep-resolving ground non-ground)
    (let loop ((env non-ground) (ground ground) (non-ground '()) (changed? #f))
      (if (null? env) (values ground non-ground changed?)
	(let*
	  ((binding (car env))
	   (env (cdr env))
	   (var (car binding))
	   (term1 (cadr binding))
	   (term2 (cddr binding))
	   (term1-var? (pattern-var? term1))
	   (term2-var? (pattern-var? term2))
	   (term1-gb (and term1-var? (assq term1 ground)))
	   (term2-gb (and term2-var? (assq term2 ground))))
	  (cond
	    ((and term1-gb term2-gb)	; successful composition
	      (loop env
		(cons (cons var (cons (cdr term1-gb) (cdr term2-gb))) ground)
		non-ground #t))
	    ((or (eq? term1-var? (not term1-gb))  ; Could not find one
	         (eq? term2-var? (not term2-gb))) ; ground binding
	      (loop env ground (cons binding non-ground) changed?))
	    (term1-gb		; term2 is atom, term1 resolves to ground
	      (loop env (cons (cons var (cons (cdr term1-gb) term2)) ground)
		non-ground #t))
	    (term2-gb		; term1 is atom, term2 resolves to ground
	      (loop env (cons (cons var (cons term1 (cdr term2-gb))) ground)
		non-ground #t))
	    (else		; both term1 and term2 are atoms
	      (loop env (cons binding ground) non-ground #t)))))))

  ; The fixpoint algorithm
  (define (iter ground non-ground changed?)
    (cond
      ((null? non-ground) ground)
      (changed?
	(call-with-values (lambda () (keep-resolving ground non-ground)) iter))
      (accept-cyclic?			; potentially we may wish to unroll
	(append non-ground ground))	; non-ground for a few steps
      (else #f)))

  ; Get the ball rolling...
  (call-with-values
    (lambda ()
      (call-with-values (lambda () (init-ground-terms env)) keep-resolving))
    iter)
)
		      

(run-test
  (display "Testing the normalization of substitutions:")
  (newline)
  (pp (normalize-subst 
	(unify-trees '(p (g _x) (f a)) '(p _y _x) '()) #t))
  (assert
    (equal? '((_a . _b))
      (normalize-subst (unify-trees '(M _a _b) '(M _b _a) '()) #t)))
  (pp (normalize-subst 
	(unify-trees '(p _x _x) '(p _y (f _y)) '()) #t))
  (pp (normalize-subst 
	(unify-trees '(p _x _x) '(p _y (f (g _y))) '()) #t))
      ; This example occurs in the termination prover
      ; It requires a cyclic substitution
  (pp (normalize-subst 
	(unify-trees '(M foo _v) '(M foo (succ _v)) '()) #t))
  (display "Testing cyclic: simple, simple/complex mutually-recursive")
  (newline)
  (pp (normalize-subst 
	(unify-trees '(f _y) '_y '()) #t))
  (pp (normalize-subst 
	(unify-trees '(p _y _a) '(p (f _a) (g _y)) '()) #t))
  (pp (normalize-subst 
	(unify-trees '(p _y _a) '(p (f _y _a) (g _y _a)) '()) #t))
  (assert
    (equal? '((_a . _d) (_b . _d) (_c . _d))
      (normalize-subst 
	(unify-trees '(f _a _b _c _d) '(f _b _c _d _a) '()) #f)))
  (assert
    (equal? '((_a . _d) (_b . _d) (_c . _d))
      (normalize-subst 
	(unify-trees '(f _a _b _c _d) '(f _d _a _b _c) '()) #f)))

   ; Tests for a bad unification from Franz Baader, Wayne Snyder,
   ; "Unification theory" (1999).
   ; The naive algorithm exponentially explodes on the following
  (display "Testing pathological, exponential cases") (newline)
  (time (pp (normalize-subst (unify-trees
	  '(h _x1 (f _y0 _y0) _y1)
	  '(h (f _x0 _x0) _y1 _x1)
	  '()) #t)))
      
  (time (pp (normalize-subst (unify-trees
	  '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2)
	  '(h (f _x0 _x0)(f _x1 _x1) _y1 _y2  _x2)
	  '()) #t)))

  (time (pp (normalize-subst (unify-trees
	 '(h _x1 _x2 _x3 _x4 
	     (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4)
	 '(h (f _x0 _x0)(f _x1 _x1)(f _x2 _x2)(f _x3 _x3)
	     _y1 _y2 _y3 _y4 _x4)
	 '()) #t)))
)

; Timings for the above test (Gambit-C 3.0 on Pentium IV 2GHz):
; Notice the linear increase in time and space requirement as we
; double the length of the terms to unify
; (time (pp (normalize-subst 
; 	(unify-trees '(h _x1 (f _y0 _y0) _y1)
; 	             '(h (f _x0 _x0) _y1 _x1) '()) #t)))
;     1 ms real time
;     1 ms cpu time (1 user, 0 system)
;     no collections
;     37456 bytes allocated
;
; (time (pp (normalize-subst
;        (unify-trees '(h _x1 _x2 (f _y0 _y0) (f _y1 _y1) _y2)
; 	              '(h (f _x0 _x0) (f _x1 _x1) _y1 _y2 _x2) '()) #t)))
;     14 ms real time
;     4 ms cpu time (4 user, 0 system)
;     1 collection accounting for 1 ms cpu time (1 user, 0 system)
;     96456 bytes allocated
;
; (time (pp (normalize-subst
; 	(unify-trees 
;      '(h _x1 _x2 _x3 _x4 (f _y0 _y0) (f _y1 _y1) (f _y2 _y2) (f _y3 _y3) _y4)
;      '(h (f _x0 _x0) (f _x1 _x1) (f _x2 _x2) (f _x3 _x3) _y1 _y2 _y3 _y4 _x4)
;      '()) #t)))
;     19 ms real time
;     9 ms cpu time (9 user, 0 system)
;     no collections
;     294480 bytes allocated


; Substitute vars in term recursively, while avoiding loops
; If the variable is not found in the current env, leave it alone
(define (substitute-vars-recursively term env)
  (substitute-vars-open term (normalize-subst env #t)))

(run-test
 (assert (equal? 1 (substitute-vars-recursively '_x '((_x . 1)))))
 (assert (equal? '(a 1 (b (1 2) 1))
		 (substitute-vars-recursively 
		  '(a _x (b _y _x)) '((_x . 1) (_y 1 2)))))
 (assert (equal? '(a _y _y _y)
		 (substitute-vars-recursively  '(a _x _x _y)  '((_x . _y)))))
 (assert (equal? '(a 1 1 1)
	(substitute-vars-recursively  '(a _x _x _y)  '((_x . _y) (_y . 1)))))
 (assert (equal? '(a _x _x _x)
        (substitute-vars-recursively  '(a _x _x _y)  '((_x . _y) (_y . _x)))))
)

(run-test
  (display "Testing unify and substitute-vars-recursively together")
  (newline)
  (let ((test 
	; unify tree1 and tree2, use the substitution on term
	; to produce 'expected'
	(lambda (tree1 tree2 term expected)
	  (display "unifying ") (display tree1)
	  (display " and ") (display tree2)
	  (let* ((env (unify-trees tree1 tree2 '()))
		 (_ (begin (display " =>") (pp env) (newline)))
		 (subst-term (substitute-vars-recursively term env)))
	    (display "   subst term =>") (pp subst-term) (newline)
	    (assert (equal? expected subst-term))))))
    (test '(_x 4) '(_y _y) '(_x _y) '(4 4))
    (test '(_r (_v (_w _x) 8)) '(_r (_u (abc _u) _x))
          '(_u _x _w _v) '(8 8 abc 8))
    (test '(p (f a) (g _x)) '(p _x _y)
          '(_y _x) '((g (f a)) (f a)))
    (test '(p (g _x) (f a)) '(p _y _x)
          '(_y _x) '((g (f a)) (f a)))
    (test '(p a _x (h (g _z))) '(p _z (h _y) (h _y))
          '(_y _x _z) '((g a) (h (g a)) a))
    ; both _x and _y receive (f (f . _g124))
    (let ((lst
	    (substitute-vars-recursively '(_x _y)
	      (unify-trees '(p _x _x) '(p _y (f _y)) '()))))
      (assert (equal? (car lst) (cadr lst))))
))


; Give the list of all the vars in the term, with no duplicates
(define (term-variables term)
  (define (flatten-unique lst dest)
    (cond
     ((null? lst) dest)
     ((pair? lst) (flatten-unique (cdr lst)
				  (flatten-unique (car lst) dest)))
     ((memq lst dest) dest)
     (else (cons lst dest))))
  (flatten-unique (tree-traverse 
	   (lambda (leaf)
	     (and (pattern-var? leaf) leaf)) term)
	  '()))
(run-test
 (assert (equal? '() (term-variables '(a))))
 (assert (equal? '(_a) (term-variables '(_a))))
 (assert (equal? '() (term-variables '(add (imm 0) x))))
 (assert (equal? '(_z _x) (term-variables '(seq _x (seq _x _z)))))
 (assert (equal? '(_z _x _) (term-variables '(seq _ (seq _x _z)))))
 (assert (equal? '(_r _x _c) 
	     (term-variables '(M add _c (M mult _x (M polynomial _x _r))))))
 (assert (equal? '(_y _x)
     (term-variables '(((M _x) => _x) (M a _x _y) => (M b _y _x)))))
)

; Return a term where all the variable names are unique
(define (alfa-convert term)
  (let* ((vars (term-variables term))
	 (new-vars
	  (map
	   (lambda (_) (genvar))
	   vars)))
    (substitute-vars term (map cons vars new-vars))))

(run-test
 (cout "Testing alfa-convert..." nl)
 (pp
   (alfa-convert 
     '( ( (M even (seq _c _r)) => (M odd _r) )
	( (M even (seq _ _r)) => (M odd _r) )
	( (M odd (seq-empty)) => (no)  )
	( (M odd (seq _c _r)) => (M even _r) ))
     ))
)

; Check if two trees can be unified without creating cyclic substitutions
(define (unify-simple? tree1 tree2)
  (and-let* ((env (unify-trees tree1 tree2 '()))
	     ((normalize-subst env #f)))
    #t))

(run-test
 (display "testing unify-simple?") (newline)
 (let ((test 
	(lambda (tree1 tree2 expected)
	  (display "unifying ") (display tree1)
	  (display " and ") (display tree2)
	  (let ((result (unify-simple? tree1 tree2)))
	    (display " =>") (display result) (newline)
	    (assert (equal? expected result))))))
   (test '(M abc) '(M abc) #t)
   (test '(M abc) '(M abcd) #f)
   (test '(M abc) '(M _ _) #f)
   (test '(M abc) '(M _) #t)
   (test '(M _ a _ ) '(M a _ b) #t)
   (test '(M _a _b) '(M _b _a) #t)
   (test '(M _a abc) '(M _b _a) #t)
   (test '_b '(seq _a) #t)
   (test '(M (seq _a)) '(M _a) #f) ; cyclic unification!
   (test '(M (seq _a)) '(M _b) #t)
   (test '(M (seq _a) _a) '(M _b _c) #t)
   (test '(M (seq _a) _a) '(M _b (seq _b)) #f) ; cyclic unification!
   (test '(M foo _v) '(M foo (succ _v)) #f) ; cyclic!
   (test '(M (seq _a) _a) '(M (seq _b) _b) #t)

   (test '_x '(seq 1 (seq-empty)) #t)
   (test '_ '(seq 1 (seq-empty)) #t)
   (test '(seq-empty) '(seq 1 (seq-empty)) #f)
   (test '(seq 1 (seq-empty)) '(seq 1 (seq-empty)) #t)
   (test '(seq _x (seq-empty)) '(seq 1 (seq-empty)) #t)
   (test '(seq _x _y) '(seq 1 (seq-empty)) #t)
   (test '(seq _x _y _z) '(seq 1 (seq-empty)) #f)
   (test '(seq _x (seq _y _z)) '(seq 1 (seq 2 (seq-empty))) #t)
   (test '(seq _x (seq _x _z)) '(seq 1 (seq 2 (seq-empty))) #f)
   (test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) #t)
   (test '(seq _x (seq _x _z)) '(seq 1 (seq 1 (seq-empty))) #t)

   ; A few tests from Dan Friedman's "Poor-man's Logic system tutorial"
   (test '(_x _x) '(3 4) #f)
   (test '(_x 4) '(3 _x) #f)
   (test '(_x _y) '(3 4) #t)
   (test '(_x 4) '(3 _y) #t)
   (test '(_x 4) '(_y _y) #t)
   (test '(_x 4 3) '(_y _y _x) #f)
   (test '(_x 4 3 _w) '(3 _y _x _z) #t)
   (test '(p _x _x) '(p _y (f _y)) #f) ; cyclic
   (test '(p (f a) (g _x)) '(p _x _y) #t)
   (test '(p (g _x) (f a)) '(p _y _x) #t)
))