(* Simple Hindley-Milner type checker for pure lambda-calculus with let Explanation of the efficient generalization -- Remy algorithm The sound and even more efficient generalization with levels computed on demand,2 and the incremental occurs check and generalization. *) (* The language: lambda-calculus with let *) type varname = string type exp = | Var of varname (* variable *) | App of exp * exp (* application: e1 e2 *) | Lam of varname * exp (* abstraction: fun x -> e *) | Let of varname * exp * exp (* let x = e in e2 *) ;; (* The types to infer *) (* Types without quantified variables are simple types; those containing quantified variables are type schemas. A quantified variable is a TVar whose level is generic_level. Since quantifiers are always on the outside in the HM system, they are implied and not explicitly represented. Unlike sound_eager.ml, all types, not only type variables, have levels. Normally, the level of a composite type is an upper bound on the levels of its components. If a type belongs to a region n, all its subcomponents should be alive when the region n is still alive. However, levels are determined incrementally. Therefore, Composite types have two levels: level_old is always an upper bound for the levels of the components; level_new is equal or less than level_old. If level_new is less than level_old, the type is being promoted to a higher region. The type needed to be traversed and all of its components adjusted so their levels do not exceed level_new. Generalization will perform such an adjustment of levels for some types. During type traversals, level_new may have the value marked_level to signify that the type is being traversed. Encountering a type at the marked_level during the traversals means that we detect a cycle, created during unification without the occurs check. *) type level = int let generic_level = 100000000 (* as in OCaml typing/btype.ml *) let marked_level = -1 (* for marking a node, to check*) (* for cycles *) type typ = | TVar of tv ref | TArrow of typ * typ * levels and tv = Unbound of string * level | Link of typ and levels = {mutable level_old : level; mutable level_new : level} ;; (* Chase the links of bound variables, returning either a free variable or a constructed type. OCaml's typing/btype.ml has the same function with the same name. Unlike OCaml, we do path compression. *) let rec repr : typ -> typ = function | TVar ({contents = Link t} as tvr) -> let t = repr t in tvr := Link t; t | t -> t ;; (* get the level of a normalized type, which is not a bound TVar *) let get_level : typ -> level = function | TVar {contents = Unbound (_,l)} -> l | TArrow (_,_,ls) -> ls.level_new | _ -> assert false ;; let gensym_counter = ref 0 let reset_gensym : unit -> unit = fun () -> gensym_counter := 0 ;; let gensym : unit -> string = fun () -> let n = !gensym_counter in let () = incr gensym_counter in if n < 26 then String.make 1 (Char.chr (Char.code 'a' + n)) else "t" ^ string_of_int n ;; (* Determining the |let|-nesting level during the type-checking, or just the _level_. Each top-level expression to type-check is implicitly wrapped into a let. So, the numbering starts with 1. *) let current_level = ref 1 let reset_level () = current_level := 1 let reset_type_variables () = (* name from OCaml's typing/typetext.ml *) reset_gensym (); reset_level () (* Increase level *) let enter_level () = incr current_level (* Restore level *) let leave_level () = decr current_level (* Make a fresh type variable and an arrow type *) let newvar : unit -> typ = fun () -> TVar (ref (Unbound (gensym (),!current_level))) ;; let new_arrow : typ -> typ -> typ = fun ty1 ty2 -> TArrow(ty1,ty2,{level_new = !current_level; level_old = !current_level}) ;; (* Delayed occurs check. We do not do the occurs check when unifying a free type variable. Therefore, we may construct a cyclic type. The following function, executed only at the end of the type checking, checks for no cycles in the type. Incidentally, OCaml does allow cycles in the type: types are generally (equi-)recursive in OCaml. *) let rec cycle_free : typ -> unit = function | TVar {contents = Unbound _} -> () | TVar {contents = Link ty} -> cycle_free ty | TArrow (_,_,ls) when ls.level_new = marked_level -> failwith "occurs check" | TArrow (t1,t2,ls) -> let level = ls.level_new in ls.level_new <- marked_level; cycle_free t1; cycle_free t2; ls.level_new <- level ;; (* Main unification *) (* Quantified variables are unexpected: they should've been instantiated *) (* The occurs check is lazy; therefore, cycles could be created accidentally. We have to watch for them. *) (* Update the level of the type so that it does not exceed the given level l. Invariant: a level of a type can only decrease (assigning the type generic_level is special, and does not count as the `update') The existing level of the type cannot be generic_level (quantified variables must be specially instantiated) or marked_level (in which case, we encounter a cycle). If the type to update is composite and its new and old levels were the same and the new level is updated to a smaller level, the whole type is put into the to_be_level_adjusted queue for later traversal and adjustment of the levels of components. This work queue to_be_level_adjusted is akin to the list of assignments from the old generation to the new maintained by a generational garbage collector (such as the one in OCaml). The update_level itself takes constant time. *) let to_be_level_adjusted = ref [] let reset_level_adjustment () = to_be_level_adjusted := [] let update_level : level -> typ -> unit = fun l -> function | TVar ({contents = Unbound (n,l')} as tvr) -> assert (not (l' = generic_level)); if l < l' then tvr := Unbound (n,l) | TArrow (_,_,ls) as ty -> assert (not (ls.level_new = generic_level)); if ls.level_new = marked_level then failwith "occurs check"; if l < ls.level_new then begin if ls.level_new = ls.level_old then to_be_level_adjusted := ty :: !to_be_level_adjusted; ls.level_new <- l end | _ -> assert false ;; (* Unifying a free variable tv with a type t takes constant time: it merely links tv to t (setting the level of t to tv if tv's level was smaller). Therefore, cycles may be created accidentally, and the complete update of type levels may have to be done at a later time. Incidentally, another unification may need to traverse the type with the pending level update. That unification will do the level update along the way. *) let rec unify : typ -> typ -> unit = fun t1 t2 -> if t1 == t2 then () (* t1 and t2 are physically the same *) else match (repr t1,repr t2) with | (TVar ({contents = Unbound (_,l1)} as tv1) as t1, (* unify two free vars *) (TVar ({contents = Unbound (_,l2)} as tv2) as t2)) -> if tv1 == tv2 then () (* the same variable *) else (* bind the higher-level var *) if l1 > l2 then tv1 := Link t2 else tv2 := Link t1 | (TVar ({contents = Unbound (_,l)} as tv),t') | (t',TVar ({contents = Unbound (_,l)} as tv)) -> update_level l t'; tv := Link t' | (TArrow (tyl1,tyl2,ll), TArrow (tyr1,tyr2,lr)) -> if ll.level_new = marked_level || lr.level_new = marked_level then failwith "cycle: occurs check"; let min_level = min ll.level_new lr.level_new in ll.level_new <- marked_level; lr.level_new <- marked_level; unify_lev min_level tyl1 tyr1; unify_lev min_level tyl2 tyr2; ll.level_new <- min_level; lr.level_new <- min_level (* everything else is the unification error *) and unify_lev l ty1 ty2 = let ty1 = repr ty1 in update_level l ty1; unify ty1 ty2 ;; (* The type environment *) type env = (varname * typ) list ;; (* Sound generalization: generalize (convert to quantified vars) only those free TVars whose level is greater than the current. These TVars belong to dead regions. A quantified var is a TVar at the generic_level. We traverse only those parts of the type that may contain type variables at the level greater than the current. If a type has the level of the current or smaller, all of its components have the level not exceeding the current -- and so that type does not have to be traversed. After generalization, a constructed type receives the generic_level if at least one of its components is quantified. However, before generalization we must perform the pending level updates. After all, a pending update may decrease the level of a type variable (promote it to a wider region) and thus save the variable from quantification. We do not need to do all of the pending updates: only those that deal with types whose level_old > current_level. If level_old <= current_level, the type contains no generalizable type variables anyway. *) let force_delayed_adjustments () = let rec loop acc level ty = match repr ty with | TVar ({contents = Unbound (name,l)} as tvr) when l > level -> tvr := Unbound (name,level); acc | TArrow (_,_,ls) when ls.level_new = marked_level -> failwith "occurs check" | TArrow (_,_,ls) as ty -> if ls.level_new > level then ls.level_new <- level; adjust_one acc ty | _ -> acc (* only deals with composite types *) and adjust_one acc = function | TArrow (_, _, ls) as ty when ls.level_old <= !current_level -> ty::acc (* update later *) | TArrow (_, _, ls) when ls.level_old = ls.level_new -> acc (* already updated *) | TArrow (ty1, ty2, ls) -> let level = ls.level_new in ls.level_new <- marked_level; let acc = loop acc level ty1 in let acc = loop acc level ty2 in ls.level_new <- level; ls.level_old <- level; acc | _ -> assert false in to_be_level_adjusted := List.fold_left adjust_one [] !to_be_level_adjusted; ;; let gen : typ -> unit = fun ty -> force_delayed_adjustments (); let rec loop ty = match repr ty with | TVar ({contents = Unbound (name,l)} as tvr) when l > !current_level -> tvr := Unbound (name,generic_level) | TArrow (ty1,ty2,ls) when ls.level_new > !current_level -> let ty1 = repr ty1 and ty2 = repr ty2 in loop ty1; loop ty2; let l = max (get_level ty1) (get_level ty2) in ls.level_old <- l; ls.level_new <- l (* set the exact level upper bound *) | _ -> () in loop ty ;; (* instantiation: replace schematic variables with fresh TVars. Only the components at generic_level are traversed, since only those may contain quantified type variables. *) let inst : typ -> typ = let rec loop subst = function | TVar {contents = Unbound (name,l)} when l = generic_level -> begin try (List.assoc name subst, subst) with Not_found -> let tv = newvar () in (tv, (name,tv)::subst) end | TVar {contents = Link ty} -> loop subst ty | TArrow (ty1,ty2,ls) when ls.level_new = generic_level -> let (ty1,subst) = loop subst ty1 in let (ty2,subst) = loop subst ty2 in (new_arrow ty1 ty2, subst) | ty -> (ty, subst) in fun ty -> fst (loop [] ty) ;; (* Trivial type checker. Type checking errors are delivered as exceptions *) let rec typeof : env -> exp -> typ = fun env -> function | Var x -> inst (List.assoc x env) | Lam (x,e) -> let ty_x = newvar () in let ty_e = typeof ((x,ty_x)::env) e in new_arrow ty_x ty_e | App (e1,e2) -> let ty_fun = typeof env e1 in let ty_arg = typeof env e2 in let ty_res = newvar () in unify ty_fun (new_arrow ty_arg ty_res); ty_res | Let (x,e,e2) -> enter_level (); let ty_e = typeof env e in leave_level (); gen ty_e; typeof ((x,ty_e)::env) e2 ;; (* Type-check the top-level expression *) let top_type_check : exp -> typ = fun exp -> reset_type_variables (); reset_level_adjustment (); let ty = typeof [] exp in cycle_free ty; ty ;; (* tests *) (* partial pattern-match is a part of the test *) [@@@ warning "-8"] let id = Lam("x",Var"x");; let c1 = Lam("x",Lam("y",App (Var"x",Var"y")));; let TArrow (TVar {contents = Unbound ("a", 1)}, TVar {contents = Unbound ("a", 1)}, {level_old = 1; level_new = 1}) = top_type_check id ;; let TArrow (TVar {contents = Link (TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}))}, TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check c1 ;; let TArrow (TArrow (TVar {contents = Unbound ("d", 1)}, TVar {contents = Unbound ("e", 1)}, {level_old = 1; level_new = 1}), TArrow (TVar {contents = Unbound ("d", 1)}, TVar {contents = Unbound ("e", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Let ("x",c1,Var"x"));; let TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("b", 1)}, {level_old = 1; level_new = 1}) = top_type_check (Let ("y",Lam ("z",Var"z"), Var"y"));; let TArrow (TVar {contents = Unbound ("a", 1)}, TArrow (TVar {contents = Unbound ("c", 1)}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Let ("y",Lam ("z",Var"z"), Var"y")));; let TArrow (TVar {contents = Link (TVar {contents = Unbound ("c", 1)})}, TVar {contents = Link (TVar {contents = Unbound ("c", 1)})}, {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Let ("y",Lam ("z",Var"z"), App (Var"y",Var"x"))));; try let _ = top_type_check (Lam ("x",App (Var"x",Var"x"))) in assert false; with Failure e -> print_endline e ;; try let _ = top_type_check (Let ("x",Var"x",Var"x")) in assert false with Not_found -> print_endline "unbound var" ;; (* Max Heiber's example *) (* fun y -> y (fun z -> y z) *) try let _ = top_type_check (Lam ("y", App (Var"y",(Lam ("z", (App (Var"y", Var"z"))))))) in assert false with Failure e -> print_endline e ;; (* The problem pointed out by kirang fun x -> fun y -> fun k -> k (k x y) (k y x) no longer occurs here, since we do path compression *) let TArrow (TVar {contents = Unbound ("a", 1)}, TArrow (TVar {contents = Link (TVar {contents = Unbound ("a", 1)})}, TArrow (TVar {contents = Link (TArrow (TVar {contents = Unbound ("a", 1)}, TVar {contents = Link (TArrow (TVar {contents = Link (TVar {contents = Unbound ("a", 1)})}, TVar {contents = Link (TVar {contents = Unbound ("a", 1)})}, {level_old = 1; level_new = 1}))}, {level_old = 1; level_new = 1}))}, TVar {contents = Link (TVar {contents = Unbound ("a", 1)})}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Lam ("y", Lam ("k", App (App (Var "k", App (App (Var "k", Var "x"), Var "y")), App (App (Var "k", Var "y"), Var "x")) )))) (* id can be `self-applied', on the surface of it *) let TVar {contents = Link (TArrow (TVar {contents = Unbound ("c", 1)}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}))} = top_type_check (Let ("id",id, App (Var"id",Var"id")));; let TArrow (TVar {contents = Unbound ("i", 1)}, TVar {contents = Unbound ("i", 1)}, {level_old = 1; level_new = 1}) = top_type_check (Let ("x",c1, Let ("y", Let ("z",App(Var"x",id), Var "z"), Var"y")));; (* fun x -> fun y -> let x = x y in fun x -> y x;; - : (('a -> 'b) -> 'c) -> ('a -> 'b) -> 'a -> 'b = *) let TArrow (TVar {contents = Link (TArrow (TVar {contents = Link (TArrow (TVar {contents = Unbound ("d", 1)}, TVar {contents = Unbound ("e", 1)}, {level_old = 1; level_new = 1}))}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}))}, TArrow (TVar {contents = Link (TArrow (TVar {contents = Unbound ("d", 1)}, TVar {contents = Unbound ("e", 1)}, {level_old = 1; level_new = 1}))}, TArrow (TVar {contents = Unbound ("d", 1)}, TVar {contents = Unbound ("e", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Lam("y",Let ("x",App (Var"x",Var"y"), Lam ("x",App (Var"y",Var"x"))))));; (* now sound generalization ! *) let TArrow (TVar {contents = Unbound ("a", 1)}, TVar {contents = Unbound ("a", 1)}, {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Let ("y",Var"x", Var"y")));; (* now sound generalization ! *) let TArrow (TVar {contents = Unbound ("a", 1)}, TArrow (TVar {contents = Unbound ("c", 1)}, TVar {contents = Unbound ("a", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Let ("y",Lam ("z",Var"x"), Var"y")));; (* now sound generalization ! *) let TArrow (TVar {contents = Link (TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}))}, TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Let ("y",Lam ("z",App (Var"x",Var"z")), Var"y")));; (* now sound generalization ! *) let TArrow (TVar {contents = Link (TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Link (TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("d", 1)}, {level_old = 1; level_new = 1}))}, {level_old = 1; level_new = 1}))}, TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("d", 1)}, {level_old = 1; level_new = 1}), {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Lam("y",Let ("x",App (Var"x",Var"y"), App (Var"x",Var"y")))));; (* now sound generalization ! *) try let _ = top_type_check (Lam ("x",Let("y",Var"x", App (Var"y",Var"y")))) in assert false with Failure e -> print_endline e ;; (* now sound generalization ! *) (* fun x -> let y = let z = x (fun x -> x) in z in y;; - : (('a -> 'a) -> 'b) -> 'b = *) let TArrow (TVar {contents = Link (TArrow (TArrow (TVar {contents = Unbound ("b", 1)}, TVar {contents = Unbound ("b", 1)}, {level_old = 1; level_new = 1}), TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}))}, TVar {contents = Unbound ("c", 1)}, {level_old = 1; level_new = 1}) = top_type_check (Lam ("x", Let ("y", Let ("z",App(Var"x",id), Var "z"), Var"y")));; print_endline "\nAll Done\n";;