(* Embedding a language with subtyping
Writing functions that accept only a subset of values of an algebraic
data type (restricted only to particular data constructors) is
the problem that occurs with surprising regularity, in
particular, when some sort of permissions, rights or other such
semi-lattices are involved.
Here is one of the simplest but representative examples, designed
by Josh Berdine (Caml-list, Jun 21, 2020).
Suppose we have a data type of expressions
type exp = Int of int | Float of float | Add of exp * exp | String of string
We want to build lists of expressions such as
let l1 = [Int 1; Float 2.0; Add (Add (Int 3, Float 1.0), Int 4)]
let l2 = String "str" :: l1
and define functions
val eval : exp -> exp
val show : exp -> string
and use these functions to sum up a list of expressions and to print
the list out. It goes without much saying that we would like to avoid
building expressions like Add (String "str", Int 1) -- that is, an
attempt to create such an expression should fail, with \emph{a clear error
message} well describing the source of the problem and its
location. Although we should be able to show both l1 and
l2 above, attempting to sum up l2 should likewise raise a type
error with a clear error message.
Although the example looks like a poster example for GADTs and polymorphic
variants, they alone don't solve the problem. For example, we can index exp
with types
type _ exp =
| Int : int -> int exp
| Float : float -> float exp
| Add : int exp * int exp -> int exp
| String : string -> string exp
and restrict Add constructor to take exp only of int type. However, we want to
also add floats, and to Add (Float 1.0, Int 5). So, GADTs don't work if we
want to specify several allowed constructors. Also, now Int 1 and Float 2.0
have different types, and we can't put them into the same list.
Polymorphic variants almost solve the problem. However, their typing is weak
and the error occurs late (for example, one can write both `int 1 and
`int 1.0). Also, we need recursive polymorphic variants, which bring a host
of their own problems. It may seem that GADTs plus polymorphic variants may
do the trick. However, variance and subtyping GADTs is an open problem,
and hardly supported in OCaml.
We show three solutions to the problem. The first has an imperfection,
but is easier to explain. The second removes the imperfection. The
third one is verbose, but requires the least of language features and
can be implemented in SML or perhaps even Java. All three solutions
give the end user the same static guarantees, statically preventing
building of senseless expressions. All three rely on the tagless-final
style, in effect viewing the problem as a language design problem --
designing a DSL with subtyping. It has been my experience that the
tagless-final, algebraic point of view typically requires less fancy
typing and is hence more robust and informative.
*)
(* #use "subtyping.ml";;
*)
(* Thus, let's think of Josh Berdine's problem as a DSL with subtyping.
Let's define its syntax and the type system, as an OCaml signature:
It is a language with int, float and string literals, and an addition
operation. The user should be able to add ints and floats (and keep adding),
but should be precluded from adding strings.
*)
module type exp = sig
type +'a t
val int : int -> [> `int] t
val float : float -> [> `float] t
val add : ([< `int | `float] as 'n) t -> 'n t -> [> `int | `float] t
val string : string -> [> `string] t
end
(* The language expressions are indexed with a subset of tags `int,
`float, `string. The language has the obvious subtyping: [> `int]
can be weakened to [> `int | `float]. This weakening, and general typechecking
of our DSL is performed by OCaml's type checker for us.
*)
(* Let's see an example: *)
module Ex1(I:exp) = struct
open I
(* We can put expressions of different sorts into the same list *)
let l1 = [int 1; float 2.0; add (add (int 3) (float 1.0)) (int 4)]
(* val l1 : [> `float | `int ] I.t list *)
let l2 = string "str" :: l1
(* val l2 : [> `float | `int | `string ] I.t list *)
(* An attempt to build an invalid expression fails, with a good error message:
let x = add (int 1) (string "s")
^^^^^^^^^^^^
Error: This expression has type [> `string ] t
but an expression was expected of type [< `float | `int > `int ] t
The second variant type does not allow tag(s) `string
*)
(* We can define a function to sum up elements of a list of expressions,
returning a single expression with addition
*)
let rec sum l = List.fold_left add (int 0) l
(* val sum : [ `float | `int ] I.t list -> [ `float | `int ] I.t *)
(* We can sum up l1 *)
let l1v = sum l1
(* val l1v : [ `float | `int ] I.t *)
(* but, predictably, not l2
let l2v = sum l2
^^
Error: This expression has type [> `float | `int | `string ] t list
but an expression was expected of type [ `float | `int ] t list
The second variant type does not allow tag(s) `string
*)
end
(* One doesn't have to use functor as we just did. First-class modules,
or even records/objects will work too.
*)
(* Now, what we can do with the expressions? We can print/show them *)
module Show : (exp with type 'a t = string) = struct
type 'a t = string
let int = string_of_int
let float = string_of_float
let string x = "\"" ^ String.escaped x ^ "\""
let add x y = Printf.sprintf "Add(%s,%s)" x y
end
let _ = let module M = Ex1(Show) in M.l1
(* - : string list = ["1"; "2."; "Add(Add(3,1.),4)"] *)
let _ = let module M = Ex1(Show) in M.l2
(* - : string list = ["\"str\""; "1"; "2."; "Add(Add(3,1.),4)"] *)
let _ = let module M = Ex1(Show) in M.l1v
(* - : string = "Add(Add(Add(0,1),2.),Add(Add(3,1.),4))" *)
(* Here is another operation:
Josh Berdine wrote: ``suppose I also wanted to define a function
which computed the set of int literals that appear in the (unreduced)
expression?''
*)
(* That is easy: we just interpret the same expressions in a different
way: instead of showing them, we compute the set of integers that occur in
their literals.
*)
module IntSet = Set.Make(struct type t = int let compare = compare end)
module CollectInts : (exp with type 'a t = IntSet.t) = struct
type 'a t = IntSet.t
let int x = IntSet.singleton x
let float x = IntSet.empty
let string x = IntSet.empty
let add x y = IntSet.union x y
end
let _ = let module M = Ex1(CollectInts) in List.map IntSet.elements M.l1
(* - : int list list = [[1]; []; [3; 4]] *)
let _ = let module M = Ex1(CollectInts) in IntSet.elements M.l1v
(* - : int list = [0; 1; 3; 4] *)
(* Another interpreter:
Josh Berdine: More pressingly, I think, is what operations such as
`map_ints : (int -> int) -> 'a exp -> 'a exp` would look like. With an
initial style representation, this could be defined by traversing the
abstract syntax and rebuilding it with the updated `int`s.
*)
(* In tagless-final, it looks essentially like this, only simpler.
There is no need to write any traversal (a tagless-final term
itself specifies the traversal of itself)
*)
module MapInts(Map:sig val f : int -> int end)(I:exp) = struct
type 'a t = 'a I.t
let int x = I.int (Map.f x)
let float = I.float
let string = I.string
let add = I.add
end
let _ = let module M = Ex1(MapInts(struct let f = succ end)(Show)) in M.l1
(* - : string list = ["2"; "2."; "Add(Add(4,1.),5)"] *)
let _ = let module M = Ex1(MapInts(struct let f = succ end)(Show)) in M.l2
(* - : string list = ["\"str\""; "2"; "2."; "Add(Add(4,1.),5)"] *)
let _ = let module M = Ex1(MapInts(struct let f = succ end)(Show)) in M.l1v
(* - : string = "Add(Add(Add(1,2),2.),Add(Add(4,1.),5))" *)
(* We can also reduce them. The following is the reducer -- although
in reality it does the normalization-by-evaluation (NBE)
*)
(* A subset of expressions: values *)
module type vals = sig
type +'a t
val int : int -> [> `int] t
val float : float -> [> `float] t
val string : string -> [> `string] t
end
module Reducer(I:vals) :
(sig include exp val observe : ([> `float | `int ] as 'a) t -> 'a I.t end)
= struct
type 'a t = Unk of 'a I.t | Int of int | Float of float
let observe = function
| Unk x -> x
| Int x -> I.int x
| Float x -> I.float x
let int x = Int x
let float x = Float x
let string x = Unk (I.string x)
let add x y = match (x,y) with
| (Int x, Int y) -> Int (x+y)
| (Float x, Float y) -> Float (x+.y)
| (Int x, Float y) | (Float y, Int x) -> Float (float_of_int x +. y)
(* This is the imperfection. We know that the case cannot happen,
but this is not clear from the type system.
We should stress that this imperfection does not compromise the guarantees
offered to the end user, who never sees the internals of Reducer,
and can't even access them.
*)
| _ -> assert false
end
(* We can now evaluate exp to values. We need a way to show them though.
Luckily, exp is a superset of values, and we already wrote the show
interpreter for exp
*)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in List.map I.observe M.l1
(* - : string list = ["1"; "2."; "8."] *)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in List.map I.observe M.l2
(* - : string list = ["\"str\""; "1"; "2."; "8."] *)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in I.observe M.l1v
(* - : string = "11." *)
(* ------------------------------------------------------------------------
* The second solution: let's remove the imperfection, however minor
*)
module type exp = sig
type +'a t
type tint (* all abstract *)
type tfloat
type tstring
val int : int -> [> `int of tint] t
val float : float -> [> `float of tfloat] t
val add : ([< `int of tint | `float of tfloat] as 'n) t -> 'n t ->
[> `int of tint | `float of tfloat ] t
val string : string -> [> `string of tstring] t
end
(* The new language is essentially as the above, but the phantom labels
have more phantomness (more structure)
*)
(* Let's see an example. It is the same example. The inferred types
however are a bit fancier
*)
module Ex1(I:exp) = struct
open I
(* We can put expressions of different sorts into the same list *)
let l1 = [int 1; float 2.0; add (add (int 3) (float 1.0)) (int 4)]
(* val l1 : [> `float of I.tfloat | `int of I.tint ] I.t list *)
let l2 = string "str" :: l1
(* val l2 :
[> `float of I.tfloat | `int of I.tint | `string of I.tstring ] I.t list
*)
(* An attempt to build an invalid expression fails, with a good error message:
let x = add (int 1) (string "s")
*)
(* We can define a function to sum up elements of a list of expressions,
returning a single expression with addition
*)
let rec sum l = List.fold_left add (int 0) l
(*
val sum :
[ `float of I.tfloat | `int of I.tint ] I.t list ->
[ `float of I.tfloat | `int of I.tint ] I.t
*)
(* We can sum up l1 *)
let l1v = sum l1
(* val l1v : [ `float of I.tfloat | `int of I.tint ] I.t *)
(* but, predictably, not l2
let l2v = sum l2
^^
Error: This expression has type
[> `float of tfloat | `int of tint | `string of tstring ] t list
but an expression was expected of type
[ `float of tfloat | `int of tint ] t list
The second variant type does not allow tag(s) `string
*)
end
(* Now, what we can do with the expressions? We can print/show them *)
module Show : (exp with type 'a t = string) = struct
type 'a t = string
type tint = unit
type tfloat = unit
type tstring = unit
let int = string_of_int
let float = string_of_float
let string x = "\"" ^ String.escaped x ^ "\""
let add x y = Printf.sprintf "Add(%s,%s)" x y
end
(* The results are exactly the same *)
let _ = let module M = Ex1(Show) in M.l1
(* - : string list = ["1"; "2."; "Add(Add(3,1.),4)"] *)
let _ = let module M = Ex1(Show) in M.l2
(* - : string list = ["\"str\""; "1"; "2."; "Add(Add(3,1.),4)"] *)
let _ = let module M = Ex1(Show) in M.l1v
(* - : string = "Add(Add(Add(0,1),2.),Add(Add(3,1.),4))" *)
(* We can also reduce them. The following is the reducer -- although
in reality it does normalization-by-evaluation (NBE)
*)
(* A subset of expressions: values *)
module type vals = sig
type +'a t
type tint (* all abstract *)
type tfloat
type tstring
val int : int -> [> `int of tint] t
val float : float -> [> `float of tfloat] t
val string : string -> [> `string of tstring] t
end
module Reducer(I:vals) :
(sig include exp
val observe :
[< `float of tfloat | `int of tint | `string of tstring ] t ->
[> `float of I.tfloat | `int of I.tint | `string of I.tstring ] I.t
end)
= struct
type tint = int
type tfloat = float
type tstring = string
type 'a t = 'a
let observe = function
| `string x -> I.string x
| `int x -> I.int x
| `float x -> I.float x
let int x = `int x
let float x = `float x
let string x = `string x
let add x y = match (x,y) with
| (`int x, `int y) -> `int (x+y)
| (`float x, `float y) -> `float (x+.y)
| (`int x, `float y) | (`float y, `int x) -> `float (float_of_int x +. y)
(* No longer there are any odd pattern matching. add is now total,
and even the compiler sees that.
*)
end
(* However, observe now returns the top of vals... *)
(* We can now evaluate exp to values. We need a way to show them though.
Luckily, exp is a superset of values, and we already wrote the show
interpreter for exp
*)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in List.map I.observe M.l1
(* - : string list = ["1"; "2."; "8."] *)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in List.map I.observe M.l2
(* - : string list = ["\"str\""; "1"; "2."; "8."] *)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in I.observe M.l1v
(* - : string = "11." *)
(* ------------------------------------------------------------------------
The third solution: the most verbose, but requiring no polymorphic variants
or reliance on the metalanguage fir subtyping.
We implement the subtyping ourselves, as coercion subtyping
One may imagine many ways of implementing this solution. We show just one
*)
(* The basic interface for the join operation *)
module type join = sig
type ('a,'b,'c) join (* meaning, 'a and 'b are joinable as 'c *)
val refl : ('a,'a,'a) join
val symm : ('a,'b,'c) join -> ('b,'a,'c) join
val trans : ('a,_,'c) join -> ('c,'d,'w) join -> ('a,'d,'w) join
end
module type exp = sig
type tnum
type top
type 'a t
include join
val int : int -> tnum t
val float : float -> tnum t
val add : tnum t -> tnum t -> tnum t
val string : string -> top t
val jnt : (tnum, top, top) join
val up : ('a,_,'c) join -> 'a t -> 'c t
end
(* The language expressions are indexed with a subset of tags `int,
`float, `string. The language has obvious subtyping: [> `int]
can be weakened to [> `int | `float]. This weakening, and general typechecking
of our DSL is performed by OCaml's type checker for us.
*)
(* Let's see an example: *)
module Ex1(I:exp) = struct
open I
(* We can put expressions of different sorts into the same list,
if we coerce them appropriately
*)
let l1 = [int 1; float 2.0; add (add (int 3) (float 1.0)) (int 4)]
(* val l1 : I.tnum I.t list *)
let l2 = string "str" :: List.map (up jnt) l1
(* val l2 : I.top I.t list *)
(* An attempt to build an invalid expression fails, because there is no
coercion from t to tnum
let x = add (int 1) (string "s")
*)
(* We can define a function to sum up elements of a list of expressions,
returning a single expression with addition
*)
let rec sum l = List.fold_left add (int 0) l
(* val sum : I.tnum I.t list -> I.tnum I.t *)
(* We can sum up l1 *)
let l1v = sum l1
(* val l1v : I.tnum I.t *)
(* but, predictably, not l2
let l2v = sum l2
^^
Error: This expression has type top t list but an expression was
expected of type tnum t list
Type top is not compatible with type tnum
*)
end
(* Now, what we can do with the expressions? We can print/show them *)
module Show : (exp with type 'a t = string) = struct
type tnum = unit
type top = unit
type 'a t = string
type ('a,'b,'c) join = unit
let refl = ()
let symm x = ()
let trans x y = ()
let int = string_of_int
let float = string_of_float
let string x = "\"" ^ String.escaped x ^ "\""
let add x y = Printf.sprintf "Add(%s,%s)" x y
let jnt = ()
let up () x = x
end
(* The results are still the same *)
let _ = let module M = Ex1(Show) in M.l1
(* - : string list = ["1"; "2."; "Add(Add(3,1.),4)"] *)
let _ = let module M = Ex1(Show) in M.l2
(* - : string list = ["\"str\""; "1"; "2."; "Add(Add(3,1.),4)"] *)
let _ = let module M = Ex1(Show) in M.l1v
(* - : string = "Add(Add(Add(0,1),2.),Add(Add(3,1.),4))" *)
(* We can also reduce them. The following is the reducer -- although
in reality it does normalization-by-evaluation (NBE)
*)
(* A subset of expressions: values *)
module type vals = sig
type tnum
type top
type 'a t
include join
val int : int -> tnum t
val float : float -> tnum t
val string : string -> top t
val jnt : (tnum, top, top) join
val up : ('a,_,'c) join -> 'a t -> 'c t
end
(* We could have kept the labels tnum and top global, and used GADT
for 'a t.
Now that we don't need subtyping, GADTs are usable. In that case,
the type of observe will really be 'a t -> 'a I.t.
However, we promised this solution should be simple.
So, we keep it simple, to the SML level. No fancy types.
*)
module Reducer(I:vals) :
(sig include exp val observe : 'a t -> I.top I.t end)
= struct
type tnum = Int of int | Float of float
type top = unit
type 'a t = 'a * I.top I.t
let observe (_,x) = x
let int x = (Int x, I.up I.jnt @@ I.int x)
let float x = (Float x, I.up I.jnt @@ I.float x)
let string x = ((), I.string x)
let add x y = match (fst x, fst y) with
| (Int x, Int y) -> int (x+y)
| (Float x, Float y) -> float (x+.y)
| (Int x, Float y) | (Float y, Int x) -> float (float_of_int x +. y)
(* No longer there are any odd pattern matching. add is now
clearly total
*)
type ('a,'b,'c) join = ('a t -> 'c t) * ('b t -> 'c t)
let refl = ((fun x -> x), (fun x -> x))
let symm x = (snd x, fst x)
let trans x y = ((fun a -> fst y (fst x a)), (snd y))
let jnt = ((fun (_,x) -> ((),x)), (fun x -> x))
let up (f,_) x = f x
end
(* We can now evaluate exp to values. We need a way to show them though.
Luckily, exp is a superset of values, and we already wrote the show
interpreter for exp
*)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in List.map I.observe M.l1
(* - : string list = ["1"; "2."; "8."] *)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in List.map I.observe M.l2
(* - : string list = ["\"str\""; "1"; "2."; "8."] *)
let _ = let module I = Reducer(Show) in
let module M = Ex1(I) in I.observe M.l1v
(* - : string = "11." *)
;;