(**
Eliminating Array bound checking with non-dependent types
An attempt to give as literal as possible translation of
Hongwei Xi's code (Xi and Pfenning, PLDI98) into OCaml.
Please see eliminating-array-bound-check-literally.hs
for the corresponding Haskell code.
Please see eliminating-array-bound-check-functor.ml for
a functor-based approach. It is simpler, and sufficient if the security
kernel is stateless.
The present translation has a lot of common with KMP-deptype.hs
(the translation of another Hongwei Xi's code, from the same paper).
The file eliminating-array-bound-check.lhs gives a slightly
less literal (but perhaps more elegant) translation.
$Id: eliminating-array-bound-checks.ml,v 1.1 2019/07/30 08:15:10 oleg Exp oleg $
Original version: 1.2 2006/09/06 04:42:15
Updated: July 2014 (using private int)
*)
(*
Our example is `bsearch', taken from the famous paper "Eliminating
Array Bound Checking Through Dependent Types" by Hongwei Xi and Frank
Pfenning (PLDI'98). Hongwei Xi's code was written in SML extended
with a restricted form of dependent types. Here is the original code
of the example (taken from Figure 3 of that paper, see also
http://www-2.cs.cmu.edu/~hwxi/DML/examples/)
datatype 'a answer = NONE | SOME of int * 'a
assert sub <| {n:nat, i:nat | i < n } 'a array(n) * int(i) -> 'a
assert length <| {n:nat} 'a array(n) -> int(n)
fun('a){size:nat}
bsearch cmp (key, arr) =
let
fun look(lo, hi) =
if hi >= lo then
let
val m = (hi + lo) div 2
val x = sub(arr, m)
in
case cmp(key, x) of
LESS => look(lo, m-1)
| EQUAL => (SOME(m, x))
| GREATER => look(m+1, hi)
end
else NONE
where look <|
{l:nat, h:int | 0 <= l <= size /\ 0 <= h+1 <= size } int(l) * int(h)
-> 'a answer
in
look (0, length arr - 1)
end
where bsearch <| ('a * 'a -> order) -> 'a * 'a array(size) -> 'a answer
The text after `<|' are dependent type annotations. They _must_ be
specified by the programmer -- even for internal functions such as `look'.
*)
(* The index interface and its invariants *)
module type Index = sig
type index = private int (* i:index implies lwb <= i <= upb *)
type indexL = private int (* i:indexL implies lwb <= i *)
type indexH = private int (* i:indexH implies i <= upb *)
val lwb : indexL
val upb : indexH
val bsucc : index -> indexL
val bpred : index -> indexH
val bmid : index -> index -> index
val bcmp : indexL -> indexH -> (index * index) option
end
(* The straightforward implementation, parameterized by upb, which could be
an arbitrary integer. The lower bound is assumed zero.
We can easily verify the implementation respects the invariants of the
interface
*)
module IndexF(S:sig val upb:int end) : Index = struct
type index = int (* Within the implementation, index, indexL and *)
type indexL = int (* indexH types are no longer private, so we *)
type indexH = int (* may create their values *)
let lwb : indexL = 0
let upb : indexH = S.upb
let bsucc : index -> indexL = Stdlib.succ
let bpred : index -> indexH = Stdlib.pred
let bmid : index -> index -> index = fun x y -> x + (y - x)/2
let[@inline] bcmp : indexL -> indexH -> (index * index) option = fun i j ->
if i <= j then Some (i,j) else None
end
let _ = let module M = IndexF(struct let upb = 5 end) in
(M.lwb :> int)
;;
let _ = let module M = IndexF(struct let upb = 5 end) in
M.(if bcmp lwb upb = None then "Greater" else "Less")
;;
(* Type error!
let _ = let module M = IndexF(struct let lwb = 0 let upb = 5 end) in
((M.bsucc M.lwb) :> int)
;;
let _ = let module M = IndexF(struct let upb = 5 end) in
M.(if bcmp upb lwb = None then "Greater" else "Less")
;;
*)
module BArray(S: sig type el val arr : el array end) = struct
include IndexF(struct let upb = Array.length S.arr - 1 end)
let[@inline] bget : index -> S.el =
fun i -> Array.unsafe_get S.arr (i :> int)
end
let bsearch (type a) : (a*a -> int) -> a -> a array -> (int * a) option =
fun cmp key arr ->
let module M = BArray(struct type el = a let arr = arr end) in
let open M in
let rec look (lo:indexL) (hi:indexH) = (* type annotations are for clarity*)
match bcmp lo hi with
| None -> None
| Some (lo',hi') ->
let m = bmid lo' hi' in
let x = bget m in
let cmp_r = cmp (key,x) in
if cmp_r < 0 then look lo (bpred m)
else if cmp_r = 0 then Some ((m :> int), x)
else look (bsucc m) hi
in
look lwb upb
(* Tests *)
let barr1 = [|'a';'b';'c';'d';'e';'f';'g';'h'|]
let uncurry f = fun (x,y) -> f x y
let Some (2,'c') = bsearch (uncurry compare) 'c' barr1
let Some (0,'a') = bsearch (uncurry compare) 'a' barr1
let Some (7,'h') = bsearch (uncurry compare) 'h' barr1
let None = bsearch (uncurry compare) 'x' barr1
let None = bsearch (uncurry compare) 'x' [||]
(* There is an efficiency problem: indirect access to bsucc, etc. *)
(* we remedy it with quantified types:
the standard conversion of existentials to universals
*)
(** ----------------------------------------------------------------------
The old (since 2004) version of the code, using universal quantification
(branding)
The code relies on a compact general-purpose trusted kernel below.
The phantom type [s] is a type proxy for the pair of numbers
(low,high) -- the boundaries of array indices (low >= 0, high <
max_int/2). We don't know what array indices are until the run time.
Yet we do know (due to the way barray, etc, types are constructed)
that the same [s] means the the same interval of indices. The
safety depends on the trusted way of creating branded types: types
bindex, barray etc. are abstract and their implementation is known to
the trusted kernel only. The uniqueness of [s] afforded by the
explicit universal quantification prevents mixing up of different
brands.
The value of the type ['s bindex] denotes an index value that
is assuredly { low <= i <= high } where low and high are the bounds
represented by the type parameter [s].
The value of the type ['s bindexL] denotes an index value that
is assuredly { low <= i } where low is the lower bound
represented by the type parameter [s].
The value of the type ['s bindexH] denotes an index value that
is assuredly { i <= high } where high is the upper bound
represented by the type parameter [s].
We must re-iterate that safety depends on assurances of the code that
constructs bindex values. Because of the high assurance, we must
formulate the safety properties as propositions, and prove
them. Fortunately, the code below is compact and straightforward, as
well as general purpose.
The function [brand] is the introduction rule for barray, which determines
the run-time bounds of an array and passes the branded array and
the low and upper bounds to its continuation.
The function brand has a higher-rank type. It is the
quantification of [s] as well as the abstractness of barray (in the
kernel interface) guarantee that the same brand entails the same bounds.
The function [index_cmp] does index comparison, comparing
bindexL with bindexH. If the first is less than or equal to
the second, invoke the onle continuation, passing the indices
converted to bindex.
Because a branded index is assuredly within the bounds of the array of
the same brand, we can _safely_ use Array.unsafe_get
*)
module TrustedKernel :
sig
type ('s,'a) barray
type 's bindex = private int
type 's bindexL = private int
type 's bindexH = private int
(* It used to be that higher-rank types in OCaml were
limited to records. Not any more. Still, records are
useful.
*)
type ('w,'a) brand_k =
{bk : 's . ('s,'a) barray * 's bindexL * 's bindexH -> 'w}
val brand : 'a array -> ('w,'a) brand_k -> 'w
(* the type of [bmiddle] assures that all indices
involved have the same brand -- that is, the same lower and upper
boundaries.
*)
val bmiddle : 's bindex -> 's bindex -> 's bindex
val index_cmp : 's bindexL -> 's bindexH
-> ('s bindex * 's bindex) option
(* Index arithmetics *)
val bsucc : 's bindex -> 's bindexL
val bpred : 's bindex -> 's bindexH
val bget : ('s,'a) barray -> 's bindex -> 'a
end =
struct
(* In reality, all these branded types are merely type aliases.
They should have no run-time overhead
*)
type ('s,'a) barray = 'a array
type 's bindex = int
type 's bindexL = int
type 's bindexH = int
type ('w,'a) brand_k =
{bk : 's . ('s,'a) barray * 's bindexL * 's bindexH -> 'w}
(*
Proposition: For the argument (a,l,h) of the continuation k,
l >= low bound of the array a (which is 0 in OCaml),
h <= upper bound of the array a.
Proof: from the semantics of the function `length' and the semantics
of OCaml arrays (taken here as an axiom)
and the code itself.
*)
let brand a k = let len = Array.length a in
let _ = assert (len < max_int/2) in
k.bk (a, 0, (len-1))
(* Proposition: l <= i1 <= h, l <= i2 <= h |- l <= (i1 + i2) `div` 2 <= h
Proof: plain arithmetics, the fact h < max_int/2 guarantees the
freedom from overflow.
*)
let bmiddle i1 i2 = (i1 + i2)/2
(* Safety proposition:
{i:int | low <= i}, {j:int | j <= high}, i <= j
===> {i,j:int | low <= i,j <= high}. See the definition of bindex then.
*)
let index_cmp i j = if i <= j then Some (i,j) else None
(* Safety proposition:
{i:int | low <= i <= high}
===> {i:int | low <= i+1} . See the definition of bindexL then.
*)
let bsucc = succ
(* Safety proposition:
{i:int | low <= i <= high}
===> {i:int | i-1 <= high} . See the definition of bindexH then.
*)
let bpred = pred
let bget = Array.unsafe_get
end
;;
(* Do a simple test for eigen-variables, to assure us that the eigen-variable
's is indeed unique and unforgeable. Two different branding of the same
array must indeed be different
*)
open TrustedKernel;;
(* We get the expected type error. The generativity is working!
let test_eigen =
let arr = [|1|] in
brand arr {bk = (fun barr1 ->
brand arr {bk = (fun barr2 ->
barr1 == barr2
)})}
;;
*)
(* ---------------------------------------------------------------------- *)
(* Finally, our bsearch function
The code below is just as algorithmically efficient as the Dependent SML
code: one middle index computation, one element comparison, one index
comparison, one index increment or decrement per iteration. There are
no type annotations as none are needed. The function [bget] is a statically
safe array indexing operator. The type system and the trust properties
of the kernel below guarantee that in the expression [bget arr m] the
index [m] is positively in range of the array [arr] bounds.
*)
let bsearch' cmp key (arr,lo,hi) =
let rec look lo hi = match index_cmp lo hi with
| None -> None
| Some (lo',hi') ->
let m = bmiddle lo' hi' in
let x = bget arr m in
let cmp_r = cmp (key,x) in
if cmp_r < 0 then look lo (bpred m)
else if cmp_r = 0 then Some ((m : 's bindex :> int), x)
else look (bsucc m) hi
in
look lo hi
let bsearch cmp (key, arr) =
brand arr {bk = fun arrb -> bsearch' cmp key arrb}
(*
val bsearch : ('a * 'b -> int) -> 'a * 'b array -> (int * 'b) option =
*)
(* Tests *)
let barr1 = [|'a';'b';'c';'d';'e';'f';'g';'h'|]
let uncurry f = fun (x,y) -> f x y
let Some (2,'c') = bsearch (uncurry compare) ('c',barr1)
let Some (0,'a') = bsearch (uncurry compare) ('a',barr1)
let Some (7,'h') = bsearch (uncurry compare) ('h',barr1)
let None = bsearch (uncurry compare) ('x',barr1)
let None = bsearch (uncurry compare) ('x',[||])
;;
(* ---------------------------------------------------------------------- *)
(* Alain Frisch, in his comment on the caml-list on Sep 4, 2006 pointed out
the alternative to higher-rank record types: functors over
anonymous structures and functors over value signatures:
module GenT(X:sig end) : sig type t val v : t end =
struct type t = int let v = 1 end
;;
let module M1 = GenT(struct end) in
let module M2 = GenT(struct end) in
M1.v = M2.v (* Error, as expected! *)
;;
This approach crucially relies on local modules (the `let module' form
that can be used not necessarily at the top level), available in
OCaml.
The structure really has to be anonymous to achieve genericity
let module N = struct let x = 1 end in
let module M1 = GenT(N) in
let module M2 = GenT(N) in
M1.v = M2.v (* But this passes *)
;;
But this is OK: we can put this structure into the trusted kernel...
Furthermore, we can parameterize the functor by the length of the array,
and thus obtain generativity in this way:
module GT(X:sig val x : int end) : sig type t val v : t end =
struct type t = int let v = 1 end
;;
let module M1 = GT(struct let x = 1 end) in
let module M2 = GT(struct let x = 1 end) in
M1.v = M2.v (* problem *)
;;
let module N = struct let x = 1 end in
let module M1 = GT(N) in
let module M2 = GT(N) in
M1.v = M2.v (* no problem *)
;;
Alain Frisch wrote:
``As a work-around, you can take only the length of the array as an argument:
module TrustedKernel(L: sig val len: int end) : sig
type 'a barray
type bindex
...
end = struct
let brand a =
assert(Array.length a = L.len);
(a,0,L.len - 1)
end
On the one hand, this adds one run-time check, but on the other hand it
makes the abstract index types depend only on the length (so the trusted
kernel could be used in algorithms which work with several arrays of the
same size simultaneously).''
Although TrustedKernel is an applicative rather than generative
functor, see Figs 6 and 7 in
``A Type System for Higher-Order Modules'',
Derek Dreyer, Karl Crary, Robert Harper
http://www.cs.cmu.edu/~crary/papers/2003/thoms/thoms.pdf
the difference is irrelevant if the TrustedKernel is stateless. Alain
Frisch's suggestion is implemented in
eliminating-array-bound-check-functor.ml. The present approach is
more explicit and thus better amenable to formalization.
*)