(**
Eliminating Array bound checking with non-dependent types
An attempt to give as literal as possible translation of
Hongwei Xi's code (Xi and Pfenning, PLDI98) into OCaml.
Please see ../Haskell/eliminating-array-bound-check-literally.hs
for the corresponding Haskell code.
Please see eliminating-array-bound-check-functor.ml for
a functor-based approach. It is simpler, and sufficient if the security
kernel is stateless.
The present translation has a lot of common with ../Haskell/KMP-deptype.hs
(the translation of another Hongwei Xi's code, from the same paper).
The file ../Haskell/eliminating-array-bound-check.lhs gives a slightly
less literal (but perhaps more elegant) translation.
$Id: eliminating-array-bound-check-literally.ml,v 1.2 2006/09/06 04:42:15 oleg Exp oleg $
*)
(*
Our example is `bsearch', taken from the famous paper "Eliminating
Array Bound Checking Through Dependent Types" by Hongwei Xi and Frank
Pfenning (PLDI'98). Hongwei Xi's code was written in SML extended
with a restricted form of dependent types. Here is the original code
of the example (taken from Figure 3 of that paper, see also
http://www-2.cs.cmu.edu/~hwxi/DML/examples/)
datatype 'a answer = NONE | SOME of int * 'a
assert sub <| {n:nat, i:nat | i < n } 'a array(n) * int(i) -> 'a
assert length <| {n:nat} 'a array(n) -> int(n)
fun('a){size:nat}
bsearch cmp (key, arr) =
let
fun look(lo, hi) =
if hi >= lo then
let
val m = (hi + lo) div 2
val x = sub(arr, m)
in
case cmp(key, x) of
LESS => look(lo, m-1)
| EQUAL => (SOME(m, x))
| GREATER => look(m+1, hi)
end
else NONE
where look <|
{l:nat, h:int | 0 <= l <= size /\ 0 <= h+1 <= size } int(l) * int(h)
-> 'a answer
in
look (0, length arr - 1)
end
where bsearch <| ('a * 'a -> order) -> 'a * 'a array(size) -> 'a answer
The text after `<|' are dependent type annotations. They _must_ be
specified by the programmer -- even for internal functions such as `look'.
*)
(** ----------------------------------------------------------------------
The trusted kernel
The code relies on a compact general-purpose trusted kernel below.
The phantom type [s] is a type proxy for the pair of numbers
(low,high) -- the boundaries of array indices (low >= 0, high <
max_int/2). We don't know what array indices are until the run time.
Yet we do know (due to the way barray, etc, types are constructed)
that the same [s] means the the same interval of indices. The
safety depends on the trusted way of creating branded types: types
bindex, barray etc. are abstract and their implementation is known to
the trusted kernel only. The uniqueness of [s] afforded by the
explicit universal quantification prevents mixing up of different
brands.
The value of the type ['s bindex] denotes an index value that
is assuredly { low <= i <= high } where low and high are the bounds
represented by the type parameter [s].
The value of the type ['s bindexL] denotes an index value that
is assuredly { low <= i } where low is the lower bound
represented by the type parameter [s].
The value of the type ['s bindexH] denotes an index value that
is assuredly { i <= high } where high is the upper bound
represented by the type parameter [s].
We must re-iterate that safety depends on assurances of the code that
constructs bindex values. Because of the high assurance, we must
formulate the safety properties as propositions, and prove
them. Fortunately, the code below is compact and straightforward, as
well as general purpose.
The function [brand] is the introduction rule for barray, which determines
the run-time bounds of an array and passes the branded array and
the low and upper bounds to its continuation.
The function brand has a higher-rank type. It is the
quantification of [s] as well as the abstractness of barray (in the
kernel interface) guarantee that the same brand entails the same bounds.
The function [index_cmp] does index comparison, comparing
bindexL with bindexH. If the first is less than or equal to
the second, invoke the onle continuation, passing the indices
converted to bindex.
Because a branded index is assuredly within the bounds of the array of
the same brand, we can _safely_ use Array.unsafe_get
*)
module TrustedKernel :
sig
type ('s,'a) barray
type 's bindex
val unbi : 's bindex -> int
type 's bindexL
type 's bindexH
(* For historical reasons, higher-rank types in OCaml are
limited to records.
*)
type ('w,'a) brand_k =
{bk : 's . ('s,'a) barray * 's bindexL * 's bindexH -> 'w}
val brand : 'a array -> ('w,'a) brand_k -> 'w
(* the type of [bmiddle] assures that all indices
involved have the same brand -- that is, the same lower and upper
boundaries.
*)
val bmiddle : 's bindex -> 's bindex -> 's bindex
val index_cmp : 's bindexL -> 's bindexH
-> (unit -> 'w) (* onelse continuation *)
-> ('s bindex -> 's bindex -> 'w) (* onle cont *)
-> 'w
(* Index arithmetics *)
val bsucc : 's bindex -> 's bindexL
val bpred : 's bindex -> 's bindexH
val bget : ('s,'a) barray -> 's bindex -> 'a
end =
struct
(* In reality, all these branded types are merely type aliases.
They should have no run-time overhead
*)
type ('s,'a) barray = 'a array
type 's bindex = int
type 's bindexL = int
type 's bindexH = int
type ('w,'a) brand_k =
{bk : 's . ('s,'a) barray * 's bindexL * 's bindexH -> 'w}
let unbi x = x
(*
Proposition: For the argument (a,l,h) of the continuation k,
l >= low bound of the array a (which is 0 in OCaml),
h <= upper bound of the array a.
Proof: from the semantics of the function `length' and the semantics
of OCaml arrays (taken here as an axiom)
and the code itself.
*)
let brand a k = let len = Array.length a in
let _ = assert (len < max_int/2) in
k.bk (a, 0, (len-1))
(* Proposition: l <= i1 <= h, l <= i2 <= h |- l <= (i1 + i2) `div` 2 <= h
Proof: plain arithmetics, the fact h < max_int/2 guarantees the
freedom from overflow.
*)
let bmiddle i1 i2 = (i1 + i2)/2
(* Safety proposition:
{i:int | low <= i}, {j:int | j <= high}, i <= j
===> {i,j:int | low <= i,j <= high}. See the definition of bindex then.
*)
let index_cmp i j onother onle = if i <= j then onle i j else onother ()
(* Safety proposition:
{i:int | low <= i <= high}
===> {i:int | low <= i+1} . See the definition of bindexL then.
*)
let bsucc = succ
(* Safety proposition:
{i:int | low <= i <= high}
===> {i:int | i-1 <= high} . See the definition of bindexH then.
*)
let bpred = pred
let bget = Array.unsafe_get
end
;;
(* Do a simple test for eigen-variables, to assure us that the eigen-variable
's is indeed unique and unforgeable. Two different branding of the same
array must indeed be different
*)
open TrustedKernel;;
(* We get the expected type error. The generativity is working!
let test_eigen =
let arr = [|1|] in
brand arr {bk = (fun barr1 ->
brand arr {bk = (fun barr2 ->
barr1 == barr2
)})}
;;
*)
(* ---------------------------------------------------------------------- *)
(* Finally, our bsearch function
The code below is just as algorithmically efficient as the Dependent SML
code: one middle index computation, one element comparison, one index
comparison, one index increment or decrement per iteration. There are
no type annotations as none are needed. The function [bget] is a statically
safe array indexing operator. The type system and the trust properties
of the kernel below guarantee that in the expression [bget arr m] the
index [m] is positively in range of the array [arr] bounds.
*)
let bsearch' cmp (key,(arr,lo,hi)) =
let rec look lo hi = index_cmp lo hi (fun () -> None)
(fun lo' hi' ->
let m = bmiddle lo' hi' in
let x = bget arr m in
let cmp_r = cmp (key,x) in
if cmp_r < 0 then look lo (bpred m)
else if cmp_r = 0 then Some (unbi m, x)
else look (bsucc m) hi)
in
look lo hi
let bsearch cmp (key, arr) =
brand arr {bk = fun arrb -> bsearch' cmp (key, arrb)}
;;
(* Tests *)
let barr1 = [|'a';'b';'c';'d';'e';'f';'g';'h'|]
let uncurry f = fun (x,y) -> f x y
let btest1 = bsearch (uncurry compare) ('c',barr1)
let btest2 = bsearch (uncurry compare) ('a',barr1)
let btest3 = bsearch (uncurry compare) ('h',barr1)
let btest4 = bsearch (uncurry compare) ('x',barr1)
let btest5 = bsearch (uncurry compare) ('x',[||])
;;
(* ---------------------------------------------------------------------- *)
(* Alain Frisch, in his comment on the caml-list on Sep 4, 2006 pointed out
the alternative to higher-rank record types: functors over
anonymous structures and functors over value signatures:
module GenT(X:sig end) : sig type t val v : t end =
struct type t = int let v = 1 end
;;
let module M1 = GenT(struct end) in
let module M2 = GenT(struct end) in
M1.v = M2.v (* Error, as expected! *)
;;
This approach crucially relies on local modules (the `let module' form
that can be used not necessarily at the top level), available in
OCaml.
The structure really has to be anonymous to achieve genericity
let module N = struct let x = 1 end in
let module M1 = GenT(N) in
let module M2 = GenT(N) in
M1.v = M2.v (* But this passes *)
;;
But this is OK: we can put this structure into the trusted kernel...
Furthermore, we can parameterize the functor by the length of the array,
and thus obtain generativity in this way:
module GT(X:sig val x : int end) : sig type t val v : t end =
struct type t = int let v = 1 end
;;
let module M1 = GT(struct let x = 1 end) in
let module M2 = GT(struct let x = 1 end) in
M1.v = M2.v (* problem *)
;;
let module N = struct let x = 1 end in
let module M1 = GT(N) in
let module M2 = GT(N) in
M1.v = M2.v (* no problem *)
;;
Alain Frisch wrote:
``As a work-around, you can take only the length of the array as an argument:
module TrustedKernel(L: sig val len: int end) : sig
type 'a barray
type bindex
...
end = struct
let brand a =
assert(Array.length a = L.len);
(a,0,L.len - 1)
end
On the one hand, this adds one run-time check, but on the other hand it
makes the abstract index types depend only on the length (so the trusted
kernel could be used in algorithms which work with several arrays of the
same size simultaneously).''
Although TrustedKernel is an applicative rather than generative
functor, see Figs 6 and 7 in
``A Type System for Higher-Order Modules'',
Derek Dreyer, Karl Crary, Robert Harper
http://www.cs.cmu.edu/~crary/papers/2003/thoms/thoms.pdf
the difference is irrelevant if the TrustedKernel is stateless. Alain
Frisch's suggestion is implemented in
eliminating-array-bound-check-functor.ml. The present approach is
more explicit and thus better amenable to formalization.
*)