(** Eliminating Array bound checking with non-dependent types An attempt to give as literal as possible translation of Hongwei Xi's code (Xi and Pfenning, PLDI98) into OCaml. Please see ../Haskell/eliminating-array-bound-check-literally.hs for the corresponding Haskell code. Please see eliminating-array-bound-check-functor.ml for a functor-based approach. It is simpler, and sufficient if the security kernel is stateless. The present translation has a lot of common with ../Haskell/KMP-deptype.hs (the translation of another Hongwei Xi's code, from the same paper). The file ../Haskell/eliminating-array-bound-check.lhs gives a slightly less literal (but perhaps more elegant) translation. $Id: eliminating-array-bound-check-literally.ml,v 1.2 2006/09/06 04:42:15 oleg Exp oleg $ *) (* Our example is `bsearch', taken from the famous paper "Eliminating Array Bound Checking Through Dependent Types" by Hongwei Xi and Frank Pfenning (PLDI'98). Hongwei Xi's code was written in SML extended with a restricted form of dependent types. Here is the original code of the example (taken from Figure 3 of that paper, see also http://www-2.cs.cmu.edu/~hwxi/DML/examples/) datatype 'a answer = NONE | SOME of int * 'a assert sub <| {n:nat, i:nat | i < n } 'a array(n) * int(i) -> 'a assert length <| {n:nat} 'a array(n) -> int(n) fun('a){size:nat} bsearch cmp (key, arr) = let fun look(lo, hi) = if hi >= lo then let val m = (hi + lo) div 2 val x = sub(arr, m) in case cmp(key, x) of LESS => look(lo, m-1) | EQUAL => (SOME(m, x)) | GREATER => look(m+1, hi) end else NONE where look <| {l:nat, h:int | 0 <= l <= size /\ 0 <= h+1 <= size } int(l) * int(h) -> 'a answer in look (0, length arr - 1) end where bsearch <| ('a * 'a -> order) -> 'a * 'a array(size) -> 'a answer The text after `<|' are dependent type annotations. They _must_ be specified by the programmer -- even for internal functions such as `look'. *) (** ---------------------------------------------------------------------- The trusted kernel The code relies on a compact general-purpose trusted kernel below. The phantom type [s] is a type proxy for the pair of numbers (low,high) -- the boundaries of array indices (low >= 0, high < max_int/2). We don't know what array indices are until the run time. Yet we do know (due to the way barray, etc, types are constructed) that the same [s] means the the same interval of indices. The safety depends on the trusted way of creating branded types: types bindex, barray etc. are abstract and their implementation is known to the trusted kernel only. The uniqueness of [s] afforded by the explicit universal quantification prevents mixing up of different brands. The value of the type ['s bindex] denotes an index value that is assuredly { low <= i <= high } where low and high are the bounds represented by the type parameter [s]. The value of the type ['s bindexL] denotes an index value that is assuredly { low <= i } where low is the lower bound represented by the type parameter [s]. The value of the type ['s bindexH] denotes an index value that is assuredly { i <= high } where high is the upper bound represented by the type parameter [s]. We must re-iterate that safety depends on assurances of the code that constructs bindex values. Because of the high assurance, we must formulate the safety properties as propositions, and prove them. Fortunately, the code below is compact and straightforward, as well as general purpose. The function [brand] is the introduction rule for barray, which determines the run-time bounds of an array and passes the branded array and the low and upper bounds to its continuation. The function brand has a higher-rank type. It is the quantification of [s] as well as the abstractness of barray (in the kernel interface) guarantee that the same brand entails the same bounds. The function [index_cmp] does index comparison, comparing bindexL with bindexH. If the first is less than or equal to the second, invoke the onle continuation, passing the indices converted to bindex. Because a branded index is assuredly within the bounds of the array of the same brand, we can _safely_ use Array.unsafe_get *) module TrustedKernel : sig type ('s,'a) barray type 's bindex val unbi : 's bindex -> int type 's bindexL type 's bindexH (* For historical reasons, higher-rank types in OCaml are limited to records. *) type ('w,'a) brand_k = {bk : 's . ('s,'a) barray * 's bindexL * 's bindexH -> 'w} val brand : 'a array -> ('w,'a) brand_k -> 'w (* the type of [bmiddle] assures that all indices involved have the same brand -- that is, the same lower and upper boundaries. *) val bmiddle : 's bindex -> 's bindex -> 's bindex val index_cmp : 's bindexL -> 's bindexH -> (unit -> 'w) (* onelse continuation *) -> ('s bindex -> 's bindex -> 'w) (* onle cont *) -> 'w (* Index arithmetics *) val bsucc : 's bindex -> 's bindexL val bpred : 's bindex -> 's bindexH val bget : ('s,'a) barray -> 's bindex -> 'a end = struct (* In reality, all these branded types are merely type aliases. They should have no run-time overhead *) type ('s,'a) barray = 'a array type 's bindex = int type 's bindexL = int type 's bindexH = int type ('w,'a) brand_k = {bk : 's . ('s,'a) barray * 's bindexL * 's bindexH -> 'w} let unbi x = x (* Proposition: For the argument (a,l,h) of the continuation k, l >= low bound of the array a (which is 0 in OCaml), h <= upper bound of the array a. Proof: from the semantics of the function `length' and the semantics of OCaml arrays (taken here as an axiom) and the code itself. *) let brand a k = let len = Array.length a in let _ = assert (len < max_int/2) in k.bk (a, 0, (len-1)) (* Proposition: l <= i1 <= h, l <= i2 <= h |- l <= (i1 + i2) `div` 2 <= h Proof: plain arithmetics, the fact h < max_int/2 guarantees the freedom from overflow. *) let bmiddle i1 i2 = (i1 + i2)/2 (* Safety proposition: {i:int | low <= i}, {j:int | j <= high}, i <= j ===> {i,j:int | low <= i,j <= high}. See the definition of bindex then. *) let index_cmp i j onother onle = if i <= j then onle i j else onother () (* Safety proposition: {i:int | low <= i <= high} ===> {i:int | low <= i+1} . See the definition of bindexL then. *) let bsucc = succ (* Safety proposition: {i:int | low <= i <= high} ===> {i:int | i-1 <= high} . See the definition of bindexH then. *) let bpred = pred let bget = Array.unsafe_get end ;; (* Do a simple test for eigen-variables, to assure us that the eigen-variable 's is indeed unique and unforgeable. Two different branding of the same array must indeed be different *) open TrustedKernel;; (* We get the expected type error. The generativity is working! let test_eigen = let arr = [|1|] in brand arr {bk = (fun barr1 -> brand arr {bk = (fun barr2 -> barr1 == barr2 )})} ;; *) (* ---------------------------------------------------------------------- *) (* Finally, our bsearch function The code below is just as algorithmically efficient as the Dependent SML code: one middle index computation, one element comparison, one index comparison, one index increment or decrement per iteration. There are no type annotations as none are needed. The function [bget] is a statically safe array indexing operator. The type system and the trust properties of the kernel below guarantee that in the expression [bget arr m] the index [m] is positively in range of the array [arr] bounds. *) let bsearch' cmp (key,(arr,lo,hi)) = let rec look lo hi = index_cmp lo hi (fun () -> None) (fun lo' hi' -> let m = bmiddle lo' hi' in let x = bget arr m in let cmp_r = cmp (key,x) in if cmp_r < 0 then look lo (bpred m) else if cmp_r = 0 then Some (unbi m, x) else look (bsucc m) hi) in look lo hi let bsearch cmp (key, arr) = brand arr {bk = fun arrb -> bsearch' cmp (key, arrb)} ;; (* Tests *) let barr1 = [|'a';'b';'c';'d';'e';'f';'g';'h'|] let uncurry f = fun (x,y) -> f x y let btest1 = bsearch (uncurry compare) ('c',barr1) let btest2 = bsearch (uncurry compare) ('a',barr1) let btest3 = bsearch (uncurry compare) ('h',barr1) let btest4 = bsearch (uncurry compare) ('x',barr1) let btest5 = bsearch (uncurry compare) ('x',[||]) ;; (* ---------------------------------------------------------------------- *) (* Alain Frisch, in his comment on the caml-list on Sep 4, 2006 pointed out the alternative to higher-rank record types: functors over anonymous structures and functors over value signatures: module GenT(X:sig end) : sig type t val v : t end = struct type t = int let v = 1 end ;; let module M1 = GenT(struct end) in let module M2 = GenT(struct end) in M1.v = M2.v (* Error, as expected! *) ;; This approach crucially relies on local modules (the `let module' form that can be used not necessarily at the top level), available in OCaml. The structure really has to be anonymous to achieve genericity let module N = struct let x = 1 end in let module M1 = GenT(N) in let module M2 = GenT(N) in M1.v = M2.v (* But this passes *) ;; But this is OK: we can put this structure into the trusted kernel... Furthermore, we can parameterize the functor by the length of the array, and thus obtain generativity in this way: module GT(X:sig val x : int end) : sig type t val v : t end = struct type t = int let v = 1 end ;; let module M1 = GT(struct let x = 1 end) in let module M2 = GT(struct let x = 1 end) in M1.v = M2.v (* problem *) ;; let module N = struct let x = 1 end in let module M1 = GT(N) in let module M2 = GT(N) in M1.v = M2.v (* no problem *) ;; Alain Frisch wrote: ``As a work-around, you can take only the length of the array as an argument: module TrustedKernel(L: sig val len: int end) : sig type 'a barray type bindex ... end = struct let brand a = assert(Array.length a = L.len); (a,0,L.len - 1) end On the one hand, this adds one run-time check, but on the other hand it makes the abstract index types depend only on the length (so the trusted kernel could be used in algorithms which work with several arrays of the same size simultaneously).'' Although TrustedKernel is an applicative rather than generative functor, see Figs 6 and 7 in ``A Type System for Higher-Order Modules'', Derek Dreyer, Karl Crary, Robert Harper http://www.cs.cmu.edu/~crary/papers/2003/thoms/thoms.pdf the difference is irrelevant if the TrustedKernel is stateless. Alain Frisch's suggestion is implemented in eliminating-array-bound-check-functor.ml. The present approach is more explicit and thus better amenable to formalization. *)