(* Hongwei Xi's Dependent ML example: Knuth-Morris-Pratt string matching
with statically assured safe array access
*)
(*
The following Dependent ML code is downloaded from
\url{http://www-2.cs.cmu.edu/~hwxi/DML/examples/kmpHard.mini}
KMP is an imperative algorithm, which also uses indices in a creative way:
Index -1 (which is deliberately out-of-bounds) is used as a special value.
The code below is actually Dependent ML. The stuff after <| are DML
declarations (of dependent types).
The function 'sub' is a DML array access operation with no bound check.
The function 'length' is a dependently-typed built-in function to obtain
the run-time value of the length of a string or any other array.
The functions arrayShift, subShift and updateShift are creator,
accessor and setter for the shiftArray, whose element type is dependent
on the run-time value (the length of the pattern string).
The kmpHard.mini file downloaded from the above URL had three small
issues in the algorithm, which are corrected below. Incidentally, even
in the DML code the dependent types did not -- and were not intended
to -- assure the total correctness. Only the safety of array access
was at stake. Thus, writing test code is still a good idea; we have to
write less test code now.
Our reimplementation of the code with the lightweight static
gurantees forced us to deal with the edge cases, of the string or the
pattern being empty (or both). It is not clear what the DML function
kmpMatch below will do if invoked with an empty pattern. Probably some
obscure compiler error because the type intShift becomes unpopulated.
It seems that we have to write type annotations even of local functions.
Furthermore, the DML type annotation [<| unit] in loopShift is worrisome:
if we need to write such a trivial annotation sometimes, the type checking
algorithm doesn't seem robust.
*)
(*
* This is an implementation of the Knuth-Morris-Pratt string matching
* algorithm in standard ML. Also this demonstrates some interesting questions
* involved with array bounds checks.
*)
structure KMP =
struct
assert sub <| {size:nat, i:int | 0 <= i < size}
'a array(size) * int(i) -> 'a
and length <| {n:nat} 'a array(n) -> int(n)
fun{slen:nat, plen:nat}
kmpMatch(str, pat) =
let
type intShift = [i:int| 0 <= i+1 < plen ] int(i)
assert arrayShift <| {size:nat}
int(size) * intShift -> intShift array(size)
and subShift <| {size:nat, i:int | 0 <= i < size}
intShift array(size) * int(i) -> intShift
and updateShift <| {size:nat, i:int | 0 <= i < size}
intShift array(size) * int(i) * intShift -> unit
val slen = length(str)
and plen = length(pat)
val shiftArray = arrayShift(plen, ~1)
fun loopShift(i, j) = (* calculate the shift array *)
if (j = plen) then ()
else
if sub(pat, j) <> sub(pat, i+1) then
if (i >= 0) then
loopShift(subShift(shiftArray, i), j)
else loopShift(~1, j+1)
else ((if (i+1 < j)
then updateShift(shiftArray, j, i+1)
else ()) <| unit;
loopShift(subShift(shiftArray, j), j+1))
where loopShift <| {j:int | 0 < j <= plen}
intShift * int(j) -> unit
val _ = loopShift(~1, 1)
fun loop(s, p) = (* this the main search function *)
if p < plen then
if s < slen then
if sub(str, s) = sub(pat, p) then loop(s+1, p+1)
else
if (p = 0) then loop(s+1, p)
else loop(s, subShift(shiftArray, p-1)+1)
else ~1
else s - plen
where loop <| {s:nat, p:nat | s <= slen /\ p <= plen}
int(s) * int(p) -> int
in
loop(0, 0)
end
where kmpMatch <| int array(slen) * int array(plen) -> int
end