(* Generators in OCaml
We optimize the straightforward implementation of the general
theory (see generator-hansei.ml) to avoid the overhead of
Hansei. We thus _derive_ the optimal implementation of
generators based on delimited control.
*)
(*
The fundamental primitives of Hansei are |dist| to make a choice
and |reify0| to reify a probabilistic program into a lazy tree
of choices |'a pV|:
val dist : (prob * 'a) list -> 'a
type 'a vc = V of 'a (* leaf *)
| C of (unit -> 'a pV) (* unexpanded branch *)
and 'a pV = (prob * 'a vc) list
val reify0 : (unit -> 'a) -> 'a pV
The primitives |reify0| and |dist| satisfy the following laws
(We use a particular version of dist, flip 0.5, defined as
dist [(0.5,true); (0.5,false)], which chooses true or false with
equal probability):
reify0 (fun () -> V exp) === [(1.0, V exp)]
if exp is a value or an expression whose evaluation does not
invoke dist.
reify0(fun () -> V (ctx[flip 0.5])) ===
[(0.5, C(fun () -> reify0 (fun () -> V (ctx[true]))));
(0.5, C(fun () -> reify0 (fun () -> V (ctx[false]))))]
where ctx[] is the evaluation context.
Previously, in generator-hansei.ml, we defined
let yield x = if flip 0.5 then raise x
We apply the laws of |reify| and |dist| to the specific expression:
reify0 (fun () -> V(try exp with YT x -> x))
If exp raises no |YT| exception and contains no |yield| then
reify0 (fun () -> V(exp)) === [(1.0, V exp)]
If exp contains |yield (YT x)| in the evaluation context |ctx[]| then
reify0 (fun () -> V(try exp with YT x -> x))
===
reify0 (fun () -> V(try ctx[yield (YT x)] with YT x -> x))
===
reify0 (fun () -> V(try ctx[if flip 0.5 then raise (YT x)] with YT x -> x))
===
[(0.5, C (fun () -> reify0(fun () ->
V (try ctx[if true then raise (YT x)] with YT x -> x))));
(0.5, C (fun () -> reify0(fun () ->
V (try ctx[if false then raise (YT x)] with YT x -> x))))]
===
[(0.5, C (fun () -> reify0(fun () ->
V (try ctx[raise (YT x)] with YT x -> x))));
(0.5, C (fun () -> reify0(fun () ->
V(try ctx[()] with YT x -> x))))]
===
{assuming ctx does not attempt to catch the exception YT}
===
[(0.5, C (fun () -> reify0(fun () -> V x)));
(0.5, C (fun () -> reify0(fun () -> V(try ctx[()] with YT x -> x))))]
===
[(0.5, C (fun () -> [(1.0, V x)]))),
(0.5, C (fun () -> reify0(fun () -> V(try ctx[()] with YT x -> x))))]
Let us flatten the above tree into a stream, defined as
type 'a stream = Nil of 'a | Cons of 'a * (unit -> 'a stream);;
and ignore the probabilities:
let rec stream_append s1 s2 =
match s1 with
| Nil x -> Cons (x,s2)
| Cons (x,s1') -> Cons (x,fun () -> stream_append (s1' ()) s2)
;;
let rec pV_to_stream = function
| [(_,V x)] -> Nil x
| [(_,C t1);(_,C t2)] ->
stream_append (pV_to_stream (t1 ()))
(fun () -> pV_to_stream (t2 ()))
(see stream_reify in generator-hansei.ml)
The pattern-matching in pV_to_stream is inexhaustive; however,
according to our derivation, the other cases may not occur.
We obtain that
pV_to_stream (reify0 (fun () -> V(try exp with YT x -> x)))
is either |Nil exp| (if exp is a value or evaluates without yield)
or
Cons (x, fun () -> pV_to_stream (reify0(fun () ->
V(try ctx[()] with YT x -> x))))
if exp is ctx[yield x].
Let us contrast the result with the laws of delimited control
operators shift0 and reset:
reset (fun () -> exp) === exp
if exp is either a value or evaluates without invoking shift0
reset(fun () -> ctx[shift0 (fun k -> sbody)]) ===
let k v = reset (fun () -> ctx[v]) in
sbody
where the evaluation context ctx does not cross the reset
boundary.
The contrast immediately shows how to satisfy the equations
for |pV_to_stream . reify0| and |yield| using shift0/reset.
We define
let msplit th = reset (fun () -> Nil (th ()))
let yield x = shift0 (fun k -> Cons (x,k))
and indeed obtain
msplit (fun () -> exp) === Nil exp
if exp is a value or evaluates without yield
msplit (fun () -> ctx[yield x])
=== let k v = reset (fun () -> Nil (ctx[v])) in
Cons (x,k)
=== Cons (x, fun () -> msplit (fun () -> ctx[()]))
The code below is the straightforward transcription of the derived
implementation. The only difference is using multi-prompt delimited
control operators from the delimcc library. Instead of exceptions such
as YT we use prompts. The multi-prompt operators also let
us separate the type of the yielded value from the type of the overall
return value.
Although Hansei is implemented in terms of delimcc as well, the Hansei
implementation differs quite a bit in detail from the code below.
*)
open Delimcc;;
type 'a stream = Nil | Cons of 'a * (unit -> 'a stream);;
(* In Python, `yield' is a keyword. In OCaml, it is a regular OCaml function.
Furthermore, it is a user-defined function, in one line of code.
To get generators there is no need to extend a language.
*)
let yield p x = shift0 p (fun k -> Cons (x,k));;
(*
The yield above is quite like yield from generator-hansei.ml
We separated exception like (YT x) into `exception wrapper',
the prompt, and the wrapped, yielded value, x.
*)
(* Reify the generator expression into a stream *)
(* The name msplit may remind us of LogicT *)
(* The overall result of the generator must be unit.
If the generator yields the useful value, one should store
it in a reference cell, and retrieve after msplit.
One may say that we should have Nil carry the return value,
and let it be of a different type. That will work. Alas,
we cannot use prompts bound to top-level (or module-scope)
variables, because of the value restriction. That is inconvenient.
*)
let msplit p (thunk : unit -> unit) =
push_prompt p (fun () -> thunk (); Nil);;
(* Re-export the new_prompt from Delimcc, with a more restricted type *)
let new_prompt = Delimcc.new_prompt;;
(* A few useful functions on a stream, analogous to those from LogicT *)
(* take at most n elements from a stream, returning another stream *)
let stream_take n s =
let rec loop n s = if n <= 0 then Nil
else match s () with
| Nil -> Nil
| Cons (x,t) -> Cons (x, fun () -> loop (pred n) t)
in loop n (fun () -> s)
;;
(* val stream_take : int -> 'a stream -> 'a stream = *)
(* Map a function to a stream and collect the result in a list *)
let stream_mapl f =
let rec loop acc = function
| Nil -> List.rev acc
| Cons (x,t) -> loop (f x::acc) (t ())
in loop []
;;
(* A simplified version in which the mapping function returns unit,
and no accumulation is needed.
*)
let rec stream_iter (f : 'a -> unit) = function
| Nil -> ()
| Cons (x,t) -> f x; stream_iter f (t ())
;;
(* This is analogous to the for-loop *)
let for_loop p gen f = stream_iter f (msplit p gen);;
(* we can easily define the `while' loop, by introducing stream_takewhile
and composing it with msplit.
We may limit the number of elements to process, composing msplit with
stream_take.
Many loop iterators could be defined; loops become definable!
*)
(* Defining Icon's find:
[find s1 s2] generates the positions at which s1 occurs in s2.
We use 0-based indexing (unlike Icon's 1-based)
*)
(* Auxiliary function: check if s1 occurs in s2 at position n *)
let string_eq_at n s1 s2 =
n + String.length s1 <= String.length s2 &&
let rec loop i =
i >= String.length s1 || (s1.[i] = s2.[i+n] && loop (succ i))
in loop 0
;;
(*
let true = string_eq_at 0 "abc" "abc";;
let false = string_eq_at 0 "abc" "ab";;
let true = string_eq_at 0 "ab" "abc";;
let true = string_eq_at 0 "" "abc";;
let true = string_eq_at 3 "" "abc";;
let false = string_eq_at 4 "" "abc";;
let false = string_eq_at 2 "bc" "abc";;
let true = string_eq_at 1 "bc" "abc";;
*)
let pfind = new_prompt ();;
let find s1 s2 =
let rec loop i =
if i + String.length s1 <= String.length s2 then
begin
if string_eq_at i s1 s2 then yield pfind i;
loop (succ i)
end
in loop 0;;