(* Generators in OCaml We optimize the straightforward implementation of the general theory (see generator-hansei.ml) to avoid the overhead of Hansei. We thus _derive_ the optimal implementation of generators based on delimited control. *) (* The fundamental primitives of Hansei are |dist| to make a choice and |reify0| to reify a probabilistic program into a lazy tree of choices |'a pV|: val dist : (prob * 'a) list -> 'a type 'a vc = V of 'a (* leaf *) | C of (unit -> 'a pV) (* unexpanded branch *) and 'a pV = (prob * 'a vc) list val reify0 : (unit -> 'a) -> 'a pV The primitives |reify0| and |dist| satisfy the following laws (We use a particular version of dist, flip 0.5, defined as dist [(0.5,true); (0.5,false)], which chooses true or false with equal probability): reify0 (fun () -> V exp) === [(1.0, V exp)] if exp is a value or an expression whose evaluation does not invoke dist. reify0(fun () -> V (ctx[flip 0.5])) === [(0.5, C(fun () -> reify0 (fun () -> V (ctx[true])))); (0.5, C(fun () -> reify0 (fun () -> V (ctx[false]))))] where ctx[] is the evaluation context. Previously, in generator-hansei.ml, we defined let yield x = if flip 0.5 then raise x We apply the laws of |reify| and |dist| to the specific expression: reify0 (fun () -> V(try exp with YT x -> x)) If exp raises no |YT| exception and contains no |yield| then reify0 (fun () -> V(exp)) === [(1.0, V exp)] If exp contains |yield (YT x)| in the evaluation context |ctx[]| then reify0 (fun () -> V(try exp with YT x -> x)) === reify0 (fun () -> V(try ctx[yield (YT x)] with YT x -> x)) === reify0 (fun () -> V(try ctx[if flip 0.5 then raise (YT x)] with YT x -> x)) === [(0.5, C (fun () -> reify0(fun () -> V (try ctx[if true then raise (YT x)] with YT x -> x)))); (0.5, C (fun () -> reify0(fun () -> V (try ctx[if false then raise (YT x)] with YT x -> x))))] === [(0.5, C (fun () -> reify0(fun () -> V (try ctx[raise (YT x)] with YT x -> x)))); (0.5, C (fun () -> reify0(fun () -> V(try ctx[()] with YT x -> x))))] === {assuming ctx does not attempt to catch the exception YT} === [(0.5, C (fun () -> reify0(fun () -> V x))); (0.5, C (fun () -> reify0(fun () -> V(try ctx[()] with YT x -> x))))] === [(0.5, C (fun () -> [(1.0, V x)]))), (0.5, C (fun () -> reify0(fun () -> V(try ctx[()] with YT x -> x))))] Let us flatten the above tree into a stream, defined as type 'a stream = Nil of 'a | Cons of 'a * (unit -> 'a stream);; and ignore the probabilities: let rec stream_append s1 s2 = match s1 with | Nil x -> Cons (x,s2) | Cons (x,s1') -> Cons (x,fun () -> stream_append (s1' ()) s2) ;; let rec pV_to_stream = function | [(_,V x)] -> Nil x | [(_,C t1);(_,C t2)] -> stream_append (pV_to_stream (t1 ())) (fun () -> pV_to_stream (t2 ())) (see stream_reify in generator-hansei.ml) The pattern-matching in pV_to_stream is inexhaustive; however, according to our derivation, the other cases may not occur. We obtain that pV_to_stream (reify0 (fun () -> V(try exp with YT x -> x))) is either |Nil exp| (if exp is a value or evaluates without yield) or Cons (x, fun () -> pV_to_stream (reify0(fun () -> V(try ctx[()] with YT x -> x)))) if exp is ctx[yield x]. Let us contrast the result with the laws of delimited control operators shift0 and reset: reset (fun () -> exp) === exp if exp is either a value or evaluates without invoking shift0 reset(fun () -> ctx[shift0 (fun k -> sbody)]) === let k v = reset (fun () -> ctx[v]) in sbody where the evaluation context ctx does not cross the reset boundary. The contrast immediately shows how to satisfy the equations for |pV_to_stream . reify0| and |yield| using shift0/reset. We define let msplit th = reset (fun () -> Nil (th ())) let yield x = shift0 (fun k -> Cons (x,k)) and indeed obtain msplit (fun () -> exp) === Nil exp if exp is a value or evaluates without yield msplit (fun () -> ctx[yield x]) === let k v = reset (fun () -> Nil (ctx[v])) in Cons (x,k) === Cons (x, fun () -> msplit (fun () -> ctx[()])) The code below is the straightforward transcription of the derived implementation. The only difference is using multi-prompt delimited control operators from the delimcc library. Instead of exceptions such as YT we use prompts. The multi-prompt operators also let us separate the type of the yielded value from the type of the overall return value. Although Hansei is implemented in terms of delimcc as well, the Hansei implementation differs quite a bit in detail from the code below. *) open Delimcc;; type 'a stream = Nil | Cons of 'a * (unit -> 'a stream);; (* In Python, `yield' is a keyword. In OCaml, it is a regular OCaml function. Furthermore, it is a user-defined function, in one line of code. To get generators there is no need to extend a language. *) let yield p x = shift0 p (fun k -> Cons (x,k));; (* The yield above is quite like yield from generator-hansei.ml We separated exception like (YT x) into `exception wrapper', the prompt, and the wrapped, yielded value, x. *) (* Reify the generator expression into a stream *) (* The name msplit may remind us of LogicT *) (* The overall result of the generator must be unit. If the generator yields the useful value, one should store it in a reference cell, and retrieve after msplit. One may say that we should have Nil carry the return value, and let it be of a different type. That will work. Alas, we cannot use prompts bound to top-level (or module-scope) variables, because of the value restriction. That is inconvenient. *) let msplit p (thunk : unit -> unit) = push_prompt p (fun () -> thunk (); Nil);; (* Re-export the new_prompt from Delimcc, with a more restricted type *) let new_prompt = Delimcc.new_prompt;; (* A few useful functions on a stream, analogous to those from LogicT *) (* take at most n elements from a stream, returning another stream *) let stream_take n s = let rec loop n s = if n <= 0 then Nil else match s () with | Nil -> Nil | Cons (x,t) -> Cons (x, fun () -> loop (pred n) t) in loop n (fun () -> s) ;; (* val stream_take : int -> 'a stream -> 'a stream = *) (* Map a function to a stream and collect the result in a list *) let stream_mapl f = let rec loop acc = function | Nil -> List.rev acc | Cons (x,t) -> loop (f x::acc) (t ()) in loop [] ;; (* A simplified version in which the mapping function returns unit, and no accumulation is needed. *) let rec stream_iter (f : 'a -> unit) = function | Nil -> () | Cons (x,t) -> f x; stream_iter f (t ()) ;; (* This is analogous to the for-loop *) let for_loop p gen f = stream_iter f (msplit p gen);; (* we can easily define the `while' loop, by introducing stream_takewhile and composing it with msplit. We may limit the number of elements to process, composing msplit with stream_take. Many loop iterators could be defined; loops become definable! *) (* Defining Icon's find: [find s1 s2] generates the positions at which s1 occurs in s2. We use 0-based indexing (unlike Icon's 1-based) *) (* Auxiliary function: check if s1 occurs in s2 at position n *) let string_eq_at n s1 s2 = n + String.length s1 <= String.length s2 && let rec loop i = i >= String.length s1 || (s1.[i] = s2.[i+n] && loop (succ i)) in loop 0 ;; (* let true = string_eq_at 0 "abc" "abc";; let false = string_eq_at 0 "abc" "ab";; let true = string_eq_at 0 "ab" "abc";; let true = string_eq_at 0 "" "abc";; let true = string_eq_at 3 "" "abc";; let false = string_eq_at 4 "" "abc";; let false = string_eq_at 2 "bc" "abc";; let true = string_eq_at 1 "bc" "abc";; *) let pfind = new_prompt ();; let find s1 s2 = let rec loop i = if i + String.length s1 <= String.length s2 then begin if string_eq_at i s1 s2 then yield pfind i; loop (succ i) end in loop 0;;