(* Lifted map-reduce, loops over individuals and (all) subsets of individuals *)
(* This is a different approach, more in the spirit of HANSEI.
It partitions the work between reify and map_reduce_sw differently.
The end results are identical to those achieved in lifted_nested_mp.ml
and lifted_tuples.ml.
The present code extends lifted.ml to handle loops over sets of
individuals. We add more constraints to our language: membership
and cardinality constraints. The first part of this file is virtually
identical to that of lifted.ml (some data types are more general though).
Iterating over all subsets of a the domain is implemented near the end
of this file.
Although this code is not described in the paper, it should be easy
to understand from the description in the paper. After all, it solves
the same problem.
The code to lift is a regular OCaml code, which may use loop variables
using a restricted set of operations. At present, there are only two
operation on loop variables: the equality comparison equ and
the inequality lte (which is used for subset inclusion rather for
numeric inequalities).
Future work: express the queries like the following:
what is the product, over every individual x, of the number of subsets
of the domain that contain x?
and of queries that express intersection (for example, count the number of
subsets of individuals with a non-empty intersection).
*)
open Delimcc;;
open Big_int;;
(*
#load "nums.cma";;
*)
(* The `Logic' (or loop) variable of type 'i *)
(* We use the value of the type `generation' to impose the total order on
logic variables. The variable of the greatest generation has been
created by the most recent reify. Of course that holds only
if no loop variable escapes from the scope of the reify that created
it. Well, if it does, the corresponding prompt will no longer
be set so that any attempt to use such a variable will lead to
failure.
*)
type generation = int
let new_generation =
let cnt = ref 0 in
fun () ->
incr cnt; !cnt
;;
(* The variable and the interface to the reifier,
which `runs' the lifted computation.
The request is delivered to the closest reify and always concerns
its own variable. Therefore, we don't need to mention that
variable in the request.
We also define available constrain operations
*)
type indiv = string (* I'll generalize later *)
;;
type 'i var = Val of 'i | Var of generation * 'i req prompt
and ('i,'r) ctx =
(* Equality of the loop variable with some other variable of value *)
| C_eq of 'i var * (bool -> 'r)
(* The loop variable contains the individual *)
| C_in of indiv * (bool -> 'r) (* This should be replaced with 'i ivar
to answer intersection-like queries *)
| C_card of (int -> 'r)
and 'i req = Req of ('i,'i req) ctx
(* The result of reification: the lazy tree of `cases' terminated with
the default case. Compare with the result of HANSEI reification, the
lazy tree of weighted choices.
It is a binary tree: the first branch corresponds to the case when
the constraint 'i ctx holds; the second branch is the result when
the constraint does not hold.
*)
and ('i,'r) switch = Default of 'r
| Case of ('i, ('i,'r) switch) ctx
;;
(* Polymorphic equality over loop variables *)
(*--start-equ--*)
let equ : 'i var -> 'i var -> bool = fun x y ->
match (x,y) with
| (Var (gx,px), Var (gy,py)) ->
if gx > gy then
shift px (fun k -> Req (C_eq (y,k)))
else if gx < gy then
shift py (fun k -> Req (C_eq (x,k)))
else true
| (Val x, Val y) -> x = y
| (Var (_,px), y) | (y, Var (_,px)) ->
shift px (fun k -> Req (C_eq (y,k)))
(*--end-equ--*)
;;
let rec member x = function
| [] -> false
| (h::t) -> equ x h || member x t
;;
(* The reifier *)
(* Unlike the reifier in the paper, it does not try to solve the
constraints. It merely reifies the loop body to a tree.
It is quite like reify0 in HANSEI: it is very dumb.
All smarts are in map_reduce, in the function that uses the
reified data structure.
*)
let reify : ('i var -> 'r) -> ('i,'r) switch = fun f ->
let var_prompt = new_prompt () in
let var = Var (new_generation (), var_prompt) in
let presult = new_prompt () in
let k_switch loop k r = push_prompt presult (fun () -> loop (k r)) in
let rec loop = function
| Req C_eq (v,k) ->
abort presult (Case (C_eq (v, k_switch loop k)))
| Req C_in (v,k) ->
abort presult (Case (C_in (v, k_switch loop k)))
| Req C_card (k) ->
abort presult (Case (C_card (k_switch loop k)))
in
push_prompt presult (fun () ->
loop (push_prompt var_prompt (fun () ->
abort presult (Default (f var)))))
;;
(* The map-reduce part *)
type 'i coll = {coll_member : 'i var -> bool;
coll_cardinality : unit -> int};;
(* A sample collection used in all of the examples *)
(* The sample domain of people of cardinality 100.
We distinguish three special people
*)
let people =
{coll_member = (fun v -> true); (* Assume this is the whole domain *)
coll_cardinality = (fun () -> 100);
};;
(* A commutative monoid *)
type 'r monoid = {union : 'r -> 'r -> 'r;
empty : 'r}
let sum_monoid = {union = (+); empty = 0}
;;
let rec repeat monoid r = function
| 0 -> monoid.empty
| 1 -> r
| n when n mod 2 = 0 ->
repeat monoid (monoid.union r r) (n/2)
| n ->
monoid.union r (repeat monoid r (n-1))
;;
(* Fold over the collection coll. We apply the map function represented
by the switch, and reduce the result with the commutative monoid.
This is quite like explore of HANSEI
*)
let map_reduce_sw monoid coll fsw =
let rec loop coll = function
| Default x -> repeat monoid x (coll.coll_cardinality ())
| Case (C_eq (v,k)) ->
(match v with Var _ -> print_endline "case-var"
| _ -> print_endline "case-val");
if coll.coll_member v
then
monoid.union
(loop {coll_member = (fun x -> equ x v);
coll_cardinality = (fun () -> 1)}
(k true))
(loop {coll_member =
(fun x -> if equ x v then false else coll.coll_member x);
coll_cardinality =
(fun () -> let c = coll.coll_cardinality () in
if c = 0 then 0 else pred c)}
(k false))
else
loop coll (k false)
in loop coll fsw;;
let sum_over coll = map_reduce_sw sum_monoid coll;;
(* Simple tests, identical to the ones in lifted.ml *)
let 100 = sum_over people (reify (fun x -> 1));;
let 101 = sum_over people (reify (fun x ->
if equ x (Val "alice") then 2 else 1));;
let 103 = sum_over people (reify (fun x ->
if equ x (Val "alice") then 2 else
if equ x (Val "bob") then 3 else 1));;
let 203 = sum_over people (reify (fun x ->
if equ x (Val "alice") then
(if equ (Val "alice") x then 102 else -100)
else if equ x (Val "bob") then 3 else 1));;
let 1 = sum_over people (reify (fun x ->
if equ x (Val "alice") then
(if equ (Val "bob") x then 102 else -100)
else if equ x (Val "bob") then 3 else 1));;
let 10000 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y -> 1))));;
let 9900 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ y (Val "alice") then 0 else 1))));;
let 9900 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ x (Val "alice") then 0 else 1))));;
let 9900 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ y x then 0 else 1))));;
let 9900 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ x y then 0 else 1))));;
let 29800 = (* 99 * (99 + 200) + (99+100) *)
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ x y then
(if equ x (Val "alice") then
100 else 200)
else 1))));;
let 29800 = (* 99 * (99 + 200) + (99+100) *)
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ x y then
(if equ x (Val "alice") then
(if equ y (Val "alice") then 100 else -100000)
else 200)
else 1))));;
(* Our running example in the paper *)
let 9703 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
if equ x (Val "alice") ||
equ y (Val "bob") ||
equ x y
then 0 else 1))))
;;
let 990000 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ x z then 0 else 1))))));;
let 990000 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ z x then 0 else 1))))));;
let 990000 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ x y then 0 else 1))))));;
let 990000 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ y x then 0 else 1))))));;
let 999900 = (* 100 * (9900 + 99) *)
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ y x && equ x z then 0 else 1))))));;
let 999900 = (* 100 * (9900 + 99) *)
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ y x && equ x z && equ y z then 0 else 1))))));;
let 980100 =
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ y x || equ x z then 0 else 1))))));;
let 970200 = (* 100 * 99 * 98 *)
sum_over people (reify (fun x ->
sum_over people (reify (fun y ->
sum_over people (reify (fun z ->
if equ x z || equ z y || equ x y then 0 else 1))))));;
(* Iterating over all subsets of a the domain *)
(* We represent a set of subsets of a domain in the following compact
way, using two disjoint subsets S1 and S2 of the domain and the optional
cardinality constraint ccard:
The two sets S1 and S2 denote the following set of subsets:
{ S1 U s | s <- Powerset(S2), card(S1 U s) = ccard}
If ccard is given, the cardinality is
choose (ccard - card(S1)) (card S2).
Otherwise, is the cardinality of Powerset(S2), that is,
2^Card(S2)
*)
let zero = zero_big_int
let one = big_int_of_int 1
and two = big_int_of_int 2
and three = big_int_of_int 3
;;
let union_big = add_big_int
and empty_big = zero_big_int;;
let rec repeat_big r n = mult_big_int r n;;
(*
let rec repeat_big r n =
if sign_big_int n = 0 then empty_big
else if eq_big_int n unit_big_int then r
else
let (q,rem) = quomod_big_int n (big_int_of_int 2) in
let r2 = repeat_big (union_big r r) q in
if sign_big_int rem = 0 then r2 else union_big r r2
;;
*)
type indiv_sets = (* A set of groups of individuals *)
{indivs_s1 : indiv list; (* Set S1, see description above *)
indivs_s2 : indiv list; (* Set S2 *)
ccard : int option;
}
;;
(* A sample collection used in all of the examples *)
(* The sample domain of people of cardinality 100.
We distinguish three special people
*)
let all_people =
let rec loop acc = function
| 0 -> acc
| n -> loop (("p" ^ string_of_int n) :: acc) (n-1)
in loop ["alice"; "bob"; "carol"] 97
;;
(* The powerset of all_people, the domain of the power-set iteration *)
let all_people_groups =
{indivs_s1 = []; indivs_s2 = all_people; ccard = None};;
(* Check if a collection s contains the element x
Suppose we have a loop
map_reduce_powerset coll (reify (fun s ->
...
map_reduce_sw S1 (reify (fun x ->
if contains x s then ....
)))
In the inner loop, x ranges over all elements of the collection
S1. Therefore, (contains x s) is true when x happens to be in the
intersection of S1 and s. At present, we will only consider
the examples where S1 is s, so x is always in the intersection,
providing s is not empty (in the latter
case, there should be no loop iterations to start with).
In the future, perhaps we need a special request: C_not_empty
*)
let contains x s = match (x,s) with
| (Var _, Var _) -> true
| (Val v, Var (g,p)) -> shift p (fun k -> Req (C_in (v,k)))
| (_, Val _) ->
failwith "Memb in literal collections not needed so far"
;;
let get_card = function
| Var (_,p) -> shift p (fun k -> Req (C_card k))
| Val _ -> failwith "Don't handle literal collections; not needed so far"
;;
(* Turn a loop variable of the set type into a collection *)
let make_coll s =
{coll_member = (fun x -> contains x s);
coll_cardinality = (fun () -> get_card s)}
;;
(* Delete an element from a list. Not tail recursive: will optimize later *)
let rec del x = function [] -> []
| (h::t) -> if x = h then t else h::del x t
;;
(* Pre-computed Pascal triangle (again, automatically sized) *)
type 'a llist = Cons of 'a * 'a llist Lazy.t;;
let rec lnth n (Cons (h,t)) =
if n = 0 then h else lnth (pred n) (Lazy.force t)
;;
let rec lmap f (Cons (h,t)) =
Cons (f h,lazy (lmap f (Lazy.force t)));;
let rec pascal =
Cons ([big_int_of_int 1;big_int_of_int 2;big_int_of_int 1],
lazy
(lmap (fun line ->
List.map2 add_big_int
(line @ [zero_big_int]) (zero_big_int::line)) pascal))
;;
let comb_choose n m =
if n = 0 then unit_big_int
else if n = m then unit_big_int
else List.nth (lnth (m-2) pascal) n;;
(*
string_of_big_int (comb_choose 2 4);;
6
string_of_big_int (comb_choose 1 10);;
10
string_of_big_int (comb_choose 2 10);;
45
*)
(* Obtain the cardinality of a set of sets of individuals *)
let get_group_card = function
| {ccard = None} as coll ->
power_int_positive_int 2 (List.length coll.indivs_s2)
| {ccard = Some n} as coll ->
comb_choose (n - List.length coll.indivs_s1) (List.length coll.indivs_s2)
;;
(* Implicitly we use the sum monoid for big_int *)
let map_reduce_powerset group_coll fsw =
let rec loop coll = function
| Default x ->
Printf.printf "default %d %s\n" x
(string_of_big_int (get_group_card coll));
repeat_big (big_int_of_int x) (get_group_card coll)
| Case (C_in (v,k)) -> (* Inclusion constraint for indiv v *)
Printf.printf "inclusion of %s\n" v;
if List.mem v coll.indivs_s1 then
loop coll (k true) (* any subset definitely has v *)
else if not (List.mem v coll.indivs_s2) then
loop coll (k false) (* no subset can have v *)
else if begin (* otherwise, card(S1) becomes > *)
match coll.ccard with (* ccard *)
| None -> false
| Some n -> n = List.length coll.indivs_s1
end
then loop coll (k false)
else
let coll = {coll with indivs_s2 = del v coll.indivs_s2} in
union_big
(loop {coll with indivs_s1 = v :: coll.indivs_s1}
(k true))
(loop coll (k false))
| Case (C_card k) -> (* Find cardinality of the subset s *)
begin match coll.ccard with
| Some n -> loop coll (k n)
| None ->
let nmax = List.length coll.indivs_s1 +
List.length coll.indivs_s2 in
let rec iter acc i =
if i > nmax then acc else
let coll = {coll with ccard = Some i} in
iter (union_big (loop coll (k i)) acc) (succ i)
in iter empty_big (List.length coll.indivs_s1)
end
in loop group_coll fsw;;
(* The number of all subsets of people that include Alice. It is 2^99 *)
let "633825300114114700748351602688" =
string_of_big_int
(map_reduce_powerset all_people_groups (reify (fun s ->
if contains (Val "alice") s then 1 else 0)))
;;
let indic_monoid = {union = (fun x y -> if x > 0 || y > 0 then 1 else 0);
empty = 0}
;;
(* The same as above *)
let "633825300114114700748351602688" =
string_of_big_int
(map_reduce_powerset all_people_groups (reify (fun s ->
map_reduce_sw indic_monoid (make_coll s) (reify (fun x ->
if equ x (Val "alice") then 1 else 0)))))
;;
(* The number of subsets that contain either Alice or Bob.
This is 2^99 + 2^98
*)
let "950737950171172051122527404032" =
string_of_big_int
(map_reduce_powerset all_people_groups (reify (fun s ->
map_reduce_sw indic_monoid (make_coll s) (reify (fun x ->
map_reduce_sw indic_monoid (make_coll s) (reify (fun y ->
if equ x (Val "alice") || equ y (Val "bob") then 1 else 0)))))))
;;
(* Find all subsets that contain two elements that are equal to each other.
This is all subsets but the empty set.
The result is 2^100 - 1
*)
let "1267650600228229401496703205375" =
string_of_big_int
(map_reduce_powerset all_people_groups (reify (fun s ->
map_reduce_sw indic_monoid (make_coll s) (reify (fun x ->
map_reduce_sw indic_monoid (make_coll s) (reify (fun y ->
if equ x y then 1 else 0)))))))
;;
(* The simplified example in Sec 5.2 of the paper; see check.hs *)
let "3075003443503627470680627800440832" =
string_of_big_int
(map_reduce_powerset all_people_groups (reify (fun s ->
map_reduce_sw sum_monoid (make_coll s) (reify (fun x ->
map_reduce_sw sum_monoid (make_coll s) (reify (fun y ->
if equ x (Val "alice") || equ y (Val "bob") || equ x y
then 0 else 1)))))))
;;
let big_prod_monoid = {union = mult_big_int; empty = unit_big_int}
;;
(* big-int version *)
let map_reduce_powerset group_coll fsw =
let rec loop coll = function
| Default x ->
repeat_big x (get_group_card coll)
| Case (C_in (v,k)) -> (* Inclusion constraint for indiv v *)
if List.mem v coll.indivs_s1 then
loop coll (k true) (* any subset definitely has v *)
else if not (List.mem v coll.indivs_s2) then
loop coll (k false) (* no subset can have v *)
else if begin (* otherwise, card(S1) becomes > *)
match coll.ccard with (* ccard *)
| None -> false
| Some n -> n = List.length coll.indivs_s1
end
then loop coll (k false)
else
let coll = {coll with indivs_s2 = del v coll.indivs_s2} in
union_big
(loop {coll with indivs_s1 = v :: coll.indivs_s1}
(k true))
(loop coll (k false))
| Case (C_card k) -> (* Find cardinality of the subset s *)
begin match coll.ccard with
| Some n -> loop coll (k n)
| None ->
let nmax = List.length coll.indivs_s1 +
List.length coll.indivs_s2 in
let rec iter acc i =
if i > nmax then acc else
let coll = {coll with ccard = Some i} in
iter (union_big (loop coll (k i)) acc) (succ i)
in iter empty_big (List.length coll.indivs_s1)
end
in loop group_coll fsw;;
(* The complete example in Sec 5.2 of the paper; see check.hs *)
let answer_prod =
(map_reduce_powerset all_people_groups (reify (fun s ->
map_reduce_sw big_prod_monoid (make_coll s) (reify (fun x ->
map_reduce_sw big_prod_monoid (make_coll s) (reify (fun y ->
if equ x (Val "alice") || equ y (Val "bob") || equ x y
then one else three)))))))
;;
(* "321761846005034 ..." *)
let 4630 =
String.length (string_of_big_int answer_prod);;
(* Other possible examples: find the number of all subsets
of individuals that contain Alice and Bob and whose cardinality
is an even number.
*)
print_endline "\nAll done";;